SOLR-9160: Sync 6x and 7.0 move of UninvertingReader, SlowCompositeReaderWrapper for Solr (LUCENE-7283)

This commit is contained in:
yonik 2016-05-25 05:35:18 -04:00
parent aec3654fb8
commit 5525f42928
55 changed files with 9651 additions and 107 deletions

View File

@ -661,7 +661,7 @@ public class MultiDocValues {
public final OrdinalMap mapping; public final OrdinalMap mapping;
/** Creates a new MultiSortedDocValues over <code>values</code> */ /** Creates a new MultiSortedDocValues over <code>values</code> */
MultiSortedDocValues(SortedDocValues values[], int docStarts[], OrdinalMap mapping) throws IOException { public MultiSortedDocValues(SortedDocValues values[], int docStarts[], OrdinalMap mapping) throws IOException {
assert docStarts.length == values.length + 1; assert docStarts.length == values.length + 1;
this.values = values; this.values = values;
this.docStarts = docStarts; this.docStarts = docStarts;

View File

@ -303,6 +303,9 @@ Other Changes
* SOLR-9110: Move JoinFromCollection- SubQueryTransformer- BlockJoinFacet- Distrib Tests to SolrCloudTestCase (Mikhail Khludnev) * SOLR-9110: Move JoinFromCollection- SubQueryTransformer- BlockJoinFacet- Distrib Tests to SolrCloudTestCase (Mikhail Khludnev)
* SOLR-9160: Sync 6x and 7.0 move of UninvertingReader, SlowCompositeReaderWrapper for Solr (LUCENE-7283)
(yonik)
================== 6.0.1 ================== ================== 6.0.1 ==================
(No Changes) (No Changes)

View File

@ -36,12 +36,12 @@ import org.apache.lucene.search.DocValuesRangeQuery;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.SolrException;
import org.apache.solr.response.TextResponseWriter; import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.search.QParser; import org.apache.solr.search.QParser;
import org.apache.solr.uninverting.UninvertingReader.Type;
import com.ibm.icu.text.Collator; import com.ibm.icu.text.Collator;
import com.ibm.icu.text.RuleBasedCollator; import com.ibm.icu.text.RuleBasedCollator;

View File

@ -26,16 +26,6 @@ import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import com.carrotsearch.hppc.IntHashSet;
import com.carrotsearch.hppc.IntObjectHashMap;
import com.carrotsearch.hppc.LongHashSet;
import com.carrotsearch.hppc.LongObjectHashMap;
import com.carrotsearch.hppc.LongObjectMap;
import com.carrotsearch.hppc.cursors.IntObjectCursor;
import com.carrotsearch.hppc.cursors.LongCursor;
import com.carrotsearch.hppc.cursors.LongObjectCursor;
import com.carrotsearch.hppc.cursors.ObjectCursor;
import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo;
@ -53,7 +43,6 @@ import org.apache.lucene.search.Collector;
import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.LeafCollector; import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.solr.search.QueryWrapperFilter;
import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Sort; import org.apache.lucene.search.Sort;
@ -61,7 +50,6 @@ import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopDocsCollector; import org.apache.lucene.search.TopDocsCollector;
import org.apache.lucene.search.TopFieldCollector; import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.TopScoreDocCollector; import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.uninverting.UninvertingReader;
import org.apache.lucene.util.BitSetIterator; import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.BytesRefBuilder;
@ -87,12 +75,24 @@ import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocList; import org.apache.solr.search.DocList;
import org.apache.solr.search.DocSlice; import org.apache.solr.search.DocSlice;
import org.apache.solr.search.QParser; import org.apache.solr.search.QParser;
import org.apache.solr.search.QueryWrapperFilter;
import org.apache.solr.search.SolrConstantScoreQuery; import org.apache.solr.search.SolrConstantScoreQuery;
import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SortSpecParsing; import org.apache.solr.search.SortSpecParsing;
import org.apache.solr.uninverting.UninvertingReader;
import org.apache.solr.util.plugin.PluginInfoInitialized; import org.apache.solr.util.plugin.PluginInfoInitialized;
import org.apache.solr.util.plugin.SolrCoreAware; import org.apache.solr.util.plugin.SolrCoreAware;
import com.carrotsearch.hppc.IntHashSet;
import com.carrotsearch.hppc.IntObjectHashMap;
import com.carrotsearch.hppc.LongHashSet;
import com.carrotsearch.hppc.LongObjectHashMap;
import com.carrotsearch.hppc.LongObjectMap;
import com.carrotsearch.hppc.cursors.IntObjectCursor;
import com.carrotsearch.hppc.cursors.LongCursor;
import com.carrotsearch.hppc.cursors.LongObjectCursor;
import com.carrotsearch.hppc.cursors.ObjectCursor;
/** /**
* The ExpandComponent is designed to work with the CollapsingPostFilter. * The ExpandComponent is designed to work with the CollapsingPostFilter.
* The CollapsingPostFilter collapses a result set on a field. * The CollapsingPostFilter collapses a result set on a field.

View File

@ -0,0 +1,296 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.index;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CompositeReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader.CoreClosedListener;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues.MultiSortedDocValues;
import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues;
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.search.Sort;
import org.apache.lucene.util.Bits;
/**
* This class forces a composite reader (eg a {@link
* MultiReader} or {@link DirectoryReader}) to emulate a
* {@link LeafReader}. This requires implementing the postings
* APIs on-the-fly, using the static methods in {@link
* MultiFields}, {@link MultiDocValues}, by stepping through
* the sub-readers to merge fields/terms, appending docs, etc.
*
* <p><b>NOTE</b>: this class almost always results in a
* performance hit. If this is important to your use case,
* you'll get better performance by gathering the sub readers using
* {@link IndexReader#getContext()} to get the
* leaves and then operate per-LeafReader,
* instead of using this class.
*/
public final class SlowCompositeReaderWrapper extends LeafReader {
private final CompositeReader in;
private final Fields fields;
private final boolean merging;
/** This method is sugar for getting an {@link LeafReader} from
* an {@link IndexReader} of any kind. If the reader is already atomic,
* it is returned unchanged, otherwise wrapped by this class.
*/
public static LeafReader wrap(IndexReader reader) throws IOException {
if (reader instanceof CompositeReader) {
return new SlowCompositeReaderWrapper((CompositeReader) reader, false);
} else {
assert reader instanceof LeafReader;
return (LeafReader) reader;
}
}
SlowCompositeReaderWrapper(CompositeReader reader, boolean merging) throws IOException {
super();
in = reader;
if (getFieldInfos().hasPointValues()) {
throw new IllegalArgumentException("cannot wrap points");
}
fields = MultiFields.getFields(in);
in.registerParentReader(this);
this.merging = merging;
}
@Override
public String toString() {
return "SlowCompositeReaderWrapper(" + in + ")";
}
@Override
public void addCoreClosedListener(CoreClosedListener listener) {
addCoreClosedListenerAsReaderClosedListener(in, listener);
}
@Override
public void removeCoreClosedListener(CoreClosedListener listener) {
removeCoreClosedListenerAsReaderClosedListener(in, listener);
}
@Override
public Fields fields() {
ensureOpen();
return fields;
}
@Override
public NumericDocValues getNumericDocValues(String field) throws IOException {
ensureOpen();
return MultiDocValues.getNumericValues(in, field);
}
@Override
public Bits getDocsWithField(String field) throws IOException {
ensureOpen();
return MultiDocValues.getDocsWithField(in, field);
}
@Override
public BinaryDocValues getBinaryDocValues(String field) throws IOException {
ensureOpen();
return MultiDocValues.getBinaryValues(in, field);
}
@Override
public SortedNumericDocValues getSortedNumericDocValues(String field) throws IOException {
ensureOpen();
return MultiDocValues.getSortedNumericValues(in, field);
}
@Override
public SortedDocValues getSortedDocValues(String field) throws IOException {
ensureOpen();
OrdinalMap map = null;
synchronized (cachedOrdMaps) {
map = cachedOrdMaps.get(field);
if (map == null) {
// uncached, or not a multi dv
SortedDocValues dv = MultiDocValues.getSortedValues(in, field);
if (dv instanceof MultiSortedDocValues) {
map = ((MultiSortedDocValues)dv).mapping;
if (map.owner == getCoreCacheKey() && merging == false) {
cachedOrdMaps.put(field, map);
}
}
return dv;
}
}
int size = in.leaves().size();
final SortedDocValues[] values = new SortedDocValues[size];
final int[] starts = new int[size+1];
for (int i = 0; i < size; i++) {
LeafReaderContext context = in.leaves().get(i);
final LeafReader reader = context.reader();
final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
if (fieldInfo != null && fieldInfo.getDocValuesType() != DocValuesType.SORTED) {
return null;
}
SortedDocValues v = reader.getSortedDocValues(field);
if (v == null) {
v = DocValues.emptySorted();
}
values[i] = v;
starts[i] = context.docBase;
}
starts[size] = maxDoc();
return new MultiSortedDocValues(values, starts, map);
}
@Override
public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
ensureOpen();
OrdinalMap map = null;
synchronized (cachedOrdMaps) {
map = cachedOrdMaps.get(field);
if (map == null) {
// uncached, or not a multi dv
SortedSetDocValues dv = MultiDocValues.getSortedSetValues(in, field);
if (dv instanceof MultiSortedSetDocValues) {
map = ((MultiSortedSetDocValues)dv).mapping;
if (map.owner == getCoreCacheKey() && merging == false) {
cachedOrdMaps.put(field, map);
}
}
return dv;
}
}
assert map != null;
int size = in.leaves().size();
final SortedSetDocValues[] values = new SortedSetDocValues[size];
final int[] starts = new int[size+1];
for (int i = 0; i < size; i++) {
LeafReaderContext context = in.leaves().get(i);
final LeafReader reader = context.reader();
final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
if(fieldInfo != null && fieldInfo.getDocValuesType() != DocValuesType.SORTED_SET){
return null;
}
SortedSetDocValues v = reader.getSortedSetDocValues(field);
if (v == null) {
v = DocValues.emptySortedSet();
}
values[i] = v;
starts[i] = context.docBase;
}
starts[size] = maxDoc();
return new MultiSortedSetDocValues(values, starts, map);
}
// TODO: this could really be a weak map somewhere else on the coreCacheKey,
// but do we really need to optimize slow-wrapper any more?
private final Map<String,OrdinalMap> cachedOrdMaps = new HashMap<>();
@Override
public NumericDocValues getNormValues(String field) throws IOException {
ensureOpen();
return MultiDocValues.getNormValues(in, field);
}
@Override
public Fields getTermVectors(int docID) throws IOException {
ensureOpen();
return in.getTermVectors(docID);
}
@Override
public int numDocs() {
// Don't call ensureOpen() here (it could affect performance)
return in.numDocs();
}
@Override
public int maxDoc() {
// Don't call ensureOpen() here (it could affect performance)
return in.maxDoc();
}
@Override
public void document(int docID, StoredFieldVisitor visitor) throws IOException {
ensureOpen();
in.document(docID, visitor);
}
@Override
public Bits getLiveDocs() {
ensureOpen();
return MultiFields.getLiveDocs(in);
}
@Override
public PointValues getPointValues() {
ensureOpen();
return null;
}
@Override
public FieldInfos getFieldInfos() {
ensureOpen();
return MultiFields.getMergedFieldInfos(in);
}
@Override
public Object getCoreCacheKey() {
return in.getCoreCacheKey();
}
@Override
public Object getCombinedCoreAndDeletesKey() {
return in.getCombinedCoreAndDeletesKey();
}
@Override
protected void doClose() throws IOException {
// TODO: as this is a wrapper, should we really close the delegate?
in.close();
}
@Override
public void checkIntegrity() throws IOException {
ensureOpen();
for (LeafReaderContext ctx : in.leaves()) {
ctx.reader().checkIntegrity();
}
}
}

View File

@ -0,0 +1,65 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.index;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.MergePolicyWrapper;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.MergeTrigger;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.SegmentCommitInfo;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.search.Sort;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;
import org.apache.solr.index.SlowCompositeReaderWrapper;
// TODO: remove this and add indexSort specification directly to solrconfig.xml? But for BWC, also accept SortingMergePolicy specifiction?
public final class SortingMergePolicy extends MergePolicyWrapper {
private final Sort sort;
/** Create a new {@code MergePolicy} that sorts documents with the given {@code sort}. */
public SortingMergePolicy(MergePolicy in, Sort sort) {
super(in);
this.sort = sort;
}
/** Return the {@link Sort} order that is used to sort segments when merging. */
public Sort getSort() {
return sort;
}
@Override
public String toString() {
return "SortingMergePolicy(" + in + ", sort=" + sort + ")";
}
}

View File

@ -28,6 +28,7 @@ import java.util.Set;
import java.util.TreeSet; import java.util.TreeSet;
import java.util.concurrent.Callable; import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutionException;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.document.StoredField; import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.IndexableField;
@ -41,22 +42,16 @@ import org.apache.lucene.spatial.SpatialStrategy;
import org.apache.lucene.spatial.query.SpatialArgs; import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.query.SpatialArgsParser; import org.apache.lucene.spatial.query.SpatialArgsParser;
import org.apache.lucene.spatial.query.SpatialOperation; import org.apache.lucene.spatial.query.SpatialOperation;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.SolrParams;
import org.apache.solr.response.TextResponseWriter; import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.search.QParser; import org.apache.solr.search.QParser;
import org.apache.solr.search.SpatialOptions; import org.apache.solr.search.SpatialOptions;
import org.apache.solr.uninverting.UninvertingReader.Type;
import org.apache.solr.util.DistanceUnits; import org.apache.solr.util.DistanceUnits;
import org.apache.solr.util.MapListener; import org.apache.solr.util.MapListener;
import org.apache.solr.util.SpatialUtils; import org.apache.solr.util.SpatialUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Throwables;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import org.locationtech.spatial4j.context.SpatialContext; import org.locationtech.spatial4j.context.SpatialContext;
import org.locationtech.spatial4j.context.SpatialContextFactory; import org.locationtech.spatial4j.context.SpatialContextFactory;
import org.locationtech.spatial4j.distance.DistanceUtils; import org.locationtech.spatial4j.distance.DistanceUtils;
@ -66,6 +61,12 @@ import org.locationtech.spatial4j.io.SupportedFormats;
import org.locationtech.spatial4j.shape.Point; import org.locationtech.spatial4j.shape.Point;
import org.locationtech.spatial4j.shape.Rectangle; import org.locationtech.spatial4j.shape.Rectangle;
import org.locationtech.spatial4j.shape.Shape; import org.locationtech.spatial4j.shape.Shape;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Throwables;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
/** /**
* Abstract base class for Solr FieldTypes based on a Lucene 4 {@link SpatialStrategy}. * Abstract base class for Solr FieldTypes based on a Lucene 4 {@link SpatialStrategy}.

View File

@ -23,10 +23,10 @@ import java.nio.ByteBuffer;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.IndexableField;
import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortField;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.solr.common.util.Base64; import org.apache.solr.common.util.Base64;
import org.apache.solr.response.TextResponseWriter; import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.uninverting.UninvertingReader.Type;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;

View File

@ -22,15 +22,14 @@ import java.util.Map;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.docvalues.BoolDocValues; import org.apache.lucene.queries.function.docvalues.BoolDocValues;
import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortField;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.CharsRefBuilder; import org.apache.lucene.util.CharsRefBuilder;
@ -40,6 +39,7 @@ import org.apache.solr.analysis.SolrAnalyzer;
import org.apache.solr.response.TextResponseWriter; import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.search.QParser; import org.apache.solr.search.QParser;
import org.apache.solr.search.function.OrdFieldSource; import org.apache.solr.search.function.OrdFieldSource;
import org.apache.solr.uninverting.UninvertingReader.Type;
/** /**
* *
*/ */

View File

@ -40,12 +40,12 @@ import org.apache.lucene.search.DocValuesRangeQuery;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.SolrException;
import org.apache.solr.response.TextResponseWriter; import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.search.QParser; import org.apache.solr.search.QParser;
import org.apache.solr.uninverting.UninvertingReader.Type;
/** /**
* Field for collated sort keys. * Field for collated sort keys.

View File

@ -44,7 +44,7 @@ import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FieldValueQuery; import org.apache.lucene.search.FieldValueQuery;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortField;
import org.apache.lucene.uninverting.UninvertingReader.Type; import org.apache.solr.uninverting.UninvertingReader.Type;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.response.TextResponseWriter; import org.apache.solr.response.TextResponseWriter;

View File

@ -16,12 +16,6 @@
*/ */
package org.apache.solr.schema; package org.apache.solr.schema;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.lang.invoke.MethodHandles; import java.lang.invoke.MethodHandles;
@ -31,6 +25,12 @@ import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Locale; import java.util.Locale;
import java.util.Map; import java.util.Map;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.apache.lucene.document.FieldType; import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.LegacyIntField; import org.apache.lucene.document.LegacyIntField;
@ -45,7 +45,6 @@ import org.apache.lucene.search.DocValuesRangeQuery;
import org.apache.lucene.search.LegacyNumericRangeQuery; import org.apache.lucene.search.LegacyNumericRangeQuery;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortField;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.CharsRef;
@ -55,6 +54,7 @@ import org.apache.solr.common.EnumFieldValue;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.response.TextResponseWriter; import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.search.QParser; import org.apache.solr.search.QParser;
import org.apache.solr.uninverting.UninvertingReader.Type;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.w3c.dom.Document; import org.w3c.dom.Document;

View File

@ -16,17 +16,17 @@
*/ */
package org.apache.solr.schema; package org.apache.solr.schema;
import java.io.IOException;
import java.util.Map;
import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.IndexableField;
import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortField;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.response.TextResponseWriter; import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.search.QParser; import org.apache.solr.search.QParser;
import org.apache.solr.search.function.FileFloatSource; import org.apache.solr.search.function.FileFloatSource;
import org.apache.solr.uninverting.UninvertingReader.Type;
import java.io.IOException;
import java.util.Map;
/** Get values from an external file instead of the index. /** Get values from an external file instead of the index.
* *

View File

@ -49,7 +49,6 @@ import org.apache.lucene.search.SortedNumericSelector;
import org.apache.lucene.search.SortedSetSelector; import org.apache.lucene.search.SortedSetSelector;
import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.uninverting.UninvertingReader;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.CharsRef;
@ -66,6 +65,7 @@ import org.apache.solr.query.SolrRangeQuery;
import org.apache.solr.response.TextResponseWriter; import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.search.QParser; import org.apache.solr.search.QParser;
import org.apache.solr.search.Sorting; import org.apache.solr.search.Sorting;
import org.apache.solr.uninverting.UninvertingReader;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;

View File

@ -18,23 +18,23 @@ package org.apache.solr.schema;
import java.io.IOException; import java.io.IOException;
import org.locationtech.spatial4j.context.SpatialContext;
import org.locationtech.spatial4j.distance.DistanceUtils;
import org.locationtech.spatial4j.io.GeohashUtils;
import org.locationtech.spatial4j.shape.Point;
import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.IndexableField;
import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.LiteralValueSource; import org.apache.lucene.queries.function.valuesource.LiteralValueSource;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortField;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.solr.response.TextResponseWriter; import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.search.QParser; import org.apache.solr.search.QParser;
import org.apache.solr.search.SolrConstantScoreQuery; import org.apache.solr.search.SolrConstantScoreQuery;
import org.apache.solr.search.SpatialOptions; import org.apache.solr.search.SpatialOptions;
import org.apache.solr.search.function.ValueSourceRangeFilter; import org.apache.solr.search.function.ValueSourceRangeFilter;
import org.apache.solr.search.function.distance.GeohashHaversineFunction; import org.apache.solr.search.function.distance.GeohashHaversineFunction;
import org.apache.solr.uninverting.UninvertingReader.Type;
import org.apache.solr.util.SpatialUtils; import org.apache.solr.util.SpatialUtils;
import org.locationtech.spatial4j.context.SpatialContext;
import org.locationtech.spatial4j.distance.DistanceUtils;
import org.locationtech.spatial4j.io.GeohashUtils;
import org.locationtech.spatial4j.shape.Point;
/** /**
* This is a class that represents a <a * This is a class that represents a <a

View File

@ -51,7 +51,7 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.MultiFields;
import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.uninverting.UninvertingReader; import org.apache.solr.uninverting.UninvertingReader;
import org.apache.lucene.util.Version; import org.apache.lucene.util.Version;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.SolrException.ErrorCode;

View File

@ -22,8 +22,8 @@ import java.util.Map;
import org.apache.lucene.document.FieldType; import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.VectorValueSource; import org.apache.lucene.queries.function.valuesource.VectorValueSource;
@ -37,7 +37,6 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortField;
import org.apache.lucene.search.Weight; import org.apache.lucene.search.Weight;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.response.TextResponseWriter; import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.search.DelegatingCollector; import org.apache.solr.search.DelegatingCollector;
@ -45,8 +44,8 @@ import org.apache.solr.search.ExtendedQueryBase;
import org.apache.solr.search.PostFilter; import org.apache.solr.search.PostFilter;
import org.apache.solr.search.QParser; import org.apache.solr.search.QParser;
import org.apache.solr.search.SpatialOptions; import org.apache.solr.search.SpatialOptions;
import org.apache.solr.uninverting.UninvertingReader.Type;
import org.apache.solr.util.SpatialUtils; import org.apache.solr.util.SpatialUtils;
import org.locationtech.spatial4j.context.SpatialContext; import org.locationtech.spatial4j.context.SpatialContext;
import org.locationtech.spatial4j.distance.DistanceUtils; import org.locationtech.spatial4j.distance.DistanceUtils;
import org.locationtech.spatial4j.shape.Point; import org.locationtech.spatial4j.shape.Point;

View File

@ -21,7 +21,6 @@ import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import org.locationtech.spatial4j.distance.DistanceUtils;
import org.apache.lucene.document.FieldType; import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.IndexableField;
import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.ValueSource;
@ -30,13 +29,14 @@ import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortField;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.MapSolrParams; import org.apache.solr.common.params.MapSolrParams;
import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.SolrParams;
import org.apache.solr.response.TextResponseWriter; import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.search.QParser; import org.apache.solr.search.QParser;
import org.apache.solr.search.SpatialOptions; import org.apache.solr.search.SpatialOptions;
import org.apache.solr.uninverting.UninvertingReader.Type;
import org.locationtech.spatial4j.distance.DistanceUtils;
/** /**
* A point type that indexes a point in an n-dimensional space as separate fields and supports range queries. * A point type that indexes a point in an n-dimensional space as separate fields and supports range queries.

View File

@ -33,14 +33,14 @@ import org.apache.lucene.index.IndexableField;
import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.SortedSetFieldSource; import org.apache.lucene.queries.function.valuesource.SortedSetFieldSource;
import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortField;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.lucene.util.AttributeFactory; import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.AttributeSource.State; import org.apache.lucene.util.AttributeSource.State;
import org.apache.lucene.util.AttributeSource;
import org.apache.solr.analysis.SolrAnalyzer; import org.apache.solr.analysis.SolrAnalyzer;
import org.apache.solr.response.TextResponseWriter; import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.search.QParser; import org.apache.solr.search.QParser;
import org.apache.solr.search.Sorting; import org.apache.solr.search.Sorting;
import org.apache.solr.uninverting.UninvertingReader.Type;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;

View File

@ -20,16 +20,16 @@ import java.io.IOException;
import java.util.Map; import java.util.Map;
import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.docvalues.IntDocValues; import org.apache.lucene.queries.function.docvalues.IntDocValues;
import org.apache.lucene.search.*; import org.apache.lucene.search.*;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.solr.response.TextResponseWriter; import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.search.QParser; import org.apache.solr.search.QParser;
import org.apache.solr.uninverting.UninvertingReader.Type;
/** /**
* Utility Field used for random sorting. It should not be passed a value. * Utility Field used for random sorting. It should not be passed a value.

View File

@ -27,10 +27,10 @@ import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.IndexableField;
import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortField;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.solr.response.TextResponseWriter; import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.search.QParser; import org.apache.solr.search.QParser;
import org.apache.solr.uninverting.UninvertingReader.Type;
public class StrField extends PrimitiveFieldType { public class StrField extends PrimitiveFieldType {

View File

@ -16,14 +16,16 @@
*/ */
package org.apache.solr.schema; package org.apache.solr.schema;
import java.io.IOException;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.SortedSetFieldSource; import org.apache.lucene.queries.function.valuesource.SortedSetFieldSource;
import org.apache.lucene.search.*; import org.apache.lucene.search.*;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.QueryBuilder; import org.apache.lucene.util.QueryBuilder;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
@ -31,9 +33,7 @@ import org.apache.solr.query.SolrRangeQuery;
import org.apache.solr.response.TextResponseWriter; import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.search.QParser; import org.apache.solr.search.QParser;
import org.apache.solr.search.Sorting; import org.apache.solr.search.Sorting;
import org.apache.solr.uninverting.UninvertingReader.Type;
import java.util.Map;
import java.io.IOException;
/** <code>TextField</code> is the basic type for configurable text analysis. /** <code>TextField</code> is the basic type for configurable text analysis.
* Analyzers for field types using this implementation should be defined in the schema. * Analyzers for field types using this implementation should be defined in the schema.

View File

@ -26,8 +26,8 @@ import java.util.List;
import java.util.Locale; import java.util.Locale;
import java.util.Map; import java.util.Map;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.FieldType.LegacyNumericType; import org.apache.lucene.document.FieldType.LegacyNumericType;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.LegacyDoubleField; import org.apache.lucene.document.LegacyDoubleField;
import org.apache.lucene.document.LegacyFloatField; import org.apache.lucene.document.LegacyFloatField;
import org.apache.lucene.document.LegacyIntField; import org.apache.lucene.document.LegacyIntField;
@ -47,7 +47,6 @@ import org.apache.lucene.search.LegacyNumericRangeQuery;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedSetSelector; import org.apache.lucene.search.SortedSetSelector;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.CharsRef;
@ -61,6 +60,7 @@ import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.search.FunctionRangeQuery; import org.apache.solr.search.FunctionRangeQuery;
import org.apache.solr.search.QParser; import org.apache.solr.search.QParser;
import org.apache.solr.search.function.ValueSourceRangeFilter; import org.apache.solr.search.function.ValueSourceRangeFilter;
import org.apache.solr.uninverting.UninvertingReader.Type;
import org.apache.solr.util.DateMathParser; import org.apache.solr.util.DateMathParser;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;

View File

@ -25,15 +25,7 @@ import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import com.carrotsearch.hppc.FloatArrayList;
import com.carrotsearch.hppc.IntArrayList;
import com.carrotsearch.hppc.IntIntHashMap;
import com.carrotsearch.hppc.IntLongHashMap;
import com.carrotsearch.hppc.cursors.IntIntCursor;
import com.carrotsearch.hppc.cursors.IntLongCursor;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.StringUtils;
import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo;
@ -49,13 +41,12 @@ import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Sort; import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortField;
import org.apache.lucene.uninverting.UninvertingReader;
import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BitSetIterator; import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
@ -65,8 +56,8 @@ import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.handler.component.QueryElevationComponent; import org.apache.solr.handler.component.QueryElevationComponent;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.request.LocalSolrQueryRequest; import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrRequestInfo; import org.apache.solr.request.SolrRequestInfo;
@ -75,6 +66,14 @@ import org.apache.solr.schema.StrField;
import org.apache.solr.schema.TrieFloatField; import org.apache.solr.schema.TrieFloatField;
import org.apache.solr.schema.TrieIntField; import org.apache.solr.schema.TrieIntField;
import org.apache.solr.schema.TrieLongField; import org.apache.solr.schema.TrieLongField;
import org.apache.solr.uninverting.UninvertingReader;
import com.carrotsearch.hppc.FloatArrayList;
import com.carrotsearch.hppc.IntArrayList;
import com.carrotsearch.hppc.IntIntHashMap;
import com.carrotsearch.hppc.IntLongHashMap;
import com.carrotsearch.hppc.cursors.IntIntCursor;
import com.carrotsearch.hppc.cursors.IntLongCursor;
/** /**

View File

@ -20,16 +20,16 @@ import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.uninverting.UninvertingReader; import org.apache.solr.uninverting.UninvertingReader;
/** /**
* Lucene 5.0 removes "accidental" insanity, so you must explicitly * Lucene 5.0 removes "accidental" insanity, so you must explicitly

View File

@ -18,13 +18,12 @@ package org.apache.solr.search;
import java.net.URL; import java.net.URL;
import org.apache.lucene.uninverting.UninvertingReader;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.JmxMonitoredMap.JmxAugmentedSolrInfoMBean;
import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrInfoMBean; import org.apache.solr.core.SolrInfoMBean;
import org.apache.solr.core.JmxMonitoredMap.JmxAugmentedSolrInfoMBean; import org.apache.solr.uninverting.UninvertingReader;
/** /**
* A SolrInfoMBean that provides introspection of the Solr FieldCache * A SolrInfoMBean that provides introspection of the Solr FieldCache

View File

@ -53,7 +53,6 @@ import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiPostingsEnum; import org.apache.lucene.index.MultiPostingsEnum;
import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.StoredFieldVisitor; import org.apache.lucene.index.StoredFieldVisitor;
@ -94,7 +93,6 @@ import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.TotalHitCountCollector; import org.apache.lucene.search.TotalHitCountCollector;
import org.apache.lucene.search.Weight; import org.apache.lucene.search.Weight;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.uninverting.UninvertingReader;
import org.apache.lucene.util.Bits; import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.FixedBitSet;
@ -109,6 +107,7 @@ import org.apache.solr.core.DirectoryFactory.DirContext;
import org.apache.solr.core.SolrConfig; import org.apache.solr.core.SolrConfig;
import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrInfoMBean; import org.apache.solr.core.SolrInfoMBean;
import org.apache.solr.index.SlowCompositeReaderWrapper;
import org.apache.solr.request.LocalSolrQueryRequest; import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrRequestInfo; import org.apache.solr.request.SolrRequestInfo;
@ -122,6 +121,7 @@ import org.apache.solr.schema.TrieFloatField;
import org.apache.solr.schema.TrieIntField; import org.apache.solr.schema.TrieIntField;
import org.apache.solr.search.facet.UnInvertedField; import org.apache.solr.search.facet.UnInvertedField;
import org.apache.solr.search.stats.StatsSource; import org.apache.solr.search.stats.StatsSource;
import org.apache.solr.uninverting.UninvertingReader;
import org.apache.solr.update.IndexFingerprint; import org.apache.solr.update.IndexFingerprint;
import org.apache.solr.update.SolrIndexConfig; import org.apache.solr.update.SolrIndexConfig;
import org.slf4j.Logger; import org.slf4j.Logger;

View File

@ -27,17 +27,16 @@ import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermQuery;
import org.apache.lucene.uninverting.DocTermOrds;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRefBuilder; import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.FixedBitSet;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrCore;
import org.apache.solr.index.SlowCompositeReaderWrapper;
import org.apache.solr.schema.FieldType; import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.TrieField; import org.apache.solr.schema.TrieField;
import org.apache.solr.search.BitDocSet; import org.apache.solr.search.BitDocSet;
@ -45,6 +44,7 @@ import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocSet; import org.apache.solr.search.DocSet;
import org.apache.solr.search.SolrCache; import org.apache.solr.search.SolrCache;
import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.uninverting.DocTermOrds;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;

View File

@ -20,13 +20,12 @@ import java.io.IOException;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.ValueSource;
@ -34,6 +33,7 @@ import org.apache.lucene.queries.function.docvalues.IntDocValues;
import org.apache.lucene.search.SortedSetSelector; import org.apache.lucene.search.SortedSetSelector;
import org.apache.lucene.util.mutable.MutableValue; import org.apache.lucene.util.mutable.MutableValue;
import org.apache.lucene.util.mutable.MutableValueInt; import org.apache.lucene.util.mutable.MutableValueInt;
import org.apache.solr.index.SlowCompositeReaderWrapper;
import org.apache.solr.schema.SchemaField; import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.Insanity; import org.apache.solr.search.Insanity;
import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.search.SolrIndexSearcher;

View File

@ -20,18 +20,18 @@ import java.io.IOException;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.docvalues.IntDocValues; import org.apache.lucene.queries.function.docvalues.IntDocValues;
import org.apache.lucene.search.SortedSetSelector; import org.apache.lucene.search.SortedSetSelector;
import org.apache.solr.index.SlowCompositeReaderWrapper;
import org.apache.solr.schema.SchemaField; import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.Insanity; import org.apache.solr.search.Insanity;
import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.search.SolrIndexSearcher;

View File

@ -25,7 +25,6 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.join.JoinUtil; import org.apache.lucene.search.join.JoinUtil;
import org.apache.lucene.search.join.ScoreMode; import org.apache.lucene.search.join.ScoreMode;
import org.apache.lucene.uninverting.UninvertingReader;
import org.apache.solr.cloud.ZkController; import org.apache.solr.cloud.ZkController;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.cloud.Aliases; import org.apache.solr.common.cloud.Aliases;
@ -45,6 +44,7 @@ import org.apache.solr.search.QParser;
import org.apache.solr.search.QParserPlugin; import org.apache.solr.search.QParserPlugin;
import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SyntaxError; import org.apache.solr.search.SyntaxError;
import org.apache.solr.uninverting.UninvertingReader;
import org.apache.solr.util.RefCounted; import org.apache.solr.util.RefCounted;
/** /**

View File

@ -0,0 +1,887 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.uninverting;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.TimeUnit;
import org.apache.lucene.codecs.PostingsFormat; // javadocs
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.StringHelper;
/**
* This class enables fast access to multiple term ords for
* a specified field across all docIDs.
*
* Like FieldCache, it uninverts the index and holds a
* packed data structure in RAM to enable fast access.
* Unlike FieldCache, it can handle multi-valued fields,
* and, it does not hold the term bytes in RAM. Rather, you
* must obtain a TermsEnum from the {@link #getOrdTermsEnum}
* method, and then seek-by-ord to get the term's bytes.
*
* While normally term ords are type long, in this API they are
* int as the internal representation here cannot address
* more than MAX_INT unique terms. Also, typically this
* class is used on fields with relatively few unique terms
* vs the number of documents. In addition, there is an
* internal limit (16 MB) on how many bytes each chunk of
* documents may consume. If you trip this limit you'll hit
* an IllegalStateException.
*
* Deleted documents are skipped during uninversion, and if
* you look them up you'll get 0 ords.
*
* The returned per-document ords do not retain their
* original order in the document. Instead they are returned
* in sorted (by ord, ie term's BytesRef comparator) order. They
* are also de-dup'd (ie if doc has same term more than once
* in this field, you'll only get that ord back once).
*
* This class
* will create its own term index internally, allowing to
* create a wrapped TermsEnum that can handle ord. The
* {@link #getOrdTermsEnum} method then provides this
* wrapped enum.
*
* The RAM consumption of this class can be high!
*
* @lucene.experimental
*/
/*
* Final form of the un-inverted field:
* Each document points to a list of term numbers that are contained in that document.
*
* Term numbers are in sorted order, and are encoded as variable-length deltas from the
* previous term number. Real term numbers start at 2 since 0 and 1 are reserved. A
* term number of 0 signals the end of the termNumber list.
*
* There is a single int[maxDoc()] which either contains a pointer into a byte[] for
* the termNumber lists, or directly contains the termNumber list if it fits in the 4
* bytes of an integer. If the first byte in the integer is 1, the next 3 bytes
* are a pointer into a byte[] where the termNumber list starts.
*
* There are actually 256 byte arrays, to compensate for the fact that the pointers
* into the byte arrays are only 3 bytes long. The correct byte array for a document
* is a function of its id.
*
* To save space and speed up faceting, any term that matches enough documents will
* not be un-inverted... it will be skipped while building the un-inverted field structure,
* and will use a set intersection method during faceting.
*
* To further save memory, the terms (the actual string values) are not all stored in
* memory, but a TermIndex is used to convert term numbers to term values only
* for the terms needed after faceting has completed. Only every 128th term value
* is stored, along with its corresponding term number, and this is used as an
* index to find the closest term and iterate until the desired number is hit (very
* much like Lucene's own internal term index).
*
*/
public class DocTermOrds implements Accountable {
// Term ords are shifted by this, internally, to reserve
// values 0 (end term) and 1 (index is a pointer into byte array)
private final static int TNUM_OFFSET = 2;
/** Every 128th term is indexed, by default. */
public final static int DEFAULT_INDEX_INTERVAL_BITS = 7; // decrease to a low number like 2 for testing
private int indexIntervalBits;
private int indexIntervalMask;
private int indexInterval;
/** Don't uninvert terms that exceed this count. */
protected final int maxTermDocFreq;
/** Field we are uninverting. */
protected final String field;
/** Number of terms in the field. */
protected int numTermsInField;
/** Total number of references to term numbers. */
protected long termInstances;
private long memsz;
/** Total time to uninvert the field. */
protected int total_time;
/** Time for phase1 of the uninvert process. */
protected int phase1_time;
/** Holds the per-document ords or a pointer to the ords. */
protected int[] index;
/** Holds term ords for documents. */
protected byte[][] tnums = new byte[256][];
/** Total bytes (sum of term lengths) for all indexed terms.*/
protected long sizeOfIndexedStrings;
/** Holds the indexed (by default every 128th) terms. */
protected BytesRef[] indexedTermsArray = new BytesRef[0];
/** If non-null, only terms matching this prefix were
* indexed. */
protected BytesRef prefix;
/** Ordinal of the first term in the field, or 0 if the
* {@link PostingsFormat} does not implement {@link
* TermsEnum#ord}. */
protected int ordBase;
/** Used while uninverting. */
protected PostingsEnum postingsEnum;
/** If true, check and throw an exception if the field has docValues enabled.
* Normally, docValues should be used in preference to DocTermOrds. */
protected boolean checkForDocValues = true;
/** Returns total bytes used. */
public long ramBytesUsed() {
// can cache the mem size since it shouldn't change
if (memsz!=0) return memsz;
long sz = 8*8 + 32; // local fields
if (index != null) sz += index.length * 4;
if (tnums!=null) {
for (byte[] arr : tnums)
if (arr != null) sz += arr.length;
}
memsz = sz;
return sz;
}
/** Inverts all terms */
public DocTermOrds(LeafReader reader, Bits liveDocs, String field) throws IOException {
this(reader, liveDocs, field, null, Integer.MAX_VALUE);
}
// TODO: instead of all these ctors and options, take termsenum!
/** Inverts only terms starting w/ prefix */
public DocTermOrds(LeafReader reader, Bits liveDocs, String field, BytesRef termPrefix) throws IOException {
this(reader, liveDocs, field, termPrefix, Integer.MAX_VALUE);
}
/** Inverts only terms starting w/ prefix, and only terms
* whose docFreq (not taking deletions into account) is
* &lt;= maxTermDocFreq */
public DocTermOrds(LeafReader reader, Bits liveDocs, String field, BytesRef termPrefix, int maxTermDocFreq) throws IOException {
this(reader, liveDocs, field, termPrefix, maxTermDocFreq, DEFAULT_INDEX_INTERVAL_BITS);
}
/** Inverts only terms starting w/ prefix, and only terms
* whose docFreq (not taking deletions into account) is
* &lt;= maxTermDocFreq, with a custom indexing interval
* (default is every 128nd term). */
public DocTermOrds(LeafReader reader, Bits liveDocs, String field, BytesRef termPrefix, int maxTermDocFreq, int indexIntervalBits) throws IOException {
this(field, maxTermDocFreq, indexIntervalBits);
uninvert(reader, liveDocs, termPrefix);
}
/** Subclass inits w/ this, but be sure you then call
* uninvert, only once */
protected DocTermOrds(String field, int maxTermDocFreq, int indexIntervalBits) {
//System.out.println("DTO init field=" + field + " maxTDFreq=" + maxTermDocFreq);
this.field = field;
this.maxTermDocFreq = maxTermDocFreq;
this.indexIntervalBits = indexIntervalBits;
indexIntervalMask = 0xffffffff >>> (32-indexIntervalBits);
indexInterval = 1 << indexIntervalBits;
}
/**
* Returns a TermsEnum that implements ord, or null if no terms in field.
* <p>
* we build a "private" terms
* index internally (WARNING: consumes RAM) and use that
* index to implement ord. This also enables ord on top
* of a composite reader. The returned TermsEnum is
* unpositioned. This returns null if there are no terms.
* </p>
* <p><b>NOTE</b>: you must pass the same reader that was
* used when creating this class
*/
public TermsEnum getOrdTermsEnum(LeafReader reader) throws IOException {
// NOTE: see LUCENE-6529 before attempting to optimize this method to
// return a TermsEnum directly from the reader if it already supports ord().
assert null != indexedTermsArray;
if (0 == indexedTermsArray.length) {
return null;
} else {
return new OrdWrappedTermsEnum(reader);
}
}
/**
* Returns the number of terms in this field
*/
public int numTerms() {
return numTermsInField;
}
/**
* Returns {@code true} if no terms were indexed.
*/
public boolean isEmpty() {
return index == null;
}
/** Subclass can override this */
protected void visitTerm(TermsEnum te, int termNum) throws IOException {
}
/** Invoked during {@link #uninvert(org.apache.lucene.index.LeafReader,Bits,BytesRef)}
* to record the document frequency for each uninverted
* term. */
protected void setActualDocFreq(int termNum, int df) throws IOException {
}
/** Call this only once (if you subclass!) */
protected void uninvert(final LeafReader reader, Bits liveDocs, final BytesRef termPrefix) throws IOException {
final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
if (checkForDocValues && info != null && info.getDocValuesType() != DocValuesType.NONE) {
throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
}
//System.out.println("DTO uninvert field=" + field + " prefix=" + termPrefix);
final long startTime = System.nanoTime();
prefix = termPrefix == null ? null : BytesRef.deepCopyOf(termPrefix);
final int maxDoc = reader.maxDoc();
final int[] index = new int[maxDoc]; // immediate term numbers, or the index into the byte[] representing the last number
final int[] lastTerm = new int[maxDoc]; // last term we saw for this document
final byte[][] bytes = new byte[maxDoc][]; // list of term numbers for the doc (delta encoded vInts)
final Terms terms = reader.terms(field);
if (terms == null) {
// No terms
return;
}
final TermsEnum te = terms.iterator();
final BytesRef seekStart = termPrefix != null ? termPrefix : new BytesRef();
//System.out.println("seekStart=" + seekStart.utf8ToString());
if (te.seekCeil(seekStart) == TermsEnum.SeekStatus.END) {
// No terms match
return;
}
// For our "term index wrapper"
final List<BytesRef> indexedTerms = new ArrayList<>();
final PagedBytes indexedTermsBytes = new PagedBytes(15);
// we need a minimum of 9 bytes, but round up to 12 since the space would
// be wasted with most allocators anyway.
byte[] tempArr = new byte[12];
//
// enumerate all terms, and build an intermediate form of the un-inverted field.
//
// During this intermediate form, every document has a (potential) byte[]
// and the int[maxDoc()] array either contains the termNumber list directly
// or the *end* offset of the termNumber list in its byte array (for faster
// appending and faster creation of the final form).
//
// idea... if things are too large while building, we could do a range of docs
// at a time (but it would be a fair amount slower to build)
// could also do ranges in parallel to take advantage of multiple CPUs
// OPTIONAL: remap the largest df terms to the lowest 128 (single byte)
// values. This requires going over the field first to find the most
// frequent terms ahead of time.
int termNum = 0;
postingsEnum = null;
// Loop begins with te positioned to first term (we call
// seek above):
for (;;) {
final BytesRef t = te.term();
if (t == null || (termPrefix != null && !StringHelper.startsWith(t, termPrefix))) {
break;
}
//System.out.println("visit term=" + t.utf8ToString() + " " + t + " termNum=" + termNum);
visitTerm(te, termNum);
if ((termNum & indexIntervalMask) == 0) {
// Index this term
sizeOfIndexedStrings += t.length;
BytesRef indexedTerm = new BytesRef();
indexedTermsBytes.copy(t, indexedTerm);
// TODO: really should 1) strip off useless suffix,
// and 2) use FST not array/PagedBytes
indexedTerms.add(indexedTerm);
}
final int df = te.docFreq();
if (df <= maxTermDocFreq) {
postingsEnum = te.postings(postingsEnum, PostingsEnum.NONE);
// dF, but takes deletions into account
int actualDF = 0;
for (;;) {
int doc = postingsEnum.nextDoc();
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
//System.out.println(" chunk=" + chunk + " docs");
actualDF ++;
termInstances++;
//System.out.println(" docID=" + doc);
// add TNUM_OFFSET to the term number to make room for special reserved values:
// 0 (end term) and 1 (index into byte array follows)
int delta = termNum - lastTerm[doc] + TNUM_OFFSET;
lastTerm[doc] = termNum;
int val = index[doc];
if ((val & 0xff)==1) {
// index into byte array (actually the end of
// the doc-specific byte[] when building)
int pos = val >>> 8;
int ilen = vIntSize(delta);
byte[] arr = bytes[doc];
int newend = pos+ilen;
if (newend > arr.length) {
// We avoid a doubling strategy to lower memory usage.
// this faceting method isn't for docs with many terms.
// In hotspot, objects have 2 words of overhead, then fields, rounded up to a 64-bit boundary.
// TODO: figure out what array lengths we can round up to w/o actually using more memory
// (how much space does a byte[] take up? Is data preceded by a 32 bit length only?
// It should be safe to round up to the nearest 32 bits in any case.
int newLen = (newend + 3) & 0xfffffffc; // 4 byte alignment
byte[] newarr = new byte[newLen];
System.arraycopy(arr, 0, newarr, 0, pos);
arr = newarr;
bytes[doc] = newarr;
}
pos = writeInt(delta, arr, pos);
index[doc] = (pos<<8) | 1; // update pointer to end index in byte[]
} else {
// OK, this int has data in it... find the end (a zero starting byte - not
// part of another number, hence not following a byte with the high bit set).
int ipos;
if (val==0) {
ipos=0;
} else if ((val & 0x0000ff80)==0) {
ipos=1;
} else if ((val & 0x00ff8000)==0) {
ipos=2;
} else if ((val & 0xff800000)==0) {
ipos=3;
} else {
ipos=4;
}
//System.out.println(" ipos=" + ipos);
int endPos = writeInt(delta, tempArr, ipos);
//System.out.println(" endpos=" + endPos);
if (endPos <= 4) {
//System.out.println(" fits!");
// value will fit in the integer... move bytes back
for (int j=ipos; j<endPos; j++) {
val |= (tempArr[j] & 0xff) << (j<<3);
}
index[doc] = val;
} else {
// value won't fit... move integer into byte[]
for (int j=0; j<ipos; j++) {
tempArr[j] = (byte)val;
val >>>=8;
}
// point at the end index in the byte[]
index[doc] = (endPos<<8) | 1;
bytes[doc] = tempArr;
tempArr = new byte[12];
}
}
}
setActualDocFreq(termNum, actualDF);
}
termNum++;
if (te.next() == null) {
break;
}
}
numTermsInField = termNum;
long midPoint = System.nanoTime();
if (termInstances == 0) {
// we didn't invert anything
// lower memory consumption.
tnums = null;
} else {
this.index = index;
//
// transform intermediate form into the final form, building a single byte[]
// at a time, and releasing the intermediate byte[]s as we go to avoid
// increasing the memory footprint.
//
for (int pass = 0; pass<256; pass++) {
byte[] target = tnums[pass];
int pos=0; // end in target;
if (target != null) {
pos = target.length;
} else {
target = new byte[4096];
}
// loop over documents, 0x00ppxxxx, 0x01ppxxxx, 0x02ppxxxx
// where pp is the pass (which array we are building), and xx is all values.
// each pass shares the same byte[] for termNumber lists.
for (int docbase = pass<<16; docbase<maxDoc; docbase+=(1<<24)) {
int lim = Math.min(docbase + (1<<16), maxDoc);
for (int doc=docbase; doc<lim; doc++) {
//System.out.println(" pass=" + pass + " process docID=" + doc);
int val = index[doc];
if ((val&0xff) == 1) {
int len = val >>> 8;
//System.out.println(" ptr pos=" + pos);
index[doc] = (pos<<8)|1; // change index to point to start of array
if ((pos & 0xff000000) != 0) {
// we only have 24 bits for the array index
throw new IllegalStateException("Too many values for UnInvertedField faceting on field "+field);
}
byte[] arr = bytes[doc];
/*
for(byte b : arr) {
//System.out.println(" b=" + Integer.toHexString((int) b));
}
*/
bytes[doc] = null; // IMPORTANT: allow GC to avoid OOM
if (target.length <= pos + len) {
int newlen = target.length;
/*** we don't have to worry about the array getting too large
* since the "pos" param will overflow first (only 24 bits available)
if ((newlen<<1) <= 0) {
// overflow...
newlen = Integer.MAX_VALUE;
if (newlen <= pos + len) {
throw new SolrException(400,"Too many terms to uninvert field!");
}
} else {
while (newlen <= pos + len) newlen<<=1; // doubling strategy
}
****/
while (newlen <= pos + len) newlen<<=1; // doubling strategy
byte[] newtarget = new byte[newlen];
System.arraycopy(target, 0, newtarget, 0, pos);
target = newtarget;
}
System.arraycopy(arr, 0, target, pos, len);
pos += len + 1; // skip single byte at end and leave it 0 for terminator
}
}
}
// shrink array
if (pos < target.length) {
byte[] newtarget = new byte[pos];
System.arraycopy(target, 0, newtarget, 0, pos);
target = newtarget;
}
tnums[pass] = target;
if ((pass << 16) > maxDoc)
break;
}
}
indexedTermsArray = indexedTerms.toArray(new BytesRef[indexedTerms.size()]);
long endTime = System.nanoTime();
total_time = (int) TimeUnit.MILLISECONDS.convert(endTime-startTime, TimeUnit.NANOSECONDS);
phase1_time = (int) TimeUnit.MILLISECONDS.convert(midPoint-startTime, TimeUnit.NANOSECONDS);
}
/** Number of bytes to represent an unsigned int as a vint. */
private static int vIntSize(int x) {
if ((x & (0xffffffff << (7*1))) == 0 ) {
return 1;
}
if ((x & (0xffffffff << (7*2))) == 0 ) {
return 2;
}
if ((x & (0xffffffff << (7*3))) == 0 ) {
return 3;
}
if ((x & (0xffffffff << (7*4))) == 0 ) {
return 4;
}
return 5;
}
// todo: if we know the size of the vInt already, we could do
// a single switch on the size
private static int writeInt(int x, byte[] arr, int pos) {
int a;
a = (x >>> (7*4));
if (a != 0) {
arr[pos++] = (byte)(a | 0x80);
}
a = (x >>> (7*3));
if (a != 0) {
arr[pos++] = (byte)(a | 0x80);
}
a = (x >>> (7*2));
if (a != 0) {
arr[pos++] = (byte)(a | 0x80);
}
a = (x >>> (7*1));
if (a != 0) {
arr[pos++] = (byte)(a | 0x80);
}
arr[pos++] = (byte)(x & 0x7f);
return pos;
}
/**
* "wrap" our own terms index around the original IndexReader.
* Only valid if there are terms for this field rom the original reader
*/
private final class OrdWrappedTermsEnum extends TermsEnum {
private final TermsEnum termsEnum;
private BytesRef term;
private long ord = -indexInterval-1; // force "real" seek
public OrdWrappedTermsEnum(LeafReader reader) throws IOException {
assert indexedTermsArray != null;
assert 0 != indexedTermsArray.length;
termsEnum = reader.fields().terms(field).iterator();
}
@Override
public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException {
return termsEnum.postings(reuse, flags);
}
@Override
public BytesRef term() {
return term;
}
@Override
public BytesRef next() throws IOException {
if (++ord < 0) {
ord = 0;
}
if (termsEnum.next() == null) {
term = null;
return null;
}
return setTerm(); // this is extra work if we know we are in bounds...
}
@Override
public int docFreq() throws IOException {
return termsEnum.docFreq();
}
@Override
public long totalTermFreq() throws IOException {
return termsEnum.totalTermFreq();
}
@Override
public long ord() {
return ordBase + ord;
}
@Override
public SeekStatus seekCeil(BytesRef target) throws IOException {
// already here
if (term != null && term.equals(target)) {
return SeekStatus.FOUND;
}
int startIdx = Arrays.binarySearch(indexedTermsArray, target);
if (startIdx >= 0) {
// we hit the term exactly... lucky us!
TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(target);
assert seekStatus == TermsEnum.SeekStatus.FOUND;
ord = startIdx << indexIntervalBits;
setTerm();
assert term != null;
return SeekStatus.FOUND;
}
// we didn't hit the term exactly
startIdx = -startIdx-1;
if (startIdx == 0) {
// our target occurs *before* the first term
TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(target);
assert seekStatus == TermsEnum.SeekStatus.NOT_FOUND;
ord = 0;
setTerm();
assert term != null;
return SeekStatus.NOT_FOUND;
}
// back up to the start of the block
startIdx--;
if ((ord >> indexIntervalBits) == startIdx && term != null && term.compareTo(target) <= 0) {
// we are already in the right block and the current term is before the term we want,
// so we don't need to seek.
} else {
// seek to the right block
TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(indexedTermsArray[startIdx]);
assert seekStatus == TermsEnum.SeekStatus.FOUND;
ord = startIdx << indexIntervalBits;
setTerm();
assert term != null; // should be non-null since it's in the index
}
while (term != null && term.compareTo(target) < 0) {
next();
}
if (term == null) {
return SeekStatus.END;
} else if (term.compareTo(target) == 0) {
return SeekStatus.FOUND;
} else {
return SeekStatus.NOT_FOUND;
}
}
@Override
public void seekExact(long targetOrd) throws IOException {
int delta = (int) (targetOrd - ordBase - ord);
//System.out.println(" seek(ord) targetOrd=" + targetOrd + " delta=" + delta + " ord=" + ord + " ii=" + indexInterval);
if (delta < 0 || delta > indexInterval) {
final int idx = (int) (targetOrd >>> indexIntervalBits);
final BytesRef base = indexedTermsArray[idx];
//System.out.println(" do seek term=" + base.utf8ToString());
ord = idx << indexIntervalBits;
delta = (int) (targetOrd - ord);
final TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(base);
assert seekStatus == TermsEnum.SeekStatus.FOUND;
} else {
//System.out.println("seek w/in block");
}
while (--delta >= 0) {
BytesRef br = termsEnum.next();
if (br == null) {
assert false;
return;
}
ord++;
}
setTerm();
assert term != null;
}
private BytesRef setTerm() throws IOException {
term = termsEnum.term();
//System.out.println(" setTerm() term=" + term.utf8ToString() + " vs prefix=" + (prefix == null ? "null" : prefix.utf8ToString()));
if (prefix != null && !StringHelper.startsWith(term, prefix)) {
term = null;
}
return term;
}
}
/** Returns the term ({@link BytesRef}) corresponding to
* the provided ordinal. */
public BytesRef lookupTerm(TermsEnum termsEnum, int ord) throws IOException {
termsEnum.seekExact(ord);
return termsEnum.term();
}
/** Returns a SortedSetDocValues view of this instance */
public SortedSetDocValues iterator(LeafReader reader) throws IOException {
if (isEmpty()) {
return DocValues.emptySortedSet();
} else {
return new Iterator(reader);
}
}
private class Iterator extends SortedSetDocValues {
final LeafReader reader;
final TermsEnum te; // used internally for lookupOrd() and lookupTerm()
// currently we read 5 at a time (using the logic of the old iterator)
final int buffer[] = new int[5];
int bufferUpto;
int bufferLength;
private int tnum;
private int upto;
private byte[] arr;
Iterator(LeafReader reader) throws IOException {
this.reader = reader;
this.te = termsEnum();
}
@Override
public long nextOrd() {
while (bufferUpto == bufferLength) {
if (bufferLength < buffer.length) {
return NO_MORE_ORDS;
} else {
bufferLength = read(buffer);
bufferUpto = 0;
}
}
return buffer[bufferUpto++];
}
/** Buffer must be at least 5 ints long. Returns number
* of term ords placed into buffer; if this count is
* less than buffer.length then that is the end. */
int read(int[] buffer) {
int bufferUpto = 0;
if (arr == null) {
// code is inlined into upto
//System.out.println("inlined");
int code = upto;
int delta = 0;
for (;;) {
delta = (delta << 7) | (code & 0x7f);
if ((code & 0x80)==0) {
if (delta==0) break;
tnum += delta - TNUM_OFFSET;
buffer[bufferUpto++] = ordBase+tnum;
//System.out.println(" tnum=" + tnum);
delta = 0;
}
code >>>= 8;
}
} else {
// code is a pointer
for(;;) {
int delta = 0;
for(;;) {
byte b = arr[upto++];
delta = (delta << 7) | (b & 0x7f);
//System.out.println(" cycle: upto=" + upto + " delta=" + delta + " b=" + b);
if ((b & 0x80) == 0) break;
}
//System.out.println(" delta=" + delta);
if (delta == 0) break;
tnum += delta - TNUM_OFFSET;
//System.out.println(" tnum=" + tnum);
buffer[bufferUpto++] = ordBase+tnum;
if (bufferUpto == buffer.length) {
break;
}
}
}
return bufferUpto;
}
@Override
public void setDocument(int docID) {
tnum = 0;
final int code = index[docID];
if ((code & 0xff)==1) {
// a pointer
upto = code>>>8;
//System.out.println(" pointer! upto=" + upto);
int whichArray = (docID >>> 16) & 0xff;
arr = tnums[whichArray];
} else {
//System.out.println(" inline!");
arr = null;
upto = code;
}
bufferUpto = 0;
bufferLength = read(buffer);
}
@Override
public BytesRef lookupOrd(long ord) {
try {
return DocTermOrds.this.lookupTerm(te, (int) ord);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
@Override
public long getValueCount() {
return numTerms();
}
@Override
public long lookupTerm(BytesRef key) {
try {
switch (te.seekCeil(key)) {
case FOUND:
assert te.ord() >= 0;
return te.ord();
case NOT_FOUND:
assert te.ord() >= 0;
return -te.ord()-1;
default: /* END */
return -numTerms()-1;
}
} catch (IOException e) {
throw new RuntimeException(e);
}
}
@Override
public TermsEnum termsEnum() {
try {
return getOrdTermsEnum(reader);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
}
}

View File

@ -0,0 +1,466 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.uninverting;
import java.io.IOException;
import java.io.PrintStream;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.IndexReader; // javadocs
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LegacyNumericUtils;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.RamUsageEstimator;
/**
* Expert: Maintains caches of term values.
*
* <p>Created: May 19, 2004 11:13:14 AM
*
* @since lucene 1.4
* @see FieldCacheSanityChecker
*
* @lucene.internal
*/
interface FieldCache {
/**
* Placeholder indicating creation of this cache is currently in-progress.
*/
public static final class CreationPlaceholder implements Accountable {
Accountable value;
@Override
public long ramBytesUsed() {
// don't call on the in-progress value, might make things angry.
return RamUsageEstimator.NUM_BYTES_OBJECT_REF;
}
}
/**
* interface to all parsers. It is used to parse different numeric types.
*/
public interface Parser {
/**
* Pulls a {@link TermsEnum} from the given {@link Terms}. This method allows certain parsers
* to filter the actual TermsEnum before the field cache is filled.
*
* @param terms the {@link Terms} instance to create the {@link TermsEnum} from.
* @return a possibly filtered {@link TermsEnum} instance, this method must not return <code>null</code>.
* @throws IOException if an {@link IOException} occurs
* @deprecated index with Points instead
*/
@Deprecated
public TermsEnum termsEnum(Terms terms) throws IOException;
/** Parse's this field's value */
public long parseValue(BytesRef term);
}
/**
* Base class for points parsers. These parsers do not use the inverted index, but instead
* uninvert point data.
*
* This abstraction can be cleaned up when Parser.termsEnum is removed.
*/
public abstract class PointParser implements Parser {
public final TermsEnum termsEnum(Terms terms) throws IOException {
throw new UnsupportedOperationException("makes no sense for parsing points");
}
}
/** Expert: The cache used internally by sorting and range query classes. */
public static FieldCache DEFAULT = new FieldCacheImpl();
/**
* A parser instance for int values encoded by {@link org.apache.lucene.util.NumericUtils}, e.g. when indexed
* via {@link org.apache.lucene.document.IntPoint}.
*/
public static final Parser INT_POINT_PARSER = new PointParser() {
@Override
public long parseValue(BytesRef point) {
return NumericUtils.sortableBytesToInt(point.bytes, point.offset);
}
@Override
public String toString() {
return FieldCache.class.getName()+".INT_POINT_PARSER";
}
};
/**
* A parser instance for long values encoded by {@link org.apache.lucene.util.NumericUtils}, e.g. when indexed
* via {@link org.apache.lucene.document.LongPoint}.
*/
public static final Parser LONG_POINT_PARSER = new PointParser() {
@Override
public long parseValue(BytesRef point) {
return NumericUtils.sortableBytesToLong(point.bytes, point.offset);
}
@Override
public String toString() {
return FieldCache.class.getName()+".LONG_POINT_PARSER";
}
};
/**
* A parser instance for float values encoded by {@link org.apache.lucene.util.NumericUtils}, e.g. when indexed
* via {@link org.apache.lucene.document.FloatPoint}.
*/
public static final Parser FLOAT_POINT_PARSER = new PointParser() {
@Override
public long parseValue(BytesRef point) {
return NumericUtils.sortableFloatBits(NumericUtils.sortableBytesToInt(point.bytes, point.offset));
}
@Override
public String toString() {
return FieldCache.class.getName()+".FLOAT_POINT_PARSER";
}
};
/**
* A parser instance for double values encoded by {@link org.apache.lucene.util.NumericUtils}, e.g. when indexed
* via {@link org.apache.lucene.document.DoublePoint}.
*/
public static final Parser DOUBLE_POINT_PARSER = new PointParser() {
@Override
public long parseValue(BytesRef point) {
return NumericUtils.sortableDoubleBits(NumericUtils.sortableBytesToLong(point.bytes, point.offset));
}
@Override
public String toString() {
return FieldCache.class.getName()+".DOUBLE_POINT_PARSER";
}
};
/**
* A parser instance for int values encoded by {@link org.apache.lucene.util.LegacyNumericUtils}, e.g. when indexed
* via {@link org.apache.lucene.document.LegacyIntField}/{@link org.apache.lucene.analysis.LegacyNumericTokenStream}.
* @deprecated Index with points and use {@link #INT_POINT_PARSER} instead.
*/
@Deprecated
public static final Parser LEGACY_INT_PARSER = new Parser() {
@Override
public long parseValue(BytesRef term) {
return LegacyNumericUtils.prefixCodedToInt(term);
}
@Override
public TermsEnum termsEnum(Terms terms) throws IOException {
return LegacyNumericUtils.filterPrefixCodedInts(terms.iterator());
}
@Override
public String toString() {
return FieldCache.class.getName()+".LEGACY_INT_PARSER";
}
};
/**
* A parser instance for float values encoded with {@link org.apache.lucene.util.LegacyNumericUtils}, e.g. when indexed
* via {@link org.apache.lucene.document.LegacyFloatField}/{@link org.apache.lucene.analysis.LegacyNumericTokenStream}.
* @deprecated Index with points and use {@link #FLOAT_POINT_PARSER} instead.
*/
@Deprecated
public static final Parser LEGACY_FLOAT_PARSER = new Parser() {
@Override
public long parseValue(BytesRef term) {
int val = LegacyNumericUtils.prefixCodedToInt(term);
if (val<0) val ^= 0x7fffffff;
return val;
}
@Override
public String toString() {
return FieldCache.class.getName()+".LEGACY_FLOAT_PARSER";
}
@Override
public TermsEnum termsEnum(Terms terms) throws IOException {
return LegacyNumericUtils.filterPrefixCodedInts(terms.iterator());
}
};
/**
* A parser instance for long values encoded by {@link org.apache.lucene.util.LegacyNumericUtils}, e.g. when indexed
* via {@link org.apache.lucene.document.LegacyLongField}/{@link org.apache.lucene.analysis.LegacyNumericTokenStream}.
* @deprecated Index with points and use {@link #LONG_POINT_PARSER} instead.
*/
@Deprecated
public static final Parser LEGACY_LONG_PARSER = new Parser() {
@Override
public long parseValue(BytesRef term) {
return LegacyNumericUtils.prefixCodedToLong(term);
}
@Override
public String toString() {
return FieldCache.class.getName()+".LEGACY_LONG_PARSER";
}
@Override
public TermsEnum termsEnum(Terms terms) throws IOException {
return LegacyNumericUtils.filterPrefixCodedLongs(terms.iterator());
}
};
/**
* A parser instance for double values encoded with {@link org.apache.lucene.util.LegacyNumericUtils}, e.g. when indexed
* via {@link org.apache.lucene.document.LegacyDoubleField}/{@link org.apache.lucene.analysis.LegacyNumericTokenStream}.
* @deprecated Index with points and use {@link #DOUBLE_POINT_PARSER} instead.
*/
@Deprecated
public static final Parser LEGACY_DOUBLE_PARSER = new Parser() {
@Override
public long parseValue(BytesRef term) {
long val = LegacyNumericUtils.prefixCodedToLong(term);
if (val<0) val ^= 0x7fffffffffffffffL;
return val;
}
@Override
public String toString() {
return FieldCache.class.getName()+".LEGACY_DOUBLE_PARSER";
}
@Override
public TermsEnum termsEnum(Terms terms) throws IOException {
return LegacyNumericUtils.filterPrefixCodedLongs(terms.iterator());
}
};
/** Checks the internal cache for an appropriate entry, and if none is found,
* reads the terms/points in <code>field</code> and returns a bit set at the size of
* <code>reader.maxDoc()</code>, with turned on bits for each docid that
* does have a value for this field.
* @param parser May be {@code null} if coming from the inverted index, otherwise
* can be a {@link PointParser} to compute from point values.
*/
public Bits getDocsWithField(LeafReader reader, String field, Parser parser) throws IOException;
/**
* Returns a {@link NumericDocValues} over the values found in documents in the given
* field. If the field was indexed as {@link NumericDocValuesField}, it simply
* uses {@link org.apache.lucene.index.LeafReader#getNumericDocValues(String)} to read the values.
* Otherwise, it checks the internal cache for an appropriate entry, and if
* none is found, reads the terms/points in <code>field</code> as longs and returns
* an array of size <code>reader.maxDoc()</code> of the value each document
* has in the given field.
*
* @param reader
* Used to get field values.
* @param field
* Which field contains the longs.
* @param parser
* Computes long for string values. May be {@code null} if the
* requested field was indexed as {@link NumericDocValuesField} or
* {@link org.apache.lucene.document.LegacyLongField}.
* @param setDocsWithField
* If true then {@link #getDocsWithField} will also be computed and
* stored in the FieldCache.
* @return The values in the given field for each document.
* @throws IOException
* If any error occurs.
*/
public NumericDocValues getNumerics(LeafReader reader, String field, Parser parser, boolean setDocsWithField) throws IOException;
/** Checks the internal cache for an appropriate entry, and if none
* is found, reads the term values in <code>field</code>
* and returns a {@link BinaryDocValues} instance, providing a
* method to retrieve the term (as a BytesRef) per document.
* @param reader Used to get field values.
* @param field Which field contains the strings.
* @param setDocsWithField If true then {@link #getDocsWithField} will
* also be computed and stored in the FieldCache.
* @return The values in the given field for each document.
* @throws IOException If any error occurs.
*/
public BinaryDocValues getTerms(LeafReader reader, String field, boolean setDocsWithField) throws IOException;
/** Expert: just like {@link #getTerms(org.apache.lucene.index.LeafReader,String,boolean)},
* but you can specify whether more RAM should be consumed in exchange for
* faster lookups (default is "true"). Note that the
* first call for a given reader and field "wins",
* subsequent calls will share the same cache entry. */
public BinaryDocValues getTerms(LeafReader reader, String field, boolean setDocsWithField, float acceptableOverheadRatio) throws IOException;
/** Checks the internal cache for an appropriate entry, and if none
* is found, reads the term values in <code>field</code>
* and returns a {@link SortedDocValues} instance,
* providing methods to retrieve sort ordinals and terms
* (as a ByteRef) per document.
* @param reader Used to get field values.
* @param field Which field contains the strings.
* @return The values in the given field for each document.
* @throws IOException If any error occurs.
*/
public SortedDocValues getTermsIndex(LeafReader reader, String field) throws IOException;
/** Expert: just like {@link
* #getTermsIndex(org.apache.lucene.index.LeafReader,String)}, but you can specify
* whether more RAM should be consumed in exchange for
* faster lookups (default is "true"). Note that the
* first call for a given reader and field "wins",
* subsequent calls will share the same cache entry. */
public SortedDocValues getTermsIndex(LeafReader reader, String field, float acceptableOverheadRatio) throws IOException;
/** Can be passed to {@link #getDocTermOrds} to filter for 32-bit numeric terms */
public static final BytesRef INT32_TERM_PREFIX = new BytesRef(new byte[] { LegacyNumericUtils.SHIFT_START_INT });
/** Can be passed to {@link #getDocTermOrds} to filter for 64-bit numeric terms */
public static final BytesRef INT64_TERM_PREFIX = new BytesRef(new byte[] { LegacyNumericUtils.SHIFT_START_LONG });
/**
* Checks the internal cache for an appropriate entry, and if none is found, reads the term values
* in <code>field</code> and returns a {@link DocTermOrds} instance, providing a method to retrieve
* the terms (as ords) per document.
*
* @param reader Used to build a {@link DocTermOrds} instance
* @param field Which field contains the strings.
* @param prefix prefix for a subset of the terms which should be uninverted. Can be null or
* {@link #INT32_TERM_PREFIX} or {@link #INT64_TERM_PREFIX}
*
* @return a {@link DocTermOrds} instance
* @throws IOException If any error occurs.
*/
public SortedSetDocValues getDocTermOrds(LeafReader reader, String field, BytesRef prefix) throws IOException;
/**
* EXPERT: A unique Identifier/Description for each item in the FieldCache.
* Can be useful for logging/debugging.
* @lucene.experimental
*/
public final class CacheEntry {
private final Object readerKey;
private final String fieldName;
private final Class<?> cacheType;
private final Object custom;
private final Accountable value;
public CacheEntry(Object readerKey, String fieldName,
Class<?> cacheType,
Object custom,
Accountable value) {
this.readerKey = readerKey;
this.fieldName = fieldName;
this.cacheType = cacheType;
this.custom = custom;
this.value = value;
}
public Object getReaderKey() {
return readerKey;
}
public String getFieldName() {
return fieldName;
}
public Class<?> getCacheType() {
return cacheType;
}
public Object getCustom() {
return custom;
}
public Object getValue() {
return value;
}
/**
* The most recently estimated size of the value, null unless
* estimateSize has been called.
*/
public String getEstimatedSize() {
long bytesUsed = value == null ? 0L : value.ramBytesUsed();
return RamUsageEstimator.humanReadableUnits(bytesUsed);
}
@Override
public String toString() {
StringBuilder b = new StringBuilder(250);
b.append("'").append(getReaderKey()).append("'=>");
b.append("'").append(getFieldName()).append("',");
b.append(getCacheType()).append(",").append(getCustom());
b.append("=>").append(getValue().getClass().getName()).append("#");
b.append(System.identityHashCode(getValue()));
String s = getEstimatedSize();
b.append(" (size =~ ").append(s).append(')');
return b.toString();
}
}
/**
* EXPERT: Generates an array of CacheEntry objects representing all items
* currently in the FieldCache.
* <p>
* NOTE: These CacheEntry objects maintain a strong reference to the
* Cached Values. Maintaining references to a CacheEntry the AtomicIndexReader
* associated with it has garbage collected will prevent the Value itself
* from being garbage collected when the Cache drops the WeakReference.
* </p>
* @lucene.experimental
*/
public CacheEntry[] getCacheEntries();
/**
* <p>
* EXPERT: Instructs the FieldCache to forcibly expunge all entries
* from the underlying caches. This is intended only to be used for
* test methods as a way to ensure a known base state of the Cache
* (with out needing to rely on GC to free WeakReferences).
* It should not be relied on for "Cache maintenance" in general
* application code.
* </p>
* @lucene.experimental
*/
public void purgeAllCaches();
/**
* Expert: drops all cache entries associated with this
* reader {@link IndexReader#getCoreCacheKey}. NOTE: this cache key must
* precisely match the reader that the cache entry is
* keyed on. If you pass a top-level reader, it usually
* will have no effect as Lucene now caches at the segment
* reader level.
*/
public void purgeByCacheKey(Object coreCacheKey);
/**
* If non-null, FieldCacheImpl will warn whenever
* entries are created that are not sane according to
* {@link FieldCacheSanityChecker}.
*/
public void setInfoStream(PrintStream stream);
/** counterpart of {@link #setInfoStream(PrintStream)} */
public PrintStream getInfoStream();
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,425 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.uninverting;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.util.MapOfSets;
import org.apache.solr.uninverting.FieldCache.CacheEntry;
/**
* Provides methods for sanity checking that entries in the FieldCache
* are not wasteful or inconsistent.
* </p>
* <p>
* Lucene 2.9 Introduced numerous enhancements into how the FieldCache
* is used by the low levels of Lucene searching (for Sorting and
* ValueSourceQueries) to improve both the speed for Sorting, as well
* as reopening of IndexReaders. But these changes have shifted the
* usage of FieldCache from "top level" IndexReaders (frequently a
* MultiReader or DirectoryReader) down to the leaf level SegmentReaders.
* As a result, existing applications that directly access the FieldCache
* may find RAM usage increase significantly when upgrading to 2.9 or
* Later. This class provides an API for these applications (or their
* Unit tests) to check at run time if the FieldCache contains "insane"
* usages of the FieldCache.
* </p>
* @lucene.experimental
* @see FieldCache
* @see FieldCacheSanityChecker.Insanity
* @see FieldCacheSanityChecker.InsanityType
*/
final class FieldCacheSanityChecker {
public FieldCacheSanityChecker() {
/* NOOP */
}
/**
* Quick and dirty convenience method
* @see #check
*/
public static Insanity[] checkSanity(FieldCache cache) {
return checkSanity(cache.getCacheEntries());
}
/**
* Quick and dirty convenience method that instantiates an instance with
* "good defaults" and uses it to test the CacheEntrys
* @see #check
*/
public static Insanity[] checkSanity(CacheEntry... cacheEntries) {
FieldCacheSanityChecker sanityChecker = new FieldCacheSanityChecker();
return sanityChecker.check(cacheEntries);
}
/**
* Tests a CacheEntry[] for indication of "insane" cache usage.
* <p>
* <B>NOTE:</b>FieldCache CreationPlaceholder objects are ignored.
* (:TODO: is this a bad idea? are we masking a real problem?)
* </p>
*/
public Insanity[] check(CacheEntry... cacheEntries) {
if (null == cacheEntries || 0 == cacheEntries.length)
return new Insanity[0];
// the indirect mapping lets MapOfSet dedup identical valIds for us
//
// maps the (valId) identityhashCode of cache values to
// sets of CacheEntry instances
final MapOfSets<Integer, CacheEntry> valIdToItems = new MapOfSets<>(new HashMap<Integer, Set<CacheEntry>>(17));
// maps ReaderField keys to Sets of ValueIds
final MapOfSets<ReaderField, Integer> readerFieldToValIds = new MapOfSets<>(new HashMap<ReaderField, Set<Integer>>(17));
//
// any keys that we know result in more then one valId
final Set<ReaderField> valMismatchKeys = new HashSet<>();
// iterate over all the cacheEntries to get the mappings we'll need
for (int i = 0; i < cacheEntries.length; i++) {
final CacheEntry item = cacheEntries[i];
final Object val = item.getValue();
// It's OK to have dup entries, where one is eg
// float[] and the other is the Bits (from
// getDocWithField())
if (val instanceof FieldCacheImpl.BitsEntry) {
continue;
}
if (val instanceof FieldCache.CreationPlaceholder)
continue;
final ReaderField rf = new ReaderField(item.getReaderKey(),
item.getFieldName());
final Integer valId = Integer.valueOf(System.identityHashCode(val));
// indirect mapping, so the MapOfSet will dedup identical valIds for us
valIdToItems.put(valId, item);
if (1 < readerFieldToValIds.put(rf, valId)) {
valMismatchKeys.add(rf);
}
}
final List<Insanity> insanity = new ArrayList<>(valMismatchKeys.size() * 3);
insanity.addAll(checkValueMismatch(valIdToItems,
readerFieldToValIds,
valMismatchKeys));
insanity.addAll(checkSubreaders(valIdToItems,
readerFieldToValIds));
return insanity.toArray(new Insanity[insanity.size()]);
}
/**
* Internal helper method used by check that iterates over
* valMismatchKeys and generates a Collection of Insanity
* instances accordingly. The MapOfSets are used to populate
* the Insanity objects.
* @see InsanityType#VALUEMISMATCH
*/
private Collection<Insanity> checkValueMismatch(MapOfSets<Integer, CacheEntry> valIdToItems,
MapOfSets<ReaderField, Integer> readerFieldToValIds,
Set<ReaderField> valMismatchKeys) {
final List<Insanity> insanity = new ArrayList<>(valMismatchKeys.size() * 3);
if (! valMismatchKeys.isEmpty() ) {
// we have multiple values for some ReaderFields
final Map<ReaderField, Set<Integer>> rfMap = readerFieldToValIds.getMap();
final Map<Integer, Set<CacheEntry>> valMap = valIdToItems.getMap();
for (final ReaderField rf : valMismatchKeys) {
final List<CacheEntry> badEntries = new ArrayList<>(valMismatchKeys.size() * 2);
for(final Integer value: rfMap.get(rf)) {
for (final CacheEntry cacheEntry : valMap.get(value)) {
badEntries.add(cacheEntry);
}
}
CacheEntry[] badness = new CacheEntry[badEntries.size()];
badness = badEntries.toArray(badness);
insanity.add(new Insanity(InsanityType.VALUEMISMATCH,
"Multiple distinct value objects for " +
rf.toString(), badness));
}
}
return insanity;
}
/**
* Internal helper method used by check that iterates over
* the keys of readerFieldToValIds and generates a Collection
* of Insanity instances whenever two (or more) ReaderField instances are
* found that have an ancestry relationships.
*
* @see InsanityType#SUBREADER
*/
private Collection<Insanity> checkSubreaders( MapOfSets<Integer, CacheEntry> valIdToItems,
MapOfSets<ReaderField, Integer> readerFieldToValIds) {
final List<Insanity> insanity = new ArrayList<>(23);
Map<ReaderField, Set<ReaderField>> badChildren = new HashMap<>(17);
MapOfSets<ReaderField, ReaderField> badKids = new MapOfSets<>(badChildren); // wrapper
Map<Integer, Set<CacheEntry>> viToItemSets = valIdToItems.getMap();
Map<ReaderField, Set<Integer>> rfToValIdSets = readerFieldToValIds.getMap();
Set<ReaderField> seen = new HashSet<>(17);
Set<ReaderField> readerFields = rfToValIdSets.keySet();
for (final ReaderField rf : readerFields) {
if (seen.contains(rf)) continue;
List<Object> kids = getAllDescendantReaderKeys(rf.readerKey);
for (Object kidKey : kids) {
ReaderField kid = new ReaderField(kidKey, rf.fieldName);
if (badChildren.containsKey(kid)) {
// we've already process this kid as RF and found other problems
// track those problems as our own
badKids.put(rf, kid);
badKids.putAll(rf, badChildren.get(kid));
badChildren.remove(kid);
} else if (rfToValIdSets.containsKey(kid)) {
// we have cache entries for the kid
badKids.put(rf, kid);
}
seen.add(kid);
}
seen.add(rf);
}
// every mapping in badKids represents an Insanity
for (final ReaderField parent : badChildren.keySet()) {
Set<ReaderField> kids = badChildren.get(parent);
List<CacheEntry> badEntries = new ArrayList<>(kids.size() * 2);
// put parent entr(ies) in first
{
for (final Integer value : rfToValIdSets.get(parent)) {
badEntries.addAll(viToItemSets.get(value));
}
}
// now the entries for the descendants
for (final ReaderField kid : kids) {
for (final Integer value : rfToValIdSets.get(kid)) {
badEntries.addAll(viToItemSets.get(value));
}
}
CacheEntry[] badness = new CacheEntry[badEntries.size()];
badness = badEntries.toArray(badness);
insanity.add(new Insanity(InsanityType.SUBREADER,
"Found caches for descendants of " +
parent.toString(),
badness));
}
return insanity;
}
/**
* Checks if the seed is an IndexReader, and if so will walk
* the hierarchy of subReaders building up a list of the objects
* returned by {@code seed.getCoreCacheKey()}
*/
private List<Object> getAllDescendantReaderKeys(Object seed) {
List<Object> all = new ArrayList<>(17); // will grow as we iter
all.add(seed);
for (int i = 0; i < all.size(); i++) {
final Object obj = all.get(i);
// TODO: We don't check closed readers here (as getTopReaderContext
// throws AlreadyClosedException), what should we do? Reflection?
if (obj instanceof IndexReader) {
try {
final List<IndexReaderContext> childs =
((IndexReader) obj).getContext().children();
if (childs != null) { // it is composite reader
for (final IndexReaderContext ctx : childs) {
all.add(ctx.reader().getCoreCacheKey());
}
}
} catch (AlreadyClosedException ace) {
// ignore this reader
}
}
}
// need to skip the first, because it was the seed
return all.subList(1, all.size());
}
/**
* Simple pair object for using "readerKey + fieldName" a Map key
*/
private final static class ReaderField {
public final Object readerKey;
public final String fieldName;
public ReaderField(Object readerKey, String fieldName) {
this.readerKey = readerKey;
this.fieldName = fieldName;
}
@Override
public int hashCode() {
return System.identityHashCode(readerKey) * fieldName.hashCode();
}
@Override
public boolean equals(Object that) {
if (! (that instanceof ReaderField)) return false;
ReaderField other = (ReaderField) that;
return (this.readerKey == other.readerKey &&
this.fieldName.equals(other.fieldName));
}
@Override
public String toString() {
return readerKey.toString() + "+" + fieldName;
}
}
/**
* Simple container for a collection of related CacheEntry objects that
* in conjunction with each other represent some "insane" usage of the
* FieldCache.
*/
public final static class Insanity {
private final InsanityType type;
private final String msg;
private final CacheEntry[] entries;
public Insanity(InsanityType type, String msg, CacheEntry... entries) {
if (null == type) {
throw new IllegalArgumentException
("Insanity requires non-null InsanityType");
}
if (null == entries || 0 == entries.length) {
throw new IllegalArgumentException
("Insanity requires non-null/non-empty CacheEntry[]");
}
this.type = type;
this.msg = msg;
this.entries = entries;
}
/**
* Type of insane behavior this object represents
*/
public InsanityType getType() { return type; }
/**
* Description of hte insane behavior
*/
public String getMsg() { return msg; }
/**
* CacheEntry objects which suggest a problem
*/
public CacheEntry[] getCacheEntries() { return entries; }
/**
* Multi-Line representation of this Insanity object, starting with
* the Type and Msg, followed by each CacheEntry.toString() on its
* own line prefaced by a tab character
*/
@Override
public String toString() {
StringBuilder buf = new StringBuilder();
buf.append(getType()).append(": ");
String m = getMsg();
if (null != m) buf.append(m);
buf.append('\n');
CacheEntry[] ce = getCacheEntries();
for (int i = 0; i < ce.length; i++) {
buf.append('\t').append(ce[i].toString()).append('\n');
}
return buf.toString();
}
}
/**
* An Enumeration of the different types of "insane" behavior that
* may be detected in a FieldCache.
*
* @see InsanityType#SUBREADER
* @see InsanityType#VALUEMISMATCH
* @see InsanityType#EXPECTED
*/
public final static class InsanityType {
private final String label;
private InsanityType(final String label) {
this.label = label;
}
@Override
public String toString() { return label; }
/**
* Indicates an overlap in cache usage on a given field
* in sub/super readers.
*/
public final static InsanityType SUBREADER
= new InsanityType("SUBREADER");
/**
* <p>
* Indicates entries have the same reader+fieldname but
* different cached values. This can happen if different datatypes,
* or parsers are used -- and while it's not necessarily a bug
* it's typically an indication of a possible problem.
* </p>
* <p>
* <b>NOTE:</b> Only the reader, fieldname, and cached value are actually
* tested -- if two cache entries have different parsers or datatypes but
* the cached values are the same Object (== not just equal()) this method
* does not consider that a red flag. This allows for subtle variations
* in the way a Parser is specified (null vs DEFAULT_LONG_PARSER, etc...)
* </p>
*/
public final static InsanityType VALUEMISMATCH
= new InsanityType("VALUEMISMATCH");
/**
* Indicates an expected bit of "insanity". This may be useful for
* clients that wish to preserve/log information about insane usage
* but indicate that it was expected.
*/
public final static InsanityType EXPECTED
= new InsanityType("EXPECTED");
}
}

View File

@ -0,0 +1,391 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.uninverting;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Map;
import org.apache.lucene.document.BinaryDocValuesField; // javadocs
import org.apache.lucene.document.NumericDocValuesField; // javadocs
import org.apache.lucene.document.SortedDocValuesField; // javadocs
import org.apache.lucene.document.SortedSetDocValuesField; // javadocs
import org.apache.lucene.document.StringField; // javadocs
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.FilterDirectoryReader;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.Bits;
import org.apache.solr.uninverting.FieldCache.CacheEntry;
/**
* A FilterReader that exposes <i>indexed</i> values as if they also had
* docvalues.
* <p>
* This is accomplished by "inverting the inverted index" or "uninversion".
* <p>
* The uninversion process happens lazily: upon the first request for the
* field's docvalues (e.g. via {@link org.apache.lucene.index.LeafReader#getNumericDocValues(String)}
* or similar), it will create the docvalues on-the-fly if needed and cache it,
* based on the core cache key of the wrapped LeafReader.
*/
public class UninvertingReader extends FilterLeafReader {
/**
* Specifies the type of uninversion to apply for the field.
*/
public static enum Type {
/**
* Single-valued Integer, (e.g. indexed with {@link org.apache.lucene.document.IntPoint})
* <p>
* Fields with this type act as if they were indexed with
* {@link NumericDocValuesField}.
*/
INTEGER_POINT,
/**
* Single-valued Integer, (e.g. indexed with {@link org.apache.lucene.document.LongPoint})
* <p>
* Fields with this type act as if they were indexed with
* {@link NumericDocValuesField}.
*/
LONG_POINT,
/**
* Single-valued Integer, (e.g. indexed with {@link org.apache.lucene.document.FloatPoint})
* <p>
* Fields with this type act as if they were indexed with
* {@link NumericDocValuesField}.
*/
FLOAT_POINT,
/**
* Single-valued Integer, (e.g. indexed with {@link org.apache.lucene.document.DoublePoint})
* <p>
* Fields with this type act as if they were indexed with
* {@link NumericDocValuesField}.
*/
DOUBLE_POINT,
/**
* Single-valued Integer, (e.g. indexed with {@link org.apache.lucene.document.LegacyIntField})
* <p>
* Fields with this type act as if they were indexed with
* {@link NumericDocValuesField}.
* @deprecated Index with points and use {@link #INTEGER_POINT} instead.
*/
@Deprecated
LEGACY_INTEGER,
/**
* Single-valued Long, (e.g. indexed with {@link org.apache.lucene.document.LegacyLongField})
* <p>
* Fields with this type act as if they were indexed with
* {@link NumericDocValuesField}.
* @deprecated Index with points and use {@link #LONG_POINT} instead.
*/
@Deprecated
LEGACY_LONG,
/**
* Single-valued Float, (e.g. indexed with {@link org.apache.lucene.document.LegacyFloatField})
* <p>
* Fields with this type act as if they were indexed with
* {@link NumericDocValuesField}.
* @deprecated Index with points and use {@link #FLOAT_POINT} instead.
*/
@Deprecated
LEGACY_FLOAT,
/**
* Single-valued Double, (e.g. indexed with {@link org.apache.lucene.document.LegacyDoubleField})
* <p>
* Fields with this type act as if they were indexed with
* {@link NumericDocValuesField}.
* @deprecated Index with points and use {@link #DOUBLE_POINT} instead.
*/
@Deprecated
LEGACY_DOUBLE,
/**
* Single-valued Binary, (e.g. indexed with {@link StringField})
* <p>
* Fields with this type act as if they were indexed with
* {@link BinaryDocValuesField}.
*/
BINARY,
/**
* Single-valued Binary, (e.g. indexed with {@link StringField})
* <p>
* Fields with this type act as if they were indexed with
* {@link SortedDocValuesField}.
*/
SORTED,
/**
* Multi-valued Binary, (e.g. indexed with {@link StringField})
* <p>
* Fields with this type act as if they were indexed with
* {@link SortedSetDocValuesField}.
*/
SORTED_SET_BINARY,
/**
* Multi-valued Integer, (e.g. indexed with {@link org.apache.lucene.document.LegacyIntField})
* <p>
* Fields with this type act as if they were indexed with
* {@link SortedSetDocValuesField}.
*/
SORTED_SET_INTEGER,
/**
* Multi-valued Float, (e.g. indexed with {@link org.apache.lucene.document.LegacyFloatField})
* <p>
* Fields with this type act as if they were indexed with
* {@link SortedSetDocValuesField}.
*/
SORTED_SET_FLOAT,
/**
* Multi-valued Long, (e.g. indexed with {@link org.apache.lucene.document.LegacyLongField})
* <p>
* Fields with this type act as if they were indexed with
* {@link SortedSetDocValuesField}.
*/
SORTED_SET_LONG,
/**
* Multi-valued Double, (e.g. indexed with {@link org.apache.lucene.document.LegacyDoubleField})
* <p>
* Fields with this type act as if they were indexed with
* {@link SortedSetDocValuesField}.
*/
SORTED_SET_DOUBLE
}
/**
* Wraps a provided DirectoryReader. Note that for convenience, the returned reader
* can be used normally (e.g. passed to {@link DirectoryReader#openIfChanged(DirectoryReader)})
* and so on.
*/
public static DirectoryReader wrap(DirectoryReader in, final Map<String,Type> mapping) throws IOException {
return new UninvertingDirectoryReader(in, mapping);
}
static class UninvertingDirectoryReader extends FilterDirectoryReader {
final Map<String,Type> mapping;
public UninvertingDirectoryReader(DirectoryReader in, final Map<String,Type> mapping) throws IOException {
super(in, new FilterDirectoryReader.SubReaderWrapper() {
@Override
public LeafReader wrap(LeafReader reader) {
return new UninvertingReader(reader, mapping);
}
});
this.mapping = mapping;
}
@Override
protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) throws IOException {
return new UninvertingDirectoryReader(in, mapping);
}
}
final Map<String,Type> mapping;
final FieldInfos fieldInfos;
/**
* Create a new UninvertingReader with the specified mapping
* <p>
* Expert: This should almost never be used. Use {@link #wrap(DirectoryReader, Map)}
* instead.
*
* @lucene.internal
*/
public UninvertingReader(LeafReader in, Map<String,Type> mapping) {
super(in);
this.mapping = mapping;
ArrayList<FieldInfo> filteredInfos = new ArrayList<>();
for (FieldInfo fi : in.getFieldInfos()) {
DocValuesType type = fi.getDocValuesType();
if (type == DocValuesType.NONE) {
Type t = mapping.get(fi.name);
if (t != null) {
if (t == Type.INTEGER_POINT || t == Type.LONG_POINT || t == Type.FLOAT_POINT || t == Type.DOUBLE_POINT) {
// type uses points
if (fi.getPointDimensionCount() == 0) {
continue;
}
} else {
// type uses inverted index
if (fi.getIndexOptions() == IndexOptions.NONE) {
continue;
}
}
switch(t) {
case INTEGER_POINT:
case LONG_POINT:
case FLOAT_POINT:
case DOUBLE_POINT:
case LEGACY_INTEGER:
case LEGACY_LONG:
case LEGACY_FLOAT:
case LEGACY_DOUBLE:
type = DocValuesType.NUMERIC;
break;
case BINARY:
type = DocValuesType.BINARY;
break;
case SORTED:
type = DocValuesType.SORTED;
break;
case SORTED_SET_BINARY:
case SORTED_SET_INTEGER:
case SORTED_SET_FLOAT:
case SORTED_SET_LONG:
case SORTED_SET_DOUBLE:
type = DocValuesType.SORTED_SET;
break;
default:
throw new AssertionError();
}
}
}
filteredInfos.add(new FieldInfo(fi.name, fi.number, fi.hasVectors(), fi.omitsNorms(),
fi.hasPayloads(), fi.getIndexOptions(), type, fi.getDocValuesGen(), fi.attributes(),
fi.getPointDimensionCount(), fi.getPointNumBytes()));
}
fieldInfos = new FieldInfos(filteredInfos.toArray(new FieldInfo[filteredInfos.size()]));
}
@Override
public FieldInfos getFieldInfos() {
return fieldInfos;
}
@Override
public NumericDocValues getNumericDocValues(String field) throws IOException {
Type v = getType(field);
if (v != null) {
switch (v) {
case INTEGER_POINT: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.INT_POINT_PARSER, true);
case FLOAT_POINT: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.FLOAT_POINT_PARSER, true);
case LONG_POINT: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.LONG_POINT_PARSER, true);
case DOUBLE_POINT: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.DOUBLE_POINT_PARSER, true);
case LEGACY_INTEGER: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.LEGACY_INT_PARSER, true);
case LEGACY_FLOAT: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.LEGACY_FLOAT_PARSER, true);
case LEGACY_LONG: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.LEGACY_LONG_PARSER, true);
case LEGACY_DOUBLE: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.LEGACY_DOUBLE_PARSER, true);
}
}
return super.getNumericDocValues(field);
}
@Override
public BinaryDocValues getBinaryDocValues(String field) throws IOException {
Type v = getType(field);
if (v == Type.BINARY) {
return FieldCache.DEFAULT.getTerms(in, field, true);
} else {
return in.getBinaryDocValues(field);
}
}
@Override
public SortedDocValues getSortedDocValues(String field) throws IOException {
Type v = getType(field);
if (v == Type.SORTED) {
return FieldCache.DEFAULT.getTermsIndex(in, field);
} else {
return in.getSortedDocValues(field);
}
}
@Override
public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
Type v = getType(field);
if (v != null) {
switch (v) {
case SORTED_SET_INTEGER:
case SORTED_SET_FLOAT:
return FieldCache.DEFAULT.getDocTermOrds(in, field, FieldCache.INT32_TERM_PREFIX);
case SORTED_SET_LONG:
case SORTED_SET_DOUBLE:
return FieldCache.DEFAULT.getDocTermOrds(in, field, FieldCache.INT64_TERM_PREFIX);
case SORTED_SET_BINARY:
return FieldCache.DEFAULT.getDocTermOrds(in, field, null);
}
}
return in.getSortedSetDocValues(field);
}
@Override
public Bits getDocsWithField(String field) throws IOException {
Type v = getType(field);
if (v != null) {
switch (v) {
case INTEGER_POINT: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.INT_POINT_PARSER);
case FLOAT_POINT: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.FLOAT_POINT_PARSER);
case LONG_POINT: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.LONG_POINT_PARSER);
case DOUBLE_POINT: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.DOUBLE_POINT_PARSER);
case LEGACY_INTEGER: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.LEGACY_INT_PARSER);
case LEGACY_FLOAT: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.LEGACY_FLOAT_PARSER);
case LEGACY_LONG: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.LEGACY_LONG_PARSER);
case LEGACY_DOUBLE: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.LEGACY_DOUBLE_PARSER);
default:
return FieldCache.DEFAULT.getDocsWithField(in, field, null);
}
} else {
return in.getDocsWithField(field);
}
}
/**
* Returns the field's uninversion type, or null
* if the field doesn't exist or doesn't have a mapping.
*/
private Type getType(String field) {
FieldInfo info = fieldInfos.fieldInfo(field);
if (info == null || info.getDocValuesType() == DocValuesType.NONE) {
return null;
}
return mapping.get(field);
}
@Override
public Object getCoreCacheKey() {
return in.getCoreCacheKey();
}
@Override
public Object getCombinedCoreAndDeletesKey() {
return in.getCombinedCoreAndDeletesKey();
}
@Override
public String toString() {
return "Uninverting(" + in.toString() + ")";
}
/**
* Return information about the backing cache
* @lucene.internal
*/
public static String[] getUninvertedStats() {
CacheEntry[] entries = FieldCache.DEFAULT.getCacheEntries();
String[] info = new String[entries.length];
for (int i = 0; i < entries.length; i++) {
info[i] = entries[i].toString();
}
return info;
}
}

View File

@ -0,0 +1,21 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Support for creating docvalues on-the-fly from the inverted index at runtime.
*/
package org.apache.solr.uninverting;

View File

@ -29,8 +29,9 @@ import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight; import org.apache.lucene.search.Weight;
import org.apache.lucene.uninverting.UninvertingReader;
import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.IndexSchema;
import org.apache.solr.uninverting.UninvertingReader;
import org.apache.solr.uninverting.UninvertingReader;
/** /**
* Allows access to uninverted docvalues by delete-by-queries. * Allows access to uninverted docvalues by delete-by-queries.

View File

@ -24,7 +24,6 @@ import java.util.concurrent.locks.ReentrantReadWriteLock;
import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.Terms; import org.apache.lucene.index.Terms;
import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.ValueSource;
@ -34,6 +33,7 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LegacyNumericUtils; import org.apache.lucene.util.LegacyNumericUtils;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.SuppressForbidden; import org.apache.solr.common.util.SuppressForbidden;
import org.apache.solr.index.SlowCompositeReaderWrapper;
import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField; import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.search.SolrIndexSearcher;

View File

@ -0,0 +1,95 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.index;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
public class TestSlowCompositeReaderWrapper extends LuceneTestCase {
public void testCoreListenerOnSlowCompositeReaderWrapper() throws IOException {
RandomIndexWriter w = new RandomIndexWriter(random(), newDirectory());
final int numDocs = TestUtil.nextInt(random(), 1, 5);
for (int i = 0; i < numDocs; ++i) {
w.addDocument(new Document());
if (random().nextBoolean()) {
w.commit();
}
}
w.commit();
w.close();
final IndexReader reader = DirectoryReader.open(w.w.getDirectory());
final LeafReader leafReader = SlowCompositeReaderWrapper.wrap(reader);
final int numListeners = TestUtil.nextInt(random(), 1, 10);
final List<LeafReader.CoreClosedListener> listeners = new ArrayList<>();
AtomicInteger counter = new AtomicInteger(numListeners);
for (int i = 0; i < numListeners; ++i) {
CountCoreListener listener = new CountCoreListener(counter, leafReader.getCoreCacheKey());
listeners.add(listener);
leafReader.addCoreClosedListener(listener);
}
for (int i = 0; i < 100; ++i) {
leafReader.addCoreClosedListener(listeners.get(random().nextInt(listeners.size())));
}
final int removed = random().nextInt(numListeners);
Collections.shuffle(listeners, random());
for (int i = 0; i < removed; ++i) {
leafReader.removeCoreClosedListener(listeners.get(i));
}
assertEquals(numListeners, counter.get());
// make sure listeners are registered on the wrapped reader and that closing any of them has the same effect
if (random().nextBoolean()) {
reader.close();
} else {
leafReader.close();
}
assertEquals(removed, counter.get());
w.w.getDirectory().close();
}
private static final class CountCoreListener implements LeafReader.CoreClosedListener {
private final AtomicInteger count;
private final Object coreCacheKey;
public CountCoreListener(AtomicInteger count, Object coreCacheKey) {
this.count = count;
this.coreCacheKey = coreCacheKey;
}
@Override
public void onClose(Object coreCacheKey) {
assertSame(this.coreCacheKey, coreCacheKey);
count.decrementAndGet();
}
}
}

View File

@ -25,12 +25,12 @@ import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.uninverting.DocTermOrds;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.params.FacetParams; import org.apache.solr.common.params.FacetParams;
import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.uninverting.DocTermOrds;
import org.apache.solr.util.RefCounted; import org.apache.solr.util.RefCounted;
import org.junit.After; import org.junit.After;
import org.junit.BeforeClass; import org.junit.BeforeClass;

View File

@ -42,13 +42,12 @@ import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafCollector; import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort; import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortField.Type; import org.apache.lucene.search.SortField.Type;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldCollector; import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.uninverting.UninvertingReader;
import org.apache.lucene.util.BitDocIdSet; import org.apache.lucene.util.BitDocIdSet;
import org.apache.lucene.util.Bits; import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.FixedBitSet;
@ -56,6 +55,7 @@ import org.apache.lucene.util.TestUtil;
import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.SchemaField; import org.apache.solr.schema.SchemaField;
import org.apache.solr.uninverting.UninvertingReader;
import org.junit.BeforeClass; import org.junit.BeforeClass;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;

View File

@ -0,0 +1,681 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.uninverting;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LegacyIntField;
import org.apache.lucene.document.LegacyLongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LegacyNumericUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.index.SlowCompositeReaderWrapper;
// TODO:
// - test w/ del docs
// - test prefix
// - test w/ cutoff
// - crank docs way up so we get some merging sometimes
public class TestDocTermOrds extends LuceneTestCase {
public void testEmptyIndex() throws IOException {
final Directory dir = newDirectory();
final IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
iw.close();
final DirectoryReader ir = DirectoryReader.open(dir);
TestUtil.checkReader(ir);
final LeafReader composite = SlowCompositeReaderWrapper.wrap(ir);
TestUtil.checkReader(composite);
// check the leaves
// (normally there are none for an empty index, so this is really just future
// proofing in case that changes for some reason)
for (LeafReaderContext rc : ir.leaves()) {
final LeafReader r = rc.reader();
final DocTermOrds dto = new DocTermOrds(r, r.getLiveDocs(), "any_field");
assertNull("OrdTermsEnum should be null (leaf)", dto.getOrdTermsEnum(r));
assertEquals("iterator should be empty (leaf)", 0, dto.iterator(r).getValueCount());
}
// check the composite
final DocTermOrds dto = new DocTermOrds(composite, composite.getLiveDocs(), "any_field");
assertNull("OrdTermsEnum should be null (composite)", dto.getOrdTermsEnum(composite));
assertEquals("iterator should be empty (composite)", 0, dto.iterator(composite).getValueCount());
ir.close();
dir.close();
}
public void testSimple() throws Exception {
Directory dir = newDirectory();
final RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
Document doc = new Document();
Field field = newTextField("field", "", Field.Store.NO);
doc.add(field);
field.setStringValue("a b c");
w.addDocument(doc);
field.setStringValue("d e f");
w.addDocument(doc);
field.setStringValue("a f");
w.addDocument(doc);
final IndexReader r = w.getReader();
w.close();
final LeafReader ar = SlowCompositeReaderWrapper.wrap(r);
TestUtil.checkReader(ar);
final DocTermOrds dto = new DocTermOrds(ar, ar.getLiveDocs(), "field");
SortedSetDocValues iter = dto.iterator(ar);
iter.setDocument(0);
assertEquals(0, iter.nextOrd());
assertEquals(1, iter.nextOrd());
assertEquals(2, iter.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());
iter.setDocument(1);
assertEquals(3, iter.nextOrd());
assertEquals(4, iter.nextOrd());
assertEquals(5, iter.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());
iter.setDocument(2);
assertEquals(0, iter.nextOrd());
assertEquals(5, iter.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());
r.close();
dir.close();
}
public void testRandom() throws Exception {
Directory dir = newDirectory();
final int NUM_TERMS = atLeast(20);
final Set<BytesRef> terms = new HashSet<>();
while(terms.size() < NUM_TERMS) {
final String s = TestUtil.randomRealisticUnicodeString(random());
//final String s = _TestUtil.randomSimpleString(random);
if (s.length() > 0) {
terms.add(new BytesRef(s));
}
}
final BytesRef[] termsArray = terms.toArray(new BytesRef[terms.size()]);
Arrays.sort(termsArray);
final int NUM_DOCS = atLeast(100);
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
// Sometimes swap in codec that impls ord():
if (random().nextInt(10) == 7) {
// Make sure terms index has ords:
Codec codec = TestUtil.alwaysPostingsFormat(TestUtil.getPostingsFormatWithOrds(random()));
conf.setCodec(codec);
}
final RandomIndexWriter w = new RandomIndexWriter(random(), dir, conf);
final int[][] idToOrds = new int[NUM_DOCS][];
final Set<Integer> ordsForDocSet = new HashSet<>();
for(int id=0;id<NUM_DOCS;id++) {
Document doc = new Document();
doc.add(new LegacyIntField("id", id, Field.Store.YES));
final int termCount = TestUtil.nextInt(random(), 0, 20 * RANDOM_MULTIPLIER);
while(ordsForDocSet.size() < termCount) {
ordsForDocSet.add(random().nextInt(termsArray.length));
}
final int[] ordsForDoc = new int[termCount];
int upto = 0;
if (VERBOSE) {
System.out.println("TEST: doc id=" + id);
}
for(int ord : ordsForDocSet) {
ordsForDoc[upto++] = ord;
Field field = newStringField("field", termsArray[ord].utf8ToString(), Field.Store.NO);
if (VERBOSE) {
System.out.println(" f=" + termsArray[ord].utf8ToString());
}
doc.add(field);
}
ordsForDocSet.clear();
Arrays.sort(ordsForDoc);
idToOrds[id] = ordsForDoc;
w.addDocument(doc);
}
final DirectoryReader r = w.getReader();
w.close();
if (VERBOSE) {
System.out.println("TEST: reader=" + r);
}
for(LeafReaderContext ctx : r.leaves()) {
if (VERBOSE) {
System.out.println("\nTEST: sub=" + ctx.reader());
}
verify(ctx.reader(), idToOrds, termsArray, null);
}
// Also test top-level reader: its enum does not support
// ord, so this forces the OrdWrapper to run:
if (VERBOSE) {
System.out.println("TEST: top reader");
}
LeafReader slowR = SlowCompositeReaderWrapper.wrap(r);
TestUtil.checkReader(slowR);
verify(slowR, idToOrds, termsArray, null);
FieldCache.DEFAULT.purgeByCacheKey(slowR.getCoreCacheKey());
r.close();
dir.close();
}
public void testRandomWithPrefix() throws Exception {
Directory dir = newDirectory();
final Set<String> prefixes = new HashSet<>();
final int numPrefix = TestUtil.nextInt(random(), 2, 7);
if (VERBOSE) {
System.out.println("TEST: use " + numPrefix + " prefixes");
}
while(prefixes.size() < numPrefix) {
prefixes.add(TestUtil.randomRealisticUnicodeString(random()));
//prefixes.add(_TestUtil.randomSimpleString(random));
}
final String[] prefixesArray = prefixes.toArray(new String[prefixes.size()]);
final int NUM_TERMS = atLeast(20);
final Set<BytesRef> terms = new HashSet<>();
while(terms.size() < NUM_TERMS) {
final String s = prefixesArray[random().nextInt(prefixesArray.length)] + TestUtil.randomRealisticUnicodeString(random());
//final String s = prefixesArray[random.nextInt(prefixesArray.length)] + _TestUtil.randomSimpleString(random);
if (s.length() > 0) {
terms.add(new BytesRef(s));
}
}
final BytesRef[] termsArray = terms.toArray(new BytesRef[terms.size()]);
Arrays.sort(termsArray);
final int NUM_DOCS = atLeast(100);
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
// Sometimes swap in codec that impls ord():
if (random().nextInt(10) == 7) {
Codec codec = TestUtil.alwaysPostingsFormat(TestUtil.getPostingsFormatWithOrds(random()));
conf.setCodec(codec);
}
final RandomIndexWriter w = new RandomIndexWriter(random(), dir, conf);
final int[][] idToOrds = new int[NUM_DOCS][];
final Set<Integer> ordsForDocSet = new HashSet<>();
for(int id=0;id<NUM_DOCS;id++) {
Document doc = new Document();
doc.add(new LegacyIntField("id", id, Field.Store.YES));
final int termCount = TestUtil.nextInt(random(), 0, 20 * RANDOM_MULTIPLIER);
while(ordsForDocSet.size() < termCount) {
ordsForDocSet.add(random().nextInt(termsArray.length));
}
final int[] ordsForDoc = new int[termCount];
int upto = 0;
if (VERBOSE) {
System.out.println("TEST: doc id=" + id);
}
for(int ord : ordsForDocSet) {
ordsForDoc[upto++] = ord;
Field field = newStringField("field", termsArray[ord].utf8ToString(), Field.Store.NO);
if (VERBOSE) {
System.out.println(" f=" + termsArray[ord].utf8ToString());
}
doc.add(field);
}
ordsForDocSet.clear();
Arrays.sort(ordsForDoc);
idToOrds[id] = ordsForDoc;
w.addDocument(doc);
}
final DirectoryReader r = w.getReader();
w.close();
if (VERBOSE) {
System.out.println("TEST: reader=" + r);
}
LeafReader slowR = SlowCompositeReaderWrapper.wrap(r);
TestUtil.checkReader(slowR);
for(String prefix : prefixesArray) {
final BytesRef prefixRef = prefix == null ? null : new BytesRef(prefix);
final int[][] idToOrdsPrefix = new int[NUM_DOCS][];
for(int id=0;id<NUM_DOCS;id++) {
final int[] docOrds = idToOrds[id];
final List<Integer> newOrds = new ArrayList<>();
for(int ord : idToOrds[id]) {
if (StringHelper.startsWith(termsArray[ord], prefixRef)) {
newOrds.add(ord);
}
}
final int[] newOrdsArray = new int[newOrds.size()];
int upto = 0;
for(int ord : newOrds) {
newOrdsArray[upto++] = ord;
}
idToOrdsPrefix[id] = newOrdsArray;
}
for(LeafReaderContext ctx : r.leaves()) {
if (VERBOSE) {
System.out.println("\nTEST: sub=" + ctx.reader());
}
verify(ctx.reader(), idToOrdsPrefix, termsArray, prefixRef);
}
// Also test top-level reader: its enum does not support
// ord, so this forces the OrdWrapper to run:
if (VERBOSE) {
System.out.println("TEST: top reader");
}
verify(slowR, idToOrdsPrefix, termsArray, prefixRef);
}
FieldCache.DEFAULT.purgeByCacheKey(slowR.getCoreCacheKey());
r.close();
dir.close();
}
private void verify(LeafReader r, int[][] idToOrds, BytesRef[] termsArray, BytesRef prefixRef) throws Exception {
final DocTermOrds dto = new DocTermOrds(r, r.getLiveDocs(),
"field",
prefixRef,
Integer.MAX_VALUE,
TestUtil.nextInt(random(), 2, 10));
final NumericDocValues docIDToID = FieldCache.DEFAULT.getNumerics(r, "id", FieldCache.LEGACY_INT_PARSER, false);
/*
for(int docID=0;docID<subR.maxDoc();docID++) {
System.out.println(" docID=" + docID + " id=" + docIDToID[docID]);
}
*/
if (VERBOSE) {
System.out.println("TEST: verify prefix=" + (prefixRef==null ? "null" : prefixRef.utf8ToString()));
System.out.println("TEST: all TERMS:");
TermsEnum allTE = MultiFields.getTerms(r, "field").iterator();
int ord = 0;
while(allTE.next() != null) {
System.out.println(" ord=" + (ord++) + " term=" + allTE.term().utf8ToString());
}
}
//final TermsEnum te = subR.fields().terms("field").iterator();
final TermsEnum te = dto.getOrdTermsEnum(r);
if (dto.numTerms() == 0) {
if (prefixRef == null) {
assertNull(MultiFields.getTerms(r, "field"));
} else {
Terms terms = MultiFields.getTerms(r, "field");
if (terms != null) {
TermsEnum termsEnum = terms.iterator();
TermsEnum.SeekStatus result = termsEnum.seekCeil(prefixRef);
if (result != TermsEnum.SeekStatus.END) {
assertFalse("term=" + termsEnum.term().utf8ToString() + " matches prefix=" + prefixRef.utf8ToString(), StringHelper.startsWith(termsEnum.term(), prefixRef));
} else {
// ok
}
} else {
// ok
}
}
return;
}
if (VERBOSE) {
System.out.println("TEST: TERMS:");
te.seekExact(0);
while(true) {
System.out.println(" ord=" + te.ord() + " term=" + te.term().utf8ToString());
if (te.next() == null) {
break;
}
}
}
SortedSetDocValues iter = dto.iterator(r);
for(int docID=0;docID<r.maxDoc();docID++) {
if (VERBOSE) {
System.out.println("TEST: docID=" + docID + " of " + r.maxDoc() + " (id=" + docIDToID.get(docID) + ")");
}
iter.setDocument(docID);
final int[] answers = idToOrds[(int) docIDToID.get(docID)];
int upto = 0;
long ord;
while ((ord = iter.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
te.seekExact(ord);
final BytesRef expected = termsArray[answers[upto++]];
if (VERBOSE) {
System.out.println(" exp=" + expected.utf8ToString() + " actual=" + te.term().utf8ToString());
}
assertEquals("expected=" + expected.utf8ToString() + " actual=" + te.term().utf8ToString() + " ord=" + ord, expected, te.term());
}
assertEquals(answers.length, upto);
}
}
public void testBackToTheFuture() throws Exception {
Directory dir = newDirectory();
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
Document doc = new Document();
doc.add(newStringField("foo", "bar", Field.Store.NO));
iw.addDocument(doc);
doc = new Document();
doc.add(newStringField("foo", "baz", Field.Store.NO));
// we need a second value for a doc, or we don't actually test DocTermOrds!
doc.add(newStringField("foo", "car", Field.Store.NO));
iw.addDocument(doc);
DirectoryReader r1 = DirectoryReader.open(iw);
iw.deleteDocuments(new Term("foo", "baz"));
DirectoryReader r2 = DirectoryReader.open(iw);
FieldCache.DEFAULT.getDocTermOrds(getOnlyLeafReader(r2), "foo", null);
SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(getOnlyLeafReader(r1), "foo", null);
assertEquals(3, v.getValueCount());
v.setDocument(1);
assertEquals(1, v.nextOrd());
iw.close();
r1.close();
r2.close();
dir.close();
}
public void testNumericEncoded32() throws IOException {
Directory dir = newDirectory();
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
Document doc = new Document();
doc.add(new LegacyIntField("foo", 5, Field.Store.NO));
iw.addDocument(doc);
doc = new Document();
doc.add(new LegacyIntField("foo", 5, Field.Store.NO));
doc.add(new LegacyIntField("foo", -3, Field.Store.NO));
iw.addDocument(doc);
iw.forceMerge(1);
iw.close();
DirectoryReader ir = DirectoryReader.open(dir);
LeafReader ar = getOnlyLeafReader(ir);
SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(ar, "foo", FieldCache.INT32_TERM_PREFIX);
assertEquals(2, v.getValueCount());
v.setDocument(0);
assertEquals(1, v.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
v.setDocument(1);
assertEquals(0, v.nextOrd());
assertEquals(1, v.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
BytesRef value = v.lookupOrd(0);
assertEquals(-3, LegacyNumericUtils.prefixCodedToInt(value));
value = v.lookupOrd(1);
assertEquals(5, LegacyNumericUtils.prefixCodedToInt(value));
ir.close();
dir.close();
}
public void testNumericEncoded64() throws IOException {
Directory dir = newDirectory();
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
Document doc = new Document();
doc.add(new LegacyLongField("foo", 5, Field.Store.NO));
iw.addDocument(doc);
doc = new Document();
doc.add(new LegacyLongField("foo", 5, Field.Store.NO));
doc.add(new LegacyLongField("foo", -3, Field.Store.NO));
iw.addDocument(doc);
iw.forceMerge(1);
iw.close();
DirectoryReader ir = DirectoryReader.open(dir);
LeafReader ar = getOnlyLeafReader(ir);
SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(ar, "foo", FieldCache.INT64_TERM_PREFIX);
assertEquals(2, v.getValueCount());
v.setDocument(0);
assertEquals(1, v.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
v.setDocument(1);
assertEquals(0, v.nextOrd());
assertEquals(1, v.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
BytesRef value = v.lookupOrd(0);
assertEquals(-3, LegacyNumericUtils.prefixCodedToLong(value));
value = v.lookupOrd(1);
assertEquals(5, LegacyNumericUtils.prefixCodedToLong(value));
ir.close();
dir.close();
}
public void testSortedTermsEnum() throws IOException {
Directory directory = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
iwconfig.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
Document doc = new Document();
doc.add(new StringField("field", "hello", Field.Store.NO));
iwriter.addDocument(doc);
doc = new Document();
doc.add(new StringField("field", "world", Field.Store.NO));
// we need a second value for a doc, or we don't actually test DocTermOrds!
doc.add(new StringField("field", "hello", Field.Store.NO));
iwriter.addDocument(doc);
doc = new Document();
doc.add(new StringField("field", "beer", Field.Store.NO));
iwriter.addDocument(doc);
iwriter.forceMerge(1);
DirectoryReader ireader = iwriter.getReader();
iwriter.close();
LeafReader ar = getOnlyLeafReader(ireader);
SortedSetDocValues dv = FieldCache.DEFAULT.getDocTermOrds(ar, "field", null);
assertEquals(3, dv.getValueCount());
TermsEnum termsEnum = dv.termsEnum();
// next()
assertEquals("beer", termsEnum.next().utf8ToString());
assertEquals(0, termsEnum.ord());
assertEquals("hello", termsEnum.next().utf8ToString());
assertEquals(1, termsEnum.ord());
assertEquals("world", termsEnum.next().utf8ToString());
assertEquals(2, termsEnum.ord());
// seekCeil()
assertEquals(SeekStatus.NOT_FOUND, termsEnum.seekCeil(new BytesRef("ha!")));
assertEquals("hello", termsEnum.term().utf8ToString());
assertEquals(1, termsEnum.ord());
assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef("beer")));
assertEquals("beer", termsEnum.term().utf8ToString());
assertEquals(0, termsEnum.ord());
assertEquals(SeekStatus.END, termsEnum.seekCeil(new BytesRef("zzz")));
// seekExact()
assertTrue(termsEnum.seekExact(new BytesRef("beer")));
assertEquals("beer", termsEnum.term().utf8ToString());
assertEquals(0, termsEnum.ord());
assertTrue(termsEnum.seekExact(new BytesRef("hello")));
assertEquals("hello", termsEnum.term().utf8ToString());
assertEquals(1, termsEnum.ord());
assertTrue(termsEnum.seekExact(new BytesRef("world")));
assertEquals("world", termsEnum.term().utf8ToString());
assertEquals(2, termsEnum.ord());
assertFalse(termsEnum.seekExact(new BytesRef("bogus")));
// seek(ord)
termsEnum.seekExact(0);
assertEquals("beer", termsEnum.term().utf8ToString());
assertEquals(0, termsEnum.ord());
termsEnum.seekExact(1);
assertEquals("hello", termsEnum.term().utf8ToString());
assertEquals(1, termsEnum.ord());
termsEnum.seekExact(2);
assertEquals("world", termsEnum.term().utf8ToString());
assertEquals(2, termsEnum.ord());
// lookupTerm(BytesRef)
assertEquals(-1, dv.lookupTerm(new BytesRef("apple")));
assertEquals(0, dv.lookupTerm(new BytesRef("beer")));
assertEquals(-2, dv.lookupTerm(new BytesRef("car")));
assertEquals(1, dv.lookupTerm(new BytesRef("hello")));
assertEquals(-3, dv.lookupTerm(new BytesRef("matter")));
assertEquals(2, dv.lookupTerm(new BytesRef("world")));
assertEquals(-4, dv.lookupTerm(new BytesRef("zany")));
ireader.close();
directory.close();
}
public void testActuallySingleValued() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig iwconfig = newIndexWriterConfig(null);
iwconfig.setMergePolicy(newLogMergePolicy());
IndexWriter iw = new IndexWriter(dir, iwconfig);
Document doc = new Document();
doc.add(new StringField("foo", "bar", Field.Store.NO));
iw.addDocument(doc);
doc = new Document();
doc.add(new StringField("foo", "baz", Field.Store.NO));
iw.addDocument(doc);
doc = new Document();
iw.addDocument(doc);
doc = new Document();
doc.add(new StringField("foo", "baz", Field.Store.NO));
doc.add(new StringField("foo", "baz", Field.Store.NO));
iw.addDocument(doc);
iw.forceMerge(1);
iw.close();
DirectoryReader ir = DirectoryReader.open(dir);
LeafReader ar = getOnlyLeafReader(ir);
SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(ar, "foo", null);
assertNotNull(DocValues.unwrapSingleton(v)); // actually a single-valued field
assertEquals(2, v.getValueCount());
v.setDocument(0);
assertEquals(0, v.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
v.setDocument(1);
assertEquals(1, v.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
v.setDocument(2);
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
v.setDocument(3);
assertEquals(1, v.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
BytesRef value = v.lookupOrd(0);
assertEquals("bar", value.utf8ToString());
value = v.lookupOrd(1);
assertEquals("baz", value.utf8ToString());
ir.close();
dir.close();
}
}

View File

@ -0,0 +1,731 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.uninverting;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.concurrent.CyclicBarrier;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FloatPoint;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.LegacyDoubleField;
import org.apache.lucene.document.LegacyFloatField;
import org.apache.lucene.document.LegacyIntField;
import org.apache.lucene.document.LegacyLongField;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LogDocMergePolicy;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LegacyNumericUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.index.SlowCompositeReaderWrapper;
import org.junit.AfterClass;
import org.junit.BeforeClass;
public class TestFieldCache extends LuceneTestCase {
private static LeafReader reader;
private static int NUM_DOCS;
private static int NUM_ORDS;
private static String[] unicodeStrings;
private static BytesRef[][] multiValued;
private static Directory directory;
@BeforeClass
public static void beforeClass() throws Exception {
NUM_DOCS = atLeast(500);
NUM_ORDS = atLeast(2);
directory = newDirectory();
IndexWriter writer= new IndexWriter(directory, new IndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(new LogDocMergePolicy()));
long theLong = Long.MAX_VALUE;
double theDouble = Double.MAX_VALUE;
int theInt = Integer.MAX_VALUE;
float theFloat = Float.MAX_VALUE;
unicodeStrings = new String[NUM_DOCS];
multiValued = new BytesRef[NUM_DOCS][NUM_ORDS];
if (VERBOSE) {
System.out.println("TEST: setUp");
}
for (int i = 0; i < NUM_DOCS; i++){
Document doc = new Document();
doc.add(new LongPoint("theLong", theLong--));
doc.add(new DoublePoint("theDouble", theDouble--));
doc.add(new IntPoint("theInt", theInt--));
doc.add(new FloatPoint("theFloat", theFloat--));
if (i%2 == 0) {
doc.add(new IntPoint("sparse", i));
}
if (i%2 == 0) {
doc.add(new IntPoint("numInt", i));
}
// sometimes skip the field:
if (random().nextInt(40) != 17) {
unicodeStrings[i] = generateString(i);
doc.add(newStringField("theRandomUnicodeString", unicodeStrings[i], Field.Store.YES));
}
// sometimes skip the field:
if (random().nextInt(10) != 8) {
for (int j = 0; j < NUM_ORDS; j++) {
String newValue = generateString(i);
multiValued[i][j] = new BytesRef(newValue);
doc.add(newStringField("theRandomUnicodeMultiValuedField", newValue, Field.Store.YES));
}
Arrays.sort(multiValued[i]);
}
writer.addDocument(doc);
}
writer.forceMerge(1); // this test relies on one segment and docid order
IndexReader r = DirectoryReader.open(writer);
assertEquals(1, r.leaves().size());
reader = r.leaves().get(0).reader();
TestUtil.checkReader(reader);
writer.close();
}
@AfterClass
public static void afterClass() throws Exception {
reader.close();
reader = null;
directory.close();
directory = null;
unicodeStrings = null;
multiValued = null;
}
public void test() throws IOException {
FieldCache cache = FieldCache.DEFAULT;
NumericDocValues doubles = cache.getNumerics(reader, "theDouble", FieldCache.DOUBLE_POINT_PARSER, random().nextBoolean());
assertSame("Second request to cache return same array", doubles, cache.getNumerics(reader, "theDouble", FieldCache.DOUBLE_POINT_PARSER, random().nextBoolean()));
for (int i = 0; i < NUM_DOCS; i++) {
assertEquals(Double.doubleToLongBits(Double.MAX_VALUE - i), doubles.get(i));
}
NumericDocValues longs = cache.getNumerics(reader, "theLong", FieldCache.LONG_POINT_PARSER, random().nextBoolean());
assertSame("Second request to cache return same array", longs, cache.getNumerics(reader, "theLong", FieldCache.LONG_POINT_PARSER, random().nextBoolean()));
for (int i = 0; i < NUM_DOCS; i++) {
assertEquals(Long.MAX_VALUE - i, longs.get(i));
}
NumericDocValues ints = cache.getNumerics(reader, "theInt", FieldCache.INT_POINT_PARSER, random().nextBoolean());
assertSame("Second request to cache return same array", ints, cache.getNumerics(reader, "theInt", FieldCache.INT_POINT_PARSER, random().nextBoolean()));
for (int i = 0; i < NUM_DOCS; i++) {
assertEquals(Integer.MAX_VALUE - i, ints.get(i));
}
NumericDocValues floats = cache.getNumerics(reader, "theFloat", FieldCache.FLOAT_POINT_PARSER, random().nextBoolean());
assertSame("Second request to cache return same array", floats, cache.getNumerics(reader, "theFloat", FieldCache.FLOAT_POINT_PARSER, random().nextBoolean()));
for (int i = 0; i < NUM_DOCS; i++) {
assertEquals(Float.floatToIntBits(Float.MAX_VALUE - i), floats.get(i));
}
Bits docsWithField = cache.getDocsWithField(reader, "theLong", FieldCache.LONG_POINT_PARSER);
assertSame("Second request to cache return same array", docsWithField, cache.getDocsWithField(reader, "theLong", FieldCache.LONG_POINT_PARSER));
assertTrue("docsWithField(theLong) must be class Bits.MatchAllBits", docsWithField instanceof Bits.MatchAllBits);
assertTrue("docsWithField(theLong) Size: " + docsWithField.length() + " is not: " + NUM_DOCS, docsWithField.length() == NUM_DOCS);
for (int i = 0; i < docsWithField.length(); i++) {
assertTrue(docsWithField.get(i));
}
docsWithField = cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER);
assertSame("Second request to cache return same array", docsWithField, cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER));
assertFalse("docsWithField(sparse) must not be class Bits.MatchAllBits", docsWithField instanceof Bits.MatchAllBits);
assertTrue("docsWithField(sparse) Size: " + docsWithField.length() + " is not: " + NUM_DOCS, docsWithField.length() == NUM_DOCS);
for (int i = 0; i < docsWithField.length(); i++) {
assertEquals(i%2 == 0, docsWithField.get(i));
}
// getTermsIndex
SortedDocValues termsIndex = cache.getTermsIndex(reader, "theRandomUnicodeString");
for (int i = 0; i < NUM_DOCS; i++) {
final String s;
final int ord = termsIndex.getOrd(i);
if (ord == -1) {
s = null;
} else {
s = termsIndex.lookupOrd(ord).utf8ToString();
}
assertTrue("for doc " + i + ": " + s + " does not equal: " + unicodeStrings[i], unicodeStrings[i] == null || unicodeStrings[i].equals(s));
}
int nTerms = termsIndex.getValueCount();
TermsEnum tenum = termsIndex.termsEnum();
for (int i=0; i<nTerms; i++) {
BytesRef val1 = BytesRef.deepCopyOf(tenum.next());
final BytesRef val = termsIndex.lookupOrd(i);
// System.out.println("i="+i);
assertEquals(val, val1);
}
// seek the enum around (note this isn't a great test here)
int num = atLeast(100);
for (int i = 0; i < num; i++) {
int k = random().nextInt(nTerms);
final BytesRef val = BytesRef.deepCopyOf(termsIndex.lookupOrd(k));
assertEquals(TermsEnum.SeekStatus.FOUND, tenum.seekCeil(val));
assertEquals(val, tenum.term());
}
for(int i=0;i<nTerms;i++) {
final BytesRef val = BytesRef.deepCopyOf(termsIndex.lookupOrd(i));
assertEquals(TermsEnum.SeekStatus.FOUND, tenum.seekCeil(val));
assertEquals(val, tenum.term());
}
// test bad field
termsIndex = cache.getTermsIndex(reader, "bogusfield");
// getTerms
BinaryDocValues terms = cache.getTerms(reader, "theRandomUnicodeString", true);
Bits bits = cache.getDocsWithField(reader, "theRandomUnicodeString", null);
for (int i = 0; i < NUM_DOCS; i++) {
final String s;
if (!bits.get(i)) {
s = null;
} else {
s = terms.get(i).utf8ToString();
}
assertTrue("for doc " + i + ": " + s + " does not equal: " + unicodeStrings[i], unicodeStrings[i] == null || unicodeStrings[i].equals(s));
}
// test bad field
terms = cache.getTerms(reader, "bogusfield", false);
// getDocTermOrds
SortedSetDocValues termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField", null);
int numEntries = cache.getCacheEntries().length;
// ask for it again, and check that we didnt create any additional entries:
termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField", null);
assertEquals(numEntries, cache.getCacheEntries().length);
for (int i = 0; i < NUM_DOCS; i++) {
termOrds.setDocument(i);
// This will remove identical terms. A DocTermOrds doesn't return duplicate ords for a docId
List<BytesRef> values = new ArrayList<>(new LinkedHashSet<>(Arrays.asList(multiValued[i])));
for (BytesRef v : values) {
if (v == null) {
// why does this test use null values... instead of an empty list: confusing
break;
}
long ord = termOrds.nextOrd();
assert ord != SortedSetDocValues.NO_MORE_ORDS;
BytesRef scratch = termOrds.lookupOrd(ord);
assertEquals(v, scratch);
}
assertEquals(SortedSetDocValues.NO_MORE_ORDS, termOrds.nextOrd());
}
// test bad field
termOrds = cache.getDocTermOrds(reader, "bogusfield", null);
assertTrue(termOrds.getValueCount() == 0);
FieldCache.DEFAULT.purgeByCacheKey(reader.getCoreCacheKey());
}
public void testEmptyIndex() throws Exception {
Directory dir = newDirectory();
IndexWriter writer= new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setMaxBufferedDocs(500));
writer.close();
IndexReader r = DirectoryReader.open(dir);
LeafReader reader = SlowCompositeReaderWrapper.wrap(r);
TestUtil.checkReader(reader);
FieldCache.DEFAULT.getTerms(reader, "foobar", true);
FieldCache.DEFAULT.getTermsIndex(reader, "foobar");
FieldCache.DEFAULT.purgeByCacheKey(reader.getCoreCacheKey());
r.close();
dir.close();
}
private static String generateString(int i) {
String s = null;
if (i > 0 && random().nextInt(3) == 1) {
// reuse past string -- try to find one that's not null
for(int iter = 0; iter < 10 && s == null;iter++) {
s = unicodeStrings[random().nextInt(i)];
}
if (s == null) {
s = TestUtil.randomUnicodeString(random());
}
} else {
s = TestUtil.randomUnicodeString(random());
}
return s;
}
public void testDocsWithField() throws Exception {
FieldCache cache = FieldCache.DEFAULT;
cache.purgeAllCaches();
assertEquals(0, cache.getCacheEntries().length);
cache.getNumerics(reader, "theDouble", FieldCache.DOUBLE_POINT_PARSER, true);
// The double[] takes one slots, and docsWithField should also
// have been populated:
assertEquals(2, cache.getCacheEntries().length);
Bits bits = cache.getDocsWithField(reader, "theDouble", FieldCache.DOUBLE_POINT_PARSER);
// No new entries should appear:
assertEquals(2, cache.getCacheEntries().length);
assertTrue(bits instanceof Bits.MatchAllBits);
NumericDocValues ints = cache.getNumerics(reader, "sparse", FieldCache.INT_POINT_PARSER, true);
assertEquals(4, cache.getCacheEntries().length);
Bits docsWithField = cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER);
assertEquals(4, cache.getCacheEntries().length);
for (int i = 0; i < docsWithField.length(); i++) {
if (i%2 == 0) {
assertTrue(docsWithField.get(i));
assertEquals(i, ints.get(i));
} else {
assertFalse(docsWithField.get(i));
}
}
NumericDocValues numInts = cache.getNumerics(reader, "numInt", FieldCache.INT_POINT_PARSER, random().nextBoolean());
docsWithField = cache.getDocsWithField(reader, "numInt", FieldCache.INT_POINT_PARSER);
for (int i = 0; i < docsWithField.length(); i++) {
if (i%2 == 0) {
assertTrue(docsWithField.get(i));
assertEquals(i, numInts.get(i));
} else {
assertFalse(docsWithField.get(i));
}
}
}
public void testGetDocsWithFieldThreadSafety() throws Exception {
final FieldCache cache = FieldCache.DEFAULT;
cache.purgeAllCaches();
int NUM_THREADS = 3;
Thread[] threads = new Thread[NUM_THREADS];
final AtomicBoolean failed = new AtomicBoolean();
final AtomicInteger iters = new AtomicInteger();
final int NUM_ITER = 200 * RANDOM_MULTIPLIER;
final CyclicBarrier restart = new CyclicBarrier(NUM_THREADS,
new Runnable() {
@Override
public void run() {
cache.purgeAllCaches();
iters.incrementAndGet();
}
});
for(int threadIDX=0;threadIDX<NUM_THREADS;threadIDX++) {
threads[threadIDX] = new Thread() {
@Override
public void run() {
try {
while(!failed.get()) {
final int op = random().nextInt(3);
if (op == 0) {
// Purge all caches & resume, once all
// threads get here:
restart.await();
if (iters.get() >= NUM_ITER) {
break;
}
} else if (op == 1) {
Bits docsWithField = cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER);
for (int i = 0; i < docsWithField.length(); i++) {
assertEquals(i%2 == 0, docsWithField.get(i));
}
} else {
NumericDocValues ints = cache.getNumerics(reader, "sparse", FieldCache.INT_POINT_PARSER, true);
Bits docsWithField = cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER);
for (int i = 0; i < docsWithField.length(); i++) {
if (i%2 == 0) {
assertTrue(docsWithField.get(i));
assertEquals(i, ints.get(i));
} else {
assertFalse(docsWithField.get(i));
}
}
}
}
} catch (Throwable t) {
failed.set(true);
restart.reset();
throw new RuntimeException(t);
}
}
};
threads[threadIDX].start();
}
for(int threadIDX=0;threadIDX<NUM_THREADS;threadIDX++) {
threads[threadIDX].join();
}
assertFalse(failed.get());
}
public void testDocValuesIntegration() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(null);
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
Document doc = new Document();
doc.add(new BinaryDocValuesField("binary", new BytesRef("binary value")));
doc.add(new SortedDocValuesField("sorted", new BytesRef("sorted value")));
doc.add(new NumericDocValuesField("numeric", 42));
doc.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value1")));
doc.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value2")));
iw.addDocument(doc);
DirectoryReader ir = iw.getReader();
iw.close();
LeafReader ar = getOnlyLeafReader(ir);
// Binary type: can be retrieved via getTerms()
expectThrows(IllegalStateException.class, () -> {
FieldCache.DEFAULT.getNumerics(ar, "binary", FieldCache.INT_POINT_PARSER, false);
});
BinaryDocValues binary = FieldCache.DEFAULT.getTerms(ar, "binary", true);
final BytesRef term = binary.get(0);
assertEquals("binary value", term.utf8ToString());
expectThrows(IllegalStateException.class, () -> {
FieldCache.DEFAULT.getTermsIndex(ar, "binary");
});
expectThrows(IllegalStateException.class, () -> {
FieldCache.DEFAULT.getDocTermOrds(ar, "binary", null);
});
expectThrows(IllegalStateException.class, () -> {
new DocTermOrds(ar, null, "binary");
});
Bits bits = FieldCache.DEFAULT.getDocsWithField(ar, "binary", null);
assertTrue(bits.get(0));
// Sorted type: can be retrieved via getTerms(), getTermsIndex(), getDocTermOrds()
expectThrows(IllegalStateException.class, () -> {
FieldCache.DEFAULT.getNumerics(ar, "sorted", FieldCache.INT_POINT_PARSER, false);
});
expectThrows(IllegalStateException.class, () -> {
new DocTermOrds(ar, null, "sorted");
});
binary = FieldCache.DEFAULT.getTerms(ar, "sorted", true);
BytesRef scratch = binary.get(0);
assertEquals("sorted value", scratch.utf8ToString());
SortedDocValues sorted = FieldCache.DEFAULT.getTermsIndex(ar, "sorted");
assertEquals(0, sorted.getOrd(0));
assertEquals(1, sorted.getValueCount());
scratch = sorted.get(0);
assertEquals("sorted value", scratch.utf8ToString());
SortedSetDocValues sortedSet = FieldCache.DEFAULT.getDocTermOrds(ar, "sorted", null);
sortedSet.setDocument(0);
assertEquals(0, sortedSet.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
assertEquals(1, sortedSet.getValueCount());
bits = FieldCache.DEFAULT.getDocsWithField(ar, "sorted", null);
assertTrue(bits.get(0));
// Numeric type: can be retrieved via getInts() and so on
NumericDocValues numeric = FieldCache.DEFAULT.getNumerics(ar, "numeric", FieldCache.INT_POINT_PARSER, false);
assertEquals(42, numeric.get(0));
expectThrows(IllegalStateException.class, () -> {
FieldCache.DEFAULT.getTerms(ar, "numeric", true);
});
expectThrows(IllegalStateException.class, () -> {
FieldCache.DEFAULT.getTermsIndex(ar, "numeric");
});
expectThrows(IllegalStateException.class, () -> {
FieldCache.DEFAULT.getDocTermOrds(ar, "numeric", null);
});
expectThrows(IllegalStateException.class, () -> {
new DocTermOrds(ar, null, "numeric");
});
bits = FieldCache.DEFAULT.getDocsWithField(ar, "numeric", null);
assertTrue(bits.get(0));
// SortedSet type: can be retrieved via getDocTermOrds()
expectThrows(IllegalStateException.class, () -> {
FieldCache.DEFAULT.getNumerics(ar, "sortedset", FieldCache.INT_POINT_PARSER, false);
});
expectThrows(IllegalStateException.class, () -> {
FieldCache.DEFAULT.getTerms(ar, "sortedset", true);
});
expectThrows(IllegalStateException.class, () -> {
FieldCache.DEFAULT.getTermsIndex(ar, "sortedset");
});
expectThrows(IllegalStateException.class, () -> {
new DocTermOrds(ar, null, "sortedset");
});
sortedSet = FieldCache.DEFAULT.getDocTermOrds(ar, "sortedset", null);
sortedSet.setDocument(0);
assertEquals(0, sortedSet.nextOrd());
assertEquals(1, sortedSet.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
assertEquals(2, sortedSet.getValueCount());
bits = FieldCache.DEFAULT.getDocsWithField(ar, "sortedset", null);
assertTrue(bits.get(0));
ir.close();
dir.close();
}
public void testNonexistantFields() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
Document doc = new Document();
iw.addDocument(doc);
DirectoryReader ir = iw.getReader();
iw.close();
LeafReader ar = getOnlyLeafReader(ir);
final FieldCache cache = FieldCache.DEFAULT;
cache.purgeAllCaches();
assertEquals(0, cache.getCacheEntries().length);
NumericDocValues ints = cache.getNumerics(ar, "bogusints", FieldCache.INT_POINT_PARSER, true);
assertEquals(0, ints.get(0));
NumericDocValues longs = cache.getNumerics(ar, "boguslongs", FieldCache.LONG_POINT_PARSER, true);
assertEquals(0, longs.get(0));
NumericDocValues floats = cache.getNumerics(ar, "bogusfloats", FieldCache.FLOAT_POINT_PARSER, true);
assertEquals(0, floats.get(0));
NumericDocValues doubles = cache.getNumerics(ar, "bogusdoubles", FieldCache.DOUBLE_POINT_PARSER, true);
assertEquals(0, doubles.get(0));
BinaryDocValues binaries = cache.getTerms(ar, "bogusterms", true);
BytesRef scratch = binaries.get(0);
assertEquals(0, scratch.length);
SortedDocValues sorted = cache.getTermsIndex(ar, "bogustermsindex");
assertEquals(-1, sorted.getOrd(0));
scratch = sorted.get(0);
assertEquals(0, scratch.length);
SortedSetDocValues sortedSet = cache.getDocTermOrds(ar, "bogusmultivalued", null);
sortedSet.setDocument(0);
assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
Bits bits = cache.getDocsWithField(ar, "bogusbits", null);
assertFalse(bits.get(0));
// check that we cached nothing
assertEquals(0, cache.getCacheEntries().length);
ir.close();
dir.close();
}
public void testNonIndexedFields() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(new StoredField("bogusbytes", "bogus"));
doc.add(new StoredField("bogusshorts", "bogus"));
doc.add(new StoredField("bogusints", "bogus"));
doc.add(new StoredField("boguslongs", "bogus"));
doc.add(new StoredField("bogusfloats", "bogus"));
doc.add(new StoredField("bogusdoubles", "bogus"));
doc.add(new StoredField("bogusterms", "bogus"));
doc.add(new StoredField("bogustermsindex", "bogus"));
doc.add(new StoredField("bogusmultivalued", "bogus"));
doc.add(new StoredField("bogusbits", "bogus"));
iw.addDocument(doc);
DirectoryReader ir = iw.getReader();
iw.close();
LeafReader ar = getOnlyLeafReader(ir);
final FieldCache cache = FieldCache.DEFAULT;
cache.purgeAllCaches();
assertEquals(0, cache.getCacheEntries().length);
NumericDocValues ints = cache.getNumerics(ar, "bogusints", FieldCache.INT_POINT_PARSER, true);
assertEquals(0, ints.get(0));
NumericDocValues longs = cache.getNumerics(ar, "boguslongs", FieldCache.LONG_POINT_PARSER, true);
assertEquals(0, longs.get(0));
NumericDocValues floats = cache.getNumerics(ar, "bogusfloats", FieldCache.FLOAT_POINT_PARSER, true);
assertEquals(0, floats.get(0));
NumericDocValues doubles = cache.getNumerics(ar, "bogusdoubles", FieldCache.DOUBLE_POINT_PARSER, true);
assertEquals(0, doubles.get(0));
BinaryDocValues binaries = cache.getTerms(ar, "bogusterms", true);
BytesRef scratch = binaries.get(0);
assertEquals(0, scratch.length);
SortedDocValues sorted = cache.getTermsIndex(ar, "bogustermsindex");
assertEquals(-1, sorted.getOrd(0));
scratch = sorted.get(0);
assertEquals(0, scratch.length);
SortedSetDocValues sortedSet = cache.getDocTermOrds(ar, "bogusmultivalued", null);
sortedSet.setDocument(0);
assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
Bits bits = cache.getDocsWithField(ar, "bogusbits", null);
assertFalse(bits.get(0));
// check that we cached nothing
assertEquals(0, cache.getCacheEntries().length);
ir.close();
dir.close();
}
// Make sure that the use of GrowableWriter doesn't prevent from using the full long range
public void testLongFieldCache() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random()));
cfg.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg);
Document doc = new Document();
LongPoint field = new LongPoint("f", 0L);
StoredField field2 = new StoredField("f", 0L);
doc.add(field);
doc.add(field2);
final long[] values = new long[TestUtil.nextInt(random(), 1, 10)];
for (int i = 0; i < values.length; ++i) {
final long v;
switch (random().nextInt(10)) {
case 0:
v = Long.MIN_VALUE;
break;
case 1:
v = 0;
break;
case 2:
v = Long.MAX_VALUE;
break;
default:
v = TestUtil.nextLong(random(), -10, 10);
break;
}
values[i] = v;
if (v == 0 && random().nextBoolean()) {
// missing
iw.addDocument(new Document());
} else {
field.setLongValue(v);
field2.setLongValue(v);
iw.addDocument(doc);
}
}
iw.forceMerge(1);
final DirectoryReader reader = iw.getReader();
final NumericDocValues longs = FieldCache.DEFAULT.getNumerics(getOnlyLeafReader(reader), "f", FieldCache.LONG_POINT_PARSER, false);
for (int i = 0; i < values.length; ++i) {
assertEquals(values[i], longs.get(i));
}
reader.close();
iw.close();
dir.close();
}
// Make sure that the use of GrowableWriter doesn't prevent from using the full int range
public void testIntFieldCache() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random()));
cfg.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg);
Document doc = new Document();
IntPoint field = new IntPoint("f", 0);
doc.add(field);
final int[] values = new int[TestUtil.nextInt(random(), 1, 10)];
for (int i = 0; i < values.length; ++i) {
final int v;
switch (random().nextInt(10)) {
case 0:
v = Integer.MIN_VALUE;
break;
case 1:
v = 0;
break;
case 2:
v = Integer.MAX_VALUE;
break;
default:
v = TestUtil.nextInt(random(), -10, 10);
break;
}
values[i] = v;
if (v == 0 && random().nextBoolean()) {
// missing
iw.addDocument(new Document());
} else {
field.setIntValue(v);
iw.addDocument(doc);
}
}
iw.forceMerge(1);
final DirectoryReader reader = iw.getReader();
final NumericDocValues ints = FieldCache.DEFAULT.getNumerics(getOnlyLeafReader(reader), "f", FieldCache.INT_POINT_PARSER, false);
for (int i = 0; i < values.length; ++i) {
assertEquals(values[i], ints.get(i));
}
reader.close();
iw.close();
dir.close();
}
}

View File

@ -0,0 +1,70 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.uninverting;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
public class TestFieldCacheReopen extends LuceneTestCase {
// TODO: make a version of this that tests the same thing with UninvertingReader.wrap()
// LUCENE-1579: Ensure that on a reopened reader, that any
// shared segments reuse the doc values arrays in
// FieldCache
public void testFieldCacheReuseAfterReopen() throws Exception {
Directory dir = newDirectory();
IndexWriter writer = new IndexWriter(
dir,
newIndexWriterConfig(new MockAnalyzer(random())).
setMergePolicy(newLogMergePolicy(10))
);
Document doc = new Document();
doc.add(new IntPoint("number", 17));
writer.addDocument(doc);
writer.commit();
// Open reader1
DirectoryReader r = DirectoryReader.open(dir);
LeafReader r1 = getOnlyLeafReader(r);
final NumericDocValues ints = FieldCache.DEFAULT.getNumerics(r1, "number", FieldCache.INT_POINT_PARSER, false);
assertEquals(17, ints.get(0));
// Add new segment
writer.addDocument(doc);
writer.commit();
// Reopen reader1 --> reader2
DirectoryReader r2 = DirectoryReader.openIfChanged(r);
assertNotNull(r2);
r.close();
LeafReader sub0 = r2.leaves().get(0).reader();
final NumericDocValues ints2 = FieldCache.DEFAULT.getNumerics(sub0, "number", FieldCache.INT_POINT_PARSER, false);
r2.close();
assertTrue(ints == ints2);
writer.close();
dir.close();
}
}

View File

@ -0,0 +1,164 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.uninverting;
import java.io.IOException;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LegacyDoubleField;
import org.apache.lucene.document.LegacyFloatField;
import org.apache.lucene.document.LegacyIntField;
import org.apache.lucene.document.LegacyLongField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.solr.index.SlowCompositeReaderWrapper;
import org.apache.solr.uninverting.FieldCacheSanityChecker.Insanity;
import org.apache.solr.uninverting.FieldCacheSanityChecker.InsanityType;
public class TestFieldCacheSanityChecker extends LuceneTestCase {
protected LeafReader readerA;
protected LeafReader readerB;
protected LeafReader readerX;
protected LeafReader readerAclone;
protected Directory dirA, dirB;
private static final int NUM_DOCS = 1000;
@Override
public void setUp() throws Exception {
super.setUp();
dirA = newDirectory();
dirB = newDirectory();
IndexWriter wA = new IndexWriter(dirA, newIndexWriterConfig(new MockAnalyzer(random())));
IndexWriter wB = new IndexWriter(dirB, newIndexWriterConfig(new MockAnalyzer(random())));
long theLong = Long.MAX_VALUE;
double theDouble = Double.MAX_VALUE;
int theInt = Integer.MAX_VALUE;
float theFloat = Float.MAX_VALUE;
for (int i = 0; i < NUM_DOCS; i++){
Document doc = new Document();
doc.add(new LegacyLongField("theLong", theLong--, Field.Store.NO));
doc.add(new LegacyDoubleField("theDouble", theDouble--, Field.Store.NO));
doc.add(new LegacyIntField("theInt", theInt--, Field.Store.NO));
doc.add(new LegacyFloatField("theFloat", theFloat--, Field.Store.NO));
if (0 == i % 3) {
wA.addDocument(doc);
} else {
wB.addDocument(doc);
}
}
wA.close();
wB.close();
DirectoryReader rA = DirectoryReader.open(dirA);
readerA = SlowCompositeReaderWrapper.wrap(rA);
readerAclone = SlowCompositeReaderWrapper.wrap(rA);
readerA = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dirA));
readerB = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dirB));
readerX = SlowCompositeReaderWrapper.wrap(new MultiReader(readerA, readerB));
}
@Override
public void tearDown() throws Exception {
readerA.close();
readerAclone.close();
readerB.close();
readerX.close();
dirA.close();
dirB.close();
super.tearDown();
}
public void testSanity() throws IOException {
FieldCache cache = FieldCache.DEFAULT;
cache.purgeAllCaches();
cache.getNumerics(readerA, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER, false);
cache.getNumerics(readerAclone, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER, false);
cache.getNumerics(readerB, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER, false);
cache.getNumerics(readerX, "theInt", FieldCache.LEGACY_INT_PARSER, false);
// // //
Insanity[] insanity =
FieldCacheSanityChecker.checkSanity(cache.getCacheEntries());
if (0 < insanity.length)
dumpArray(getTestClass().getName() + "#" + getTestName()
+ " INSANITY", insanity, System.err);
assertEquals("shouldn't be any cache insanity", 0, insanity.length);
cache.purgeAllCaches();
}
public void testInsanity1() throws IOException {
FieldCache cache = FieldCache.DEFAULT;
cache.purgeAllCaches();
cache.getNumerics(readerX, "theInt", FieldCache.LEGACY_INT_PARSER, false);
cache.getTerms(readerX, "theInt", false);
// // //
Insanity[] insanity =
FieldCacheSanityChecker.checkSanity(cache.getCacheEntries());
assertEquals("wrong number of cache errors", 1, insanity.length);
assertEquals("wrong type of cache error",
InsanityType.VALUEMISMATCH,
insanity[0].getType());
assertEquals("wrong number of entries in cache error", 2,
insanity[0].getCacheEntries().length);
// we expect bad things, don't let tearDown complain about them
cache.purgeAllCaches();
}
public void testInsanity2() throws IOException {
FieldCache cache = FieldCache.DEFAULT;
cache.purgeAllCaches();
cache.getTerms(readerA, "theInt", false);
cache.getTerms(readerB, "theInt", false);
cache.getTerms(readerX, "theInt", false);
// // //
Insanity[] insanity =
FieldCacheSanityChecker.checkSanity(cache.getCacheEntries());
assertEquals("wrong number of cache errors", 1, insanity.length);
assertEquals("wrong type of cache error",
InsanityType.SUBREADER,
insanity[0].getType());
assertEquals("wrong number of entries in cache error", 3,
insanity[0].getCacheEntries().length);
// we expect bad things, don't let tearDown complain about them
cache.purgeAllCaches();
}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,318 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.uninverting;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Random;
import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.search.Weight;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.uninverting.UninvertingReader.Type;
/** random sorting tests with uninversion */
public class TestFieldCacheSortRandom extends LuceneTestCase {
public void testRandomStringSort() throws Exception {
testRandomStringSort(SortField.Type.STRING);
}
public void testRandomStringValSort() throws Exception {
testRandomStringSort(SortField.Type.STRING_VAL);
}
private void testRandomStringSort(SortField.Type type) throws Exception {
Random random = new Random(random().nextLong());
final int NUM_DOCS = atLeast(100);
final Directory dir = newDirectory();
final RandomIndexWriter writer = new RandomIndexWriter(random, dir);
final boolean allowDups = random.nextBoolean();
final Set<String> seen = new HashSet<>();
final int maxLength = TestUtil.nextInt(random, 5, 100);
if (VERBOSE) {
System.out.println("TEST: NUM_DOCS=" + NUM_DOCS + " maxLength=" + maxLength + " allowDups=" + allowDups);
}
int numDocs = 0;
final List<BytesRef> docValues = new ArrayList<>();
// TODO: deletions
while (numDocs < NUM_DOCS) {
final Document doc = new Document();
// 10% of the time, the document is missing the value:
final BytesRef br;
if (random().nextInt(10) != 7) {
final String s;
if (random.nextBoolean()) {
s = TestUtil.randomSimpleString(random, maxLength);
} else {
s = TestUtil.randomUnicodeString(random, maxLength);
}
if (!allowDups) {
if (seen.contains(s)) {
continue;
}
seen.add(s);
}
if (VERBOSE) {
System.out.println(" " + numDocs + ": s=" + s);
}
doc.add(new StringField("stringdv", s, Field.Store.NO));
docValues.add(new BytesRef(s));
} else {
br = null;
if (VERBOSE) {
System.out.println(" " + numDocs + ": <missing>");
}
docValues.add(null);
}
doc.add(new IntPoint("id", numDocs));
doc.add(new StoredField("id", numDocs));
writer.addDocument(doc);
numDocs++;
if (random.nextInt(40) == 17) {
// force flush
writer.getReader().close();
}
}
Map<String,UninvertingReader.Type> mapping = new HashMap<>();
mapping.put("stringdv", Type.SORTED);
mapping.put("id", Type.INTEGER_POINT);
final IndexReader r = UninvertingReader.wrap(writer.getReader(), mapping);
writer.close();
if (VERBOSE) {
System.out.println(" reader=" + r);
}
final IndexSearcher s = newSearcher(r, false);
final int ITERS = atLeast(100);
for(int iter=0;iter<ITERS;iter++) {
final boolean reverse = random.nextBoolean();
final TopFieldDocs hits;
final SortField sf;
final boolean sortMissingLast;
final boolean missingIsNull;
sf = new SortField("stringdv", type, reverse);
sortMissingLast = random().nextBoolean();
missingIsNull = true;
if (sortMissingLast) {
sf.setMissingValue(SortField.STRING_LAST);
}
final Sort sort;
if (random.nextBoolean()) {
sort = new Sort(sf);
} else {
sort = new Sort(sf, SortField.FIELD_DOC);
}
final int hitCount = TestUtil.nextInt(random, 1, r.maxDoc() + 20);
final RandomQuery f = new RandomQuery(random.nextLong(), random.nextFloat(), docValues);
int queryType = random.nextInt(2);
if (queryType == 0) {
hits = s.search(new ConstantScoreQuery(f),
hitCount, sort, random.nextBoolean(), random.nextBoolean());
} else {
hits = s.search(f, hitCount, sort, random.nextBoolean(), random.nextBoolean());
}
if (VERBOSE) {
System.out.println("\nTEST: iter=" + iter + " " + hits.totalHits + " hits; topN=" + hitCount + "; reverse=" + reverse + "; sortMissingLast=" + sortMissingLast + " sort=" + sort);
}
// Compute expected results:
Collections.sort(f.matchValues, new Comparator<BytesRef>() {
@Override
public int compare(BytesRef a, BytesRef b) {
if (a == null) {
if (b == null) {
return 0;
}
if (sortMissingLast) {
return 1;
} else {
return -1;
}
} else if (b == null) {
if (sortMissingLast) {
return -1;
} else {
return 1;
}
} else {
return a.compareTo(b);
}
}
});
if (reverse) {
Collections.reverse(f.matchValues);
}
final List<BytesRef> expected = f.matchValues;
if (VERBOSE) {
System.out.println(" expected:");
for(int idx=0;idx<expected.size();idx++) {
BytesRef br = expected.get(idx);
if (br == null && missingIsNull == false) {
br = new BytesRef();
}
System.out.println(" " + idx + ": " + (br == null ? "<missing>" : br.utf8ToString()));
if (idx == hitCount-1) {
break;
}
}
}
if (VERBOSE) {
System.out.println(" actual:");
for(int hitIDX=0;hitIDX<hits.scoreDocs.length;hitIDX++) {
final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
BytesRef br = (BytesRef) fd.fields[0];
System.out.println(" " + hitIDX + ": " + (br == null ? "<missing>" : br.utf8ToString()) + " id=" + s.doc(fd.doc).get("id"));
}
}
for(int hitIDX=0;hitIDX<hits.scoreDocs.length;hitIDX++) {
final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
BytesRef br = expected.get(hitIDX);
if (br == null && missingIsNull == false) {
br = new BytesRef();
}
// Normally, the old codecs (that don't support
// docsWithField via doc values) will always return
// an empty BytesRef for the missing case; however,
// if all docs in a given segment were missing, in
// that case it will return null! So we must map
// null here, too:
BytesRef br2 = (BytesRef) fd.fields[0];
if (br2 == null && missingIsNull == false) {
br2 = new BytesRef();
}
assertEquals(br, br2);
}
}
r.close();
dir.close();
}
private static class RandomQuery extends Query {
private final long seed;
private float density;
private final List<BytesRef> docValues;
public final List<BytesRef> matchValues = Collections.synchronizedList(new ArrayList<BytesRef>());
// density should be 0.0 ... 1.0
public RandomQuery(long seed, float density, List<BytesRef> docValues) {
this.seed = seed;
this.density = density;
this.docValues = docValues;
}
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
return new ConstantScoreWeight(this) {
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
Random random = new Random(seed ^ context.docBase);
final int maxDoc = context.reader().maxDoc();
final NumericDocValues idSource = DocValues.getNumeric(context.reader(), "id");
assertNotNull(idSource);
final FixedBitSet bits = new FixedBitSet(maxDoc);
for(int docID=0;docID<maxDoc;docID++) {
if (random.nextFloat() <= density) {
bits.set(docID);
//System.out.println(" acc id=" + idSource.getInt(docID) + " docID=" + docID);
matchValues.add(docValues.get((int) idSource.get(docID)));
}
}
return new ConstantScoreScorer(this, score(), new BitSetIterator(bits, bits.approximateCardinality()));
}
};
}
@Override
public String toString(String field) {
return "RandomFilter(density=" + density + ")";
}
@Override
public boolean equals(Object other) {
return sameClassAs(other) &&
equalsTo(getClass().cast(other));
}
private boolean equalsTo(RandomQuery other) {
return seed == other.seed &&
docValues == other.docValues &&
density == other.density;
}
@Override
public int hashCode() {
int h = classHash();
h = 31 * h + Objects.hash(seed, density);
h = 31 * h + System.identityHashCode(docValues);
return h;
}
}
}

View File

@ -0,0 +1,592 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.uninverting;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Constants;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.index.SlowCompositeReaderWrapper;
import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;
public class TestFieldCacheVsDocValues extends LuceneTestCase {
@Override
public void setUp() throws Exception {
super.setUp();
assumeFalse("test unsupported on J9 temporarily, see https://issues.apache.org/jira/browse/LUCENE-6522",
Constants.JAVA_VENDOR.startsWith("IBM"));
}
public void testByteMissingVsFieldCache() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
doTestMissingVsFieldCache(Byte.MIN_VALUE, Byte.MAX_VALUE);
}
}
public void testShortMissingVsFieldCache() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
doTestMissingVsFieldCache(Short.MIN_VALUE, Short.MAX_VALUE);
}
}
public void testIntMissingVsFieldCache() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
doTestMissingVsFieldCache(Integer.MIN_VALUE, Integer.MAX_VALUE);
}
}
public void testLongMissingVsFieldCache() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
doTestMissingVsFieldCache(Long.MIN_VALUE, Long.MAX_VALUE);
}
}
public void testSortedFixedLengthVsFieldCache() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
int fixedLength = TestUtil.nextInt(random(), 1, 10);
doTestSortedVsFieldCache(fixedLength, fixedLength);
}
}
public void testSortedVariableLengthVsFieldCache() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
doTestSortedVsFieldCache(1, 10);
}
}
public void testSortedSetFixedLengthVsUninvertedField() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
int fixedLength = TestUtil.nextInt(random(), 1, 10);
doTestSortedSetVsUninvertedField(fixedLength, fixedLength);
}
}
public void testSortedSetVariableLengthVsUninvertedField() throws Exception {
int numIterations = atLeast(1);
for (int i = 0; i < numIterations; i++) {
doTestSortedSetVsUninvertedField(1, 10);
}
}
// LUCENE-4853
public void testHugeBinaryValues() throws Exception {
Analyzer analyzer = new MockAnalyzer(random());
// FSDirectory because SimpleText will consume gobbs of
// space when storing big binary values:
Directory d = newFSDirectory(createTempDir("hugeBinaryValues"));
boolean doFixed = random().nextBoolean();
int numDocs;
int fixedLength = 0;
if (doFixed) {
// Sometimes make all values fixed length since some
// codecs have different code paths for this:
numDocs = TestUtil.nextInt(random(), 10, 20);
fixedLength = TestUtil.nextInt(random(), 65537, 256 * 1024);
} else {
numDocs = TestUtil.nextInt(random(), 100, 200);
}
IndexWriter w = new IndexWriter(d, newIndexWriterConfig(analyzer));
List<byte[]> docBytes = new ArrayList<>();
long totalBytes = 0;
for(int docID=0;docID<numDocs;docID++) {
// we don't use RandomIndexWriter because it might add
// more docvalues than we expect !!!!
// Must be > 64KB in size to ensure more than 2 pages in
// PagedBytes would be needed:
int numBytes;
if (doFixed) {
numBytes = fixedLength;
} else if (docID == 0 || random().nextInt(5) == 3) {
numBytes = TestUtil.nextInt(random(), 65537, 3 * 1024 * 1024);
} else {
numBytes = TestUtil.nextInt(random(), 1, 1024 * 1024);
}
totalBytes += numBytes;
if (totalBytes > 5 * 1024*1024) {
break;
}
byte[] bytes = new byte[numBytes];
random().nextBytes(bytes);
docBytes.add(bytes);
Document doc = new Document();
BytesRef b = new BytesRef(bytes);
b.length = bytes.length;
doc.add(new BinaryDocValuesField("field", b));
doc.add(new StringField("id", ""+docID, Field.Store.YES));
try {
w.addDocument(doc);
} catch (IllegalArgumentException iae) {
if (iae.getMessage().indexOf("is too large") == -1) {
throw iae;
} else {
// OK: some codecs can't handle binary DV > 32K
assertFalse(codecAcceptsHugeBinaryValues("field"));
w.rollback();
d.close();
return;
}
}
}
DirectoryReader r;
try {
r = DirectoryReader.open(w);
} catch (IllegalArgumentException iae) {
if (iae.getMessage().indexOf("is too large") == -1) {
throw iae;
} else {
assertFalse(codecAcceptsHugeBinaryValues("field"));
// OK: some codecs can't handle binary DV > 32K
w.rollback();
d.close();
return;
}
}
w.close();
LeafReader ar = SlowCompositeReaderWrapper.wrap(r);
TestUtil.checkReader(ar);
BinaryDocValues s = FieldCache.DEFAULT.getTerms(ar, "field", false);
for(int docID=0;docID<docBytes.size();docID++) {
Document doc = ar.document(docID);
BytesRef bytes = s.get(docID);
byte[] expected = docBytes.get(Integer.parseInt(doc.get("id")));
assertEquals(expected.length, bytes.length);
assertEquals(new BytesRef(expected), bytes);
}
assertTrue(codecAcceptsHugeBinaryValues("field"));
ar.close();
d.close();
}
private static final int LARGE_BINARY_FIELD_LENGTH = (1 << 15) - 2;
// TODO: get this out of here and into the deprecated codecs (4.0, 4.2)
public void testHugeBinaryValueLimit() throws Exception {
// We only test DVFormats that have a limit
assumeFalse("test requires codec with limits on max binary field length", codecAcceptsHugeBinaryValues("field"));
Analyzer analyzer = new MockAnalyzer(random());
// FSDirectory because SimpleText will consume gobbs of
// space when storing big binary values:
Directory d = newFSDirectory(createTempDir("hugeBinaryValues"));
boolean doFixed = random().nextBoolean();
int numDocs;
int fixedLength = 0;
if (doFixed) {
// Sometimes make all values fixed length since some
// codecs have different code paths for this:
numDocs = TestUtil.nextInt(random(), 10, 20);
fixedLength = LARGE_BINARY_FIELD_LENGTH;
} else {
numDocs = TestUtil.nextInt(random(), 100, 200);
}
IndexWriter w = new IndexWriter(d, newIndexWriterConfig(analyzer));
List<byte[]> docBytes = new ArrayList<>();
long totalBytes = 0;
for(int docID=0;docID<numDocs;docID++) {
// we don't use RandomIndexWriter because it might add
// more docvalues than we expect !!!!
// Must be > 64KB in size to ensure more than 2 pages in
// PagedBytes would be needed:
int numBytes;
if (doFixed) {
numBytes = fixedLength;
} else if (docID == 0 || random().nextInt(5) == 3) {
numBytes = LARGE_BINARY_FIELD_LENGTH;
} else {
numBytes = TestUtil.nextInt(random(), 1, LARGE_BINARY_FIELD_LENGTH);
}
totalBytes += numBytes;
if (totalBytes > 5 * 1024*1024) {
break;
}
byte[] bytes = new byte[numBytes];
random().nextBytes(bytes);
docBytes.add(bytes);
Document doc = new Document();
BytesRef b = new BytesRef(bytes);
b.length = bytes.length;
doc.add(new BinaryDocValuesField("field", b));
doc.add(new StringField("id", ""+docID, Field.Store.YES));
w.addDocument(doc);
}
DirectoryReader r = DirectoryReader.open(w);
w.close();
LeafReader ar = SlowCompositeReaderWrapper.wrap(r);
TestUtil.checkReader(ar
);
BinaryDocValues s = FieldCache.DEFAULT.getTerms(ar, "field", false);
for(int docID=0;docID<docBytes.size();docID++) {
Document doc = ar.document(docID);
BytesRef bytes = s.get(docID);
byte[] expected = docBytes.get(Integer.parseInt(doc.get("id")));
assertEquals(expected.length, bytes.length);
assertEquals(new BytesRef(expected), bytes);
}
ar.close();
d.close();
}
private void doTestSortedVsFieldCache(int minLength, int maxLength) throws Exception {
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
Document doc = new Document();
Field idField = new StringField("id", "", Field.Store.NO);
Field indexedField = new StringField("indexed", "", Field.Store.NO);
Field dvField = new SortedDocValuesField("dv", new BytesRef());
doc.add(idField);
doc.add(indexedField);
doc.add(dvField);
// index some docs
int numDocs = atLeast(300);
for (int i = 0; i < numDocs; i++) {
idField.setStringValue(Integer.toString(i));
final int length;
if (minLength == maxLength) {
length = minLength; // fixed length
} else {
length = TestUtil.nextInt(random(), minLength, maxLength);
}
String value = TestUtil.randomSimpleString(random(), length);
indexedField.setStringValue(value);
dvField.setBytesValue(new BytesRef(value));
writer.addDocument(doc);
if (random().nextInt(31) == 0) {
writer.commit();
}
}
// delete some docs
int numDeletions = random().nextInt(numDocs/10);
for (int i = 0; i < numDeletions; i++) {
int id = random().nextInt(numDocs);
writer.deleteDocuments(new Term("id", Integer.toString(id)));
}
writer.close();
// compare
DirectoryReader ir = DirectoryReader.open(dir);
for (LeafReaderContext context : ir.leaves()) {
LeafReader r = context.reader();
SortedDocValues expected = FieldCache.DEFAULT.getTermsIndex(r, "indexed");
SortedDocValues actual = r.getSortedDocValues("dv");
assertEquals(r.maxDoc(), expected, actual);
}
ir.close();
dir.close();
}
private void doTestSortedSetVsUninvertedField(int minLength, int maxLength) throws Exception {
Directory dir = newDirectory();
IndexWriterConfig conf = new IndexWriterConfig(new MockAnalyzer(random()));
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
// index some docs
int numDocs = atLeast(300);
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
Field idField = new StringField("id", Integer.toString(i), Field.Store.NO);
doc.add(idField);
final int length = TestUtil.nextInt(random(), minLength, maxLength);
int numValues = random().nextInt(17);
// create a random list of strings
List<String> values = new ArrayList<>();
for (int v = 0; v < numValues; v++) {
values.add(TestUtil.randomSimpleString(random(), minLength, length));
}
// add in any order to the indexed field
ArrayList<String> unordered = new ArrayList<>(values);
Collections.shuffle(unordered, random());
for (String v : values) {
doc.add(newStringField("indexed", v, Field.Store.NO));
}
// add in any order to the dv field
ArrayList<String> unordered2 = new ArrayList<>(values);
Collections.shuffle(unordered2, random());
for (String v : unordered2) {
doc.add(new SortedSetDocValuesField("dv", new BytesRef(v)));
}
writer.addDocument(doc);
if (random().nextInt(31) == 0) {
writer.commit();
}
}
// delete some docs
int numDeletions = random().nextInt(numDocs/10);
for (int i = 0; i < numDeletions; i++) {
int id = random().nextInt(numDocs);
writer.deleteDocuments(new Term("id", Integer.toString(id)));
}
// compare per-segment
DirectoryReader ir = writer.getReader();
for (LeafReaderContext context : ir.leaves()) {
LeafReader r = context.reader();
SortedSetDocValues expected = FieldCache.DEFAULT.getDocTermOrds(r, "indexed", null);
SortedSetDocValues actual = r.getSortedSetDocValues("dv");
assertEquals(r.maxDoc(), expected, actual);
}
ir.close();
writer.forceMerge(1);
// now compare again after the merge
ir = writer.getReader();
LeafReader ar = getOnlyLeafReader(ir);
SortedSetDocValues expected = FieldCache.DEFAULT.getDocTermOrds(ar, "indexed", null);
SortedSetDocValues actual = ar.getSortedSetDocValues("dv");
assertEquals(ir.maxDoc(), expected, actual);
ir.close();
writer.close();
dir.close();
}
private void doTestMissingVsFieldCache(LongProducer longs) throws Exception {
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
Field idField = new StringField("id", "", Field.Store.NO);
Field indexedField = newStringField("indexed", "", Field.Store.NO);
Field dvField = new NumericDocValuesField("dv", 0);
// index some docs
int numDocs = atLeast(300);
// numDocs should be always > 256 so that in case of a codec that optimizes
// for numbers of values <= 256, all storage layouts are tested
assert numDocs > 256;
for (int i = 0; i < numDocs; i++) {
idField.setStringValue(Integer.toString(i));
long value = longs.next();
indexedField.setStringValue(Long.toString(value));
dvField.setLongValue(value);
Document doc = new Document();
doc.add(idField);
// 1/4 of the time we neglect to add the fields
if (random().nextInt(4) > 0) {
doc.add(indexedField);
doc.add(dvField);
}
writer.addDocument(doc);
if (random().nextInt(31) == 0) {
writer.commit();
}
}
// delete some docs
int numDeletions = random().nextInt(numDocs/10);
for (int i = 0; i < numDeletions; i++) {
int id = random().nextInt(numDocs);
writer.deleteDocuments(new Term("id", Integer.toString(id)));
}
// merge some segments and ensure that at least one of them has more than
// 256 values
writer.forceMerge(numDocs / 256);
writer.close();
// compare
DirectoryReader ir = DirectoryReader.open(dir);
for (LeafReaderContext context : ir.leaves()) {
LeafReader r = context.reader();
Bits expected = FieldCache.DEFAULT.getDocsWithField(r, "indexed", null);
Bits actual = FieldCache.DEFAULT.getDocsWithField(r, "dv", null);
assertEquals(expected, actual);
}
ir.close();
dir.close();
}
private void doTestMissingVsFieldCache(final long minValue, final long maxValue) throws Exception {
doTestMissingVsFieldCache(new LongProducer() {
@Override
long next() {
return TestUtil.nextLong(random(), minValue, maxValue);
}
});
}
static abstract class LongProducer {
abstract long next();
}
private void assertEquals(Bits expected, Bits actual) throws Exception {
assertEquals(expected.length(), actual.length());
for (int i = 0; i < expected.length(); i++) {
assertEquals(expected.get(i), actual.get(i));
}
}
private void assertEquals(int maxDoc, SortedDocValues expected, SortedDocValues actual) throws Exception {
assertEquals(maxDoc, DocValues.singleton(expected), DocValues.singleton(actual));
}
private void assertEquals(int maxDoc, SortedSetDocValues expected, SortedSetDocValues actual) throws Exception {
// can be null for the segment if no docs actually had any SortedDocValues
// in this case FC.getDocTermsOrds returns EMPTY
if (actual == null) {
assertEquals(expected.getValueCount(), 0);
return;
}
assertEquals(expected.getValueCount(), actual.getValueCount());
// compare ord lists
for (int i = 0; i < maxDoc; i++) {
expected.setDocument(i);
actual.setDocument(i);
long expectedOrd;
while ((expectedOrd = expected.nextOrd()) != NO_MORE_ORDS) {
assertEquals(expectedOrd, actual.nextOrd());
}
assertEquals(NO_MORE_ORDS, actual.nextOrd());
}
// compare ord dictionary
for (long i = 0; i < expected.getValueCount(); i++) {
final BytesRef expectedBytes = BytesRef.deepCopyOf(expected.lookupOrd(i));
final BytesRef actualBytes = actual.lookupOrd(i);
assertEquals(expectedBytes, actualBytes);
}
// compare termsenum
assertEquals(expected.getValueCount(), expected.termsEnum(), actual.termsEnum());
}
private void assertEquals(long numOrds, TermsEnum expected, TermsEnum actual) throws Exception {
BytesRef ref;
// sequential next() through all terms
while ((ref = expected.next()) != null) {
assertEquals(ref, actual.next());
assertEquals(expected.ord(), actual.ord());
assertEquals(expected.term(), actual.term());
}
assertNull(actual.next());
// sequential seekExact(ord) through all terms
for (long i = 0; i < numOrds; i++) {
expected.seekExact(i);
actual.seekExact(i);
assertEquals(expected.ord(), actual.ord());
assertEquals(expected.term(), actual.term());
}
// sequential seekExact(BytesRef) through all terms
for (long i = 0; i < numOrds; i++) {
expected.seekExact(i);
assertTrue(actual.seekExact(expected.term()));
assertEquals(expected.ord(), actual.ord());
assertEquals(expected.term(), actual.term());
}
// sequential seekCeil(BytesRef) through all terms
for (long i = 0; i < numOrds; i++) {
expected.seekExact(i);
assertEquals(SeekStatus.FOUND, actual.seekCeil(expected.term()));
assertEquals(expected.ord(), actual.ord());
assertEquals(expected.term(), actual.term());
}
// random seekExact(ord)
for (long i = 0; i < numOrds; i++) {
long randomOrd = TestUtil.nextLong(random(), 0, numOrds - 1);
expected.seekExact(randomOrd);
actual.seekExact(randomOrd);
assertEquals(expected.ord(), actual.ord());
assertEquals(expected.term(), actual.term());
}
// random seekExact(BytesRef)
for (long i = 0; i < numOrds; i++) {
long randomOrd = TestUtil.nextLong(random(), 0, numOrds - 1);
expected.seekExact(randomOrd);
actual.seekExact(expected.term());
assertEquals(expected.ord(), actual.ord());
assertEquals(expected.term(), actual.term());
}
// random seekCeil(BytesRef)
for (long i = 0; i < numOrds; i++) {
BytesRef target = new BytesRef(TestUtil.randomUnicodeString(random()));
SeekStatus expectedStatus = expected.seekCeil(target);
assertEquals(expectedStatus, actual.seekCeil(target));
if (expectedStatus != SeekStatus.END) {
assertEquals(expected.ord(), actual.ord());
assertEquals(expected.term(), actual.term());
}
}
}
protected boolean codecAcceptsHugeBinaryValues(String field) {
String name = TestUtil.getDocValuesFormat(field);
return !(name.equals("Memory")); // Direct has a different type of limit
}
}

View File

@ -0,0 +1,228 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.uninverting;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
// TODO: what happened to this test... its not actually uninverting?
public class TestFieldCacheWithThreads extends LuceneTestCase {
public void test() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
final List<Long> numbers = new ArrayList<>();
final List<BytesRef> binary = new ArrayList<>();
final List<BytesRef> sorted = new ArrayList<>();
final int numDocs = atLeast(100);
for(int i=0;i<numDocs;i++) {
Document d = new Document();
long number = random().nextLong();
d.add(new NumericDocValuesField("number", number));
BytesRef bytes = new BytesRef(TestUtil.randomRealisticUnicodeString(random()));
d.add(new BinaryDocValuesField("bytes", bytes));
binary.add(bytes);
bytes = new BytesRef(TestUtil.randomRealisticUnicodeString(random()));
d.add(new SortedDocValuesField("sorted", bytes));
sorted.add(bytes);
w.addDocument(d);
numbers.add(number);
}
w.forceMerge(1);
final IndexReader r = DirectoryReader.open(w);
w.close();
assertEquals(1, r.leaves().size());
final LeafReader ar = r.leaves().get(0).reader();
int numThreads = TestUtil.nextInt(random(), 2, 5);
List<Thread> threads = new ArrayList<>();
final CountDownLatch startingGun = new CountDownLatch(1);
for(int t=0;t<numThreads;t++) {
final Random threadRandom = new Random(random().nextLong());
Thread thread = new Thread() {
@Override
public void run() {
try {
//NumericDocValues ndv = ar.getNumericDocValues("number");
NumericDocValues ndv = FieldCache.DEFAULT.getNumerics(ar, "number", FieldCache.LONG_POINT_PARSER, false);
//BinaryDocValues bdv = ar.getBinaryDocValues("bytes");
BinaryDocValues bdv = FieldCache.DEFAULT.getTerms(ar, "bytes", false);
SortedDocValues sdv = FieldCache.DEFAULT.getTermsIndex(ar, "sorted");
startingGun.await();
int iters = atLeast(1000);
for(int iter=0;iter<iters;iter++) {
int docID = threadRandom.nextInt(numDocs);
switch(threadRandom.nextInt(4)) {
case 0:
assertEquals(numbers.get(docID).longValue(), FieldCache.DEFAULT.getNumerics(ar, "number", FieldCache.INT_POINT_PARSER, false).get(docID));
break;
case 1:
assertEquals(numbers.get(docID).longValue(), FieldCache.DEFAULT.getNumerics(ar, "number", FieldCache.LONG_POINT_PARSER, false).get(docID));
break;
case 2:
assertEquals(numbers.get(docID).longValue(), FieldCache.DEFAULT.getNumerics(ar, "number", FieldCache.FLOAT_POINT_PARSER, false).get(docID));
break;
case 3:
assertEquals(numbers.get(docID).longValue(), FieldCache.DEFAULT.getNumerics(ar, "number", FieldCache.DOUBLE_POINT_PARSER, false).get(docID));
break;
}
BytesRef term = bdv.get(docID);
assertEquals(binary.get(docID), term);
term = sdv.get(docID);
assertEquals(sorted.get(docID), term);
}
} catch (Exception e) {
throw new RuntimeException(e);
}
}
};
thread.start();
threads.add(thread);
}
startingGun.countDown();
for(Thread thread : threads) {
thread.join();
}
r.close();
dir.close();
}
public void test2() throws Exception {
Random random = random();
final int NUM_DOCS = atLeast(100);
final Directory dir = newDirectory();
final RandomIndexWriter writer = new RandomIndexWriter(random, dir);
final boolean allowDups = random.nextBoolean();
final Set<String> seen = new HashSet<>();
if (VERBOSE) {
System.out.println("TEST: NUM_DOCS=" + NUM_DOCS + " allowDups=" + allowDups);
}
int numDocs = 0;
final List<BytesRef> docValues = new ArrayList<>();
// TODO: deletions
while (numDocs < NUM_DOCS) {
final String s;
if (random.nextBoolean()) {
s = TestUtil.randomSimpleString(random);
} else {
s = TestUtil.randomUnicodeString(random);
}
final BytesRef br = new BytesRef(s);
if (!allowDups) {
if (seen.contains(s)) {
continue;
}
seen.add(s);
}
if (VERBOSE) {
System.out.println(" " + numDocs + ": s=" + s);
}
final Document doc = new Document();
doc.add(new SortedDocValuesField("stringdv", br));
doc.add(new NumericDocValuesField("id", numDocs));
docValues.add(br);
writer.addDocument(doc);
numDocs++;
if (random.nextInt(40) == 17) {
// force flush
writer.getReader().close();
}
}
writer.forceMerge(1);
final DirectoryReader r = writer.getReader();
writer.close();
final LeafReader sr = getOnlyLeafReader(r);
final long END_TIME = System.nanoTime() + TimeUnit.NANOSECONDS.convert((TEST_NIGHTLY ? 30 : 1), TimeUnit.SECONDS);
final int NUM_THREADS = TestUtil.nextInt(random(), 1, 10);
Thread[] threads = new Thread[NUM_THREADS];
for(int thread=0;thread<NUM_THREADS;thread++) {
threads[thread] = new Thread() {
@Override
public void run() {
Random random = random();
final SortedDocValues stringDVDirect;
final NumericDocValues docIDToID;
try {
stringDVDirect = sr.getSortedDocValues("stringdv");
docIDToID = sr.getNumericDocValues("id");
assertNotNull(stringDVDirect);
} catch (IOException ioe) {
throw new RuntimeException(ioe);
}
while(System.nanoTime() < END_TIME) {
final SortedDocValues source;
source = stringDVDirect;
for(int iter=0;iter<100;iter++) {
final int docID = random.nextInt(sr.maxDoc());
BytesRef term = source.get(docID);
assertEquals(docValues.get((int) docIDToID.get(docID)), term);
}
}
}
};
threads[thread].start();
}
for(Thread thread : threads) {
thread.join();
}
r.close();
dir.close();
}
}

View File

@ -0,0 +1,497 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.uninverting;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.util.concurrent.CyclicBarrier;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LegacyDoubleField;
import org.apache.lucene.document.LegacyFloatField;
import org.apache.lucene.document.LegacyIntField;
import org.apache.lucene.document.LegacyLongField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LegacyNumericUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.index.SlowCompositeReaderWrapper;
import org.junit.AfterClass;
import org.junit.BeforeClass;
/** random assortment of tests against legacy numerics */
public class TestLegacyFieldCache extends LuceneTestCase {
private static LeafReader reader;
private static int NUM_DOCS;
private static Directory directory;
@BeforeClass
public static void beforeClass() throws Exception {
NUM_DOCS = atLeast(500);
directory = newDirectory();
RandomIndexWriter writer= new RandomIndexWriter(random(), directory, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
long theLong = Long.MAX_VALUE;
double theDouble = Double.MAX_VALUE;
int theInt = Integer.MAX_VALUE;
float theFloat = Float.MAX_VALUE;
if (VERBOSE) {
System.out.println("TEST: setUp");
}
for (int i = 0; i < NUM_DOCS; i++){
Document doc = new Document();
doc.add(new LegacyLongField("theLong", theLong--, Field.Store.NO));
doc.add(new LegacyDoubleField("theDouble", theDouble--, Field.Store.NO));
doc.add(new LegacyIntField("theInt", theInt--, Field.Store.NO));
doc.add(new LegacyFloatField("theFloat", theFloat--, Field.Store.NO));
if (i%2 == 0) {
doc.add(new LegacyIntField("sparse", i, Field.Store.NO));
}
if (i%2 == 0) {
doc.add(new LegacyIntField("numInt", i, Field.Store.NO));
}
writer.addDocument(doc);
}
IndexReader r = writer.getReader();
reader = SlowCompositeReaderWrapper.wrap(r);
TestUtil.checkReader(reader);
writer.close();
}
@AfterClass
public static void afterClass() throws Exception {
reader.close();
reader = null;
directory.close();
directory = null;
}
public void testInfoStream() throws Exception {
try {
FieldCache cache = FieldCache.DEFAULT;
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
cache.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8));
cache.getNumerics(reader, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER, false);
cache.getNumerics(reader, "theDouble", new FieldCache.Parser() {
@Override
public TermsEnum termsEnum(Terms terms) throws IOException {
return LegacyNumericUtils.filterPrefixCodedLongs(terms.iterator());
}
@Override
public long parseValue(BytesRef term) {
int val = (int) LegacyNumericUtils.prefixCodedToLong(term);
if (val<0) val ^= 0x7fffffff;
return val;
}
}, false);
assertTrue(bos.toString(IOUtils.UTF_8).indexOf("WARNING") != -1);
} finally {
FieldCache.DEFAULT.setInfoStream(null);
FieldCache.DEFAULT.purgeAllCaches();
}
}
public void test() throws IOException {
FieldCache cache = FieldCache.DEFAULT;
NumericDocValues doubles = cache.getNumerics(reader, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER, random().nextBoolean());
assertSame("Second request to cache return same array", doubles, cache.getNumerics(reader, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER, random().nextBoolean()));
for (int i = 0; i < NUM_DOCS; i++) {
assertEquals(Double.doubleToLongBits(Double.MAX_VALUE - i), doubles.get(i));
}
NumericDocValues longs = cache.getNumerics(reader, "theLong", FieldCache.LEGACY_LONG_PARSER, random().nextBoolean());
assertSame("Second request to cache return same array", longs, cache.getNumerics(reader, "theLong", FieldCache.LEGACY_LONG_PARSER, random().nextBoolean()));
for (int i = 0; i < NUM_DOCS; i++) {
assertEquals(Long.MAX_VALUE - i, longs.get(i));
}
NumericDocValues ints = cache.getNumerics(reader, "theInt", FieldCache.LEGACY_INT_PARSER, random().nextBoolean());
assertSame("Second request to cache return same array", ints, cache.getNumerics(reader, "theInt", FieldCache.LEGACY_INT_PARSER, random().nextBoolean()));
for (int i = 0; i < NUM_DOCS; i++) {
assertEquals(Integer.MAX_VALUE - i, ints.get(i));
}
NumericDocValues floats = cache.getNumerics(reader, "theFloat", FieldCache.LEGACY_FLOAT_PARSER, random().nextBoolean());
assertSame("Second request to cache return same array", floats, cache.getNumerics(reader, "theFloat", FieldCache.LEGACY_FLOAT_PARSER, random().nextBoolean()));
for (int i = 0; i < NUM_DOCS; i++) {
assertEquals(Float.floatToIntBits(Float.MAX_VALUE - i), floats.get(i));
}
Bits docsWithField = cache.getDocsWithField(reader, "theLong", null);
assertSame("Second request to cache return same array", docsWithField, cache.getDocsWithField(reader, "theLong", null));
assertTrue("docsWithField(theLong) must be class Bits.MatchAllBits", docsWithField instanceof Bits.MatchAllBits);
assertTrue("docsWithField(theLong) Size: " + docsWithField.length() + " is not: " + NUM_DOCS, docsWithField.length() == NUM_DOCS);
for (int i = 0; i < docsWithField.length(); i++) {
assertTrue(docsWithField.get(i));
}
docsWithField = cache.getDocsWithField(reader, "sparse", null);
assertSame("Second request to cache return same array", docsWithField, cache.getDocsWithField(reader, "sparse", null));
assertFalse("docsWithField(sparse) must not be class Bits.MatchAllBits", docsWithField instanceof Bits.MatchAllBits);
assertTrue("docsWithField(sparse) Size: " + docsWithField.length() + " is not: " + NUM_DOCS, docsWithField.length() == NUM_DOCS);
for (int i = 0; i < docsWithField.length(); i++) {
assertEquals(i%2 == 0, docsWithField.get(i));
}
FieldCache.DEFAULT.purgeByCacheKey(reader.getCoreCacheKey());
}
public void testEmptyIndex() throws Exception {
Directory dir = newDirectory();
IndexWriter writer= new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setMaxBufferedDocs(500));
writer.close();
IndexReader r = DirectoryReader.open(dir);
LeafReader reader = SlowCompositeReaderWrapper.wrap(r);
TestUtil.checkReader(reader);
FieldCache.DEFAULT.getTerms(reader, "foobar", true);
FieldCache.DEFAULT.getTermsIndex(reader, "foobar");
FieldCache.DEFAULT.purgeByCacheKey(reader.getCoreCacheKey());
r.close();
dir.close();
}
public void testDocsWithField() throws Exception {
FieldCache cache = FieldCache.DEFAULT;
cache.purgeAllCaches();
assertEquals(0, cache.getCacheEntries().length);
cache.getNumerics(reader, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER, true);
// The double[] takes one slots, and docsWithField should also
// have been populated:
assertEquals(2, cache.getCacheEntries().length);
Bits bits = cache.getDocsWithField(reader, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER);
// No new entries should appear:
assertEquals(2, cache.getCacheEntries().length);
assertTrue(bits instanceof Bits.MatchAllBits);
NumericDocValues ints = cache.getNumerics(reader, "sparse", FieldCache.LEGACY_INT_PARSER, true);
assertEquals(4, cache.getCacheEntries().length);
Bits docsWithField = cache.getDocsWithField(reader, "sparse", FieldCache.LEGACY_INT_PARSER);
assertEquals(4, cache.getCacheEntries().length);
for (int i = 0; i < docsWithField.length(); i++) {
if (i%2 == 0) {
assertTrue(docsWithField.get(i));
assertEquals(i, ints.get(i));
} else {
assertFalse(docsWithField.get(i));
}
}
NumericDocValues numInts = cache.getNumerics(reader, "numInt", FieldCache.LEGACY_INT_PARSER, random().nextBoolean());
docsWithField = cache.getDocsWithField(reader, "numInt", FieldCache.LEGACY_INT_PARSER);
for (int i = 0; i < docsWithField.length(); i++) {
if (i%2 == 0) {
assertTrue(docsWithField.get(i));
assertEquals(i, numInts.get(i));
} else {
assertFalse(docsWithField.get(i));
}
}
}
public void testGetDocsWithFieldThreadSafety() throws Exception {
final FieldCache cache = FieldCache.DEFAULT;
cache.purgeAllCaches();
int NUM_THREADS = 3;
Thread[] threads = new Thread[NUM_THREADS];
final AtomicBoolean failed = new AtomicBoolean();
final AtomicInteger iters = new AtomicInteger();
final int NUM_ITER = 200 * RANDOM_MULTIPLIER;
final CyclicBarrier restart = new CyclicBarrier(NUM_THREADS,
new Runnable() {
@Override
public void run() {
cache.purgeAllCaches();
iters.incrementAndGet();
}
});
for(int threadIDX=0;threadIDX<NUM_THREADS;threadIDX++) {
threads[threadIDX] = new Thread() {
@Override
public void run() {
try {
while(!failed.get()) {
final int op = random().nextInt(3);
if (op == 0) {
// Purge all caches & resume, once all
// threads get here:
restart.await();
if (iters.get() >= NUM_ITER) {
break;
}
} else if (op == 1) {
Bits docsWithField = cache.getDocsWithField(reader, "sparse", null);
for (int i = 0; i < docsWithField.length(); i++) {
assertEquals(i%2 == 0, docsWithField.get(i));
}
} else {
NumericDocValues ints = cache.getNumerics(reader, "sparse", FieldCache.LEGACY_INT_PARSER, true);
Bits docsWithField = cache.getDocsWithField(reader, "sparse", null);
for (int i = 0; i < docsWithField.length(); i++) {
if (i%2 == 0) {
assertTrue(docsWithField.get(i));
assertEquals(i, ints.get(i));
} else {
assertFalse(docsWithField.get(i));
}
}
}
}
} catch (Throwable t) {
failed.set(true);
restart.reset();
throw new RuntimeException(t);
}
}
};
threads[threadIDX].start();
}
for(int threadIDX=0;threadIDX<NUM_THREADS;threadIDX++) {
threads[threadIDX].join();
}
assertFalse(failed.get());
}
public void testDocValuesIntegration() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(null);
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
Document doc = new Document();
doc.add(new BinaryDocValuesField("binary", new BytesRef("binary value")));
doc.add(new SortedDocValuesField("sorted", new BytesRef("sorted value")));
doc.add(new NumericDocValuesField("numeric", 42));
doc.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value1")));
doc.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value2")));
iw.addDocument(doc);
DirectoryReader ir = iw.getReader();
iw.close();
LeafReader ar = getOnlyLeafReader(ir);
// Binary type: can be retrieved via getTerms()
expectThrows(IllegalStateException.class, () -> {
FieldCache.DEFAULT.getNumerics(ar, "binary", FieldCache.LEGACY_INT_PARSER, false);
});
// Sorted type: can be retrieved via getTerms(), getTermsIndex(), getDocTermOrds()
expectThrows(IllegalStateException.class, () -> {
FieldCache.DEFAULT.getNumerics(ar, "sorted", FieldCache.LEGACY_INT_PARSER, false);
});
// Numeric type: can be retrieved via getInts() and so on
NumericDocValues numeric = FieldCache.DEFAULT.getNumerics(ar, "numeric", FieldCache.LEGACY_INT_PARSER, false);
assertEquals(42, numeric.get(0));
// SortedSet type: can be retrieved via getDocTermOrds()
expectThrows(IllegalStateException.class, () -> {
FieldCache.DEFAULT.getNumerics(ar, "sortedset", FieldCache.LEGACY_INT_PARSER, false);
});
ir.close();
dir.close();
}
public void testNonexistantFields() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
Document doc = new Document();
iw.addDocument(doc);
DirectoryReader ir = iw.getReader();
iw.close();
LeafReader ar = getOnlyLeafReader(ir);
final FieldCache cache = FieldCache.DEFAULT;
cache.purgeAllCaches();
assertEquals(0, cache.getCacheEntries().length);
NumericDocValues ints = cache.getNumerics(ar, "bogusints", FieldCache.LEGACY_INT_PARSER, true);
assertEquals(0, ints.get(0));
NumericDocValues longs = cache.getNumerics(ar, "boguslongs", FieldCache.LEGACY_LONG_PARSER, true);
assertEquals(0, longs.get(0));
NumericDocValues floats = cache.getNumerics(ar, "bogusfloats", FieldCache.LEGACY_FLOAT_PARSER, true);
assertEquals(0, floats.get(0));
NumericDocValues doubles = cache.getNumerics(ar, "bogusdoubles", FieldCache.LEGACY_DOUBLE_PARSER, true);
assertEquals(0, doubles.get(0));
// check that we cached nothing
assertEquals(0, cache.getCacheEntries().length);
ir.close();
dir.close();
}
public void testNonIndexedFields() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(new StoredField("bogusbytes", "bogus"));
doc.add(new StoredField("bogusshorts", "bogus"));
doc.add(new StoredField("bogusints", "bogus"));
doc.add(new StoredField("boguslongs", "bogus"));
doc.add(new StoredField("bogusfloats", "bogus"));
doc.add(new StoredField("bogusdoubles", "bogus"));
doc.add(new StoredField("bogusbits", "bogus"));
iw.addDocument(doc);
DirectoryReader ir = iw.getReader();
iw.close();
LeafReader ar = getOnlyLeafReader(ir);
final FieldCache cache = FieldCache.DEFAULT;
cache.purgeAllCaches();
assertEquals(0, cache.getCacheEntries().length);
NumericDocValues ints = cache.getNumerics(ar, "bogusints", FieldCache.LEGACY_INT_PARSER, true);
assertEquals(0, ints.get(0));
NumericDocValues longs = cache.getNumerics(ar, "boguslongs", FieldCache.LEGACY_LONG_PARSER, true);
assertEquals(0, longs.get(0));
NumericDocValues floats = cache.getNumerics(ar, "bogusfloats", FieldCache.LEGACY_FLOAT_PARSER, true);
assertEquals(0, floats.get(0));
NumericDocValues doubles = cache.getNumerics(ar, "bogusdoubles", FieldCache.LEGACY_DOUBLE_PARSER, true);
assertEquals(0, doubles.get(0));
// check that we cached nothing
assertEquals(0, cache.getCacheEntries().length);
ir.close();
dir.close();
}
// Make sure that the use of GrowableWriter doesn't prevent from using the full long range
public void testLongFieldCache() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random()));
cfg.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg);
Document doc = new Document();
LegacyLongField field = new LegacyLongField("f", 0L, Store.YES);
doc.add(field);
final long[] values = new long[TestUtil.nextInt(random(), 1, 10)];
for (int i = 0; i < values.length; ++i) {
final long v;
switch (random().nextInt(10)) {
case 0:
v = Long.MIN_VALUE;
break;
case 1:
v = 0;
break;
case 2:
v = Long.MAX_VALUE;
break;
default:
v = TestUtil.nextLong(random(), -10, 10);
break;
}
values[i] = v;
if (v == 0 && random().nextBoolean()) {
// missing
iw.addDocument(new Document());
} else {
field.setLongValue(v);
iw.addDocument(doc);
}
}
iw.forceMerge(1);
final DirectoryReader reader = iw.getReader();
final NumericDocValues longs = FieldCache.DEFAULT.getNumerics(getOnlyLeafReader(reader), "f", FieldCache.LEGACY_LONG_PARSER, false);
for (int i = 0; i < values.length; ++i) {
assertEquals(values[i], longs.get(i));
}
reader.close();
iw.close();
dir.close();
}
// Make sure that the use of GrowableWriter doesn't prevent from using the full int range
public void testIntFieldCache() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random()));
cfg.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg);
Document doc = new Document();
LegacyIntField field = new LegacyIntField("f", 0, Store.YES);
doc.add(field);
final int[] values = new int[TestUtil.nextInt(random(), 1, 10)];
for (int i = 0; i < values.length; ++i) {
final int v;
switch (random().nextInt(10)) {
case 0:
v = Integer.MIN_VALUE;
break;
case 1:
v = 0;
break;
case 2:
v = Integer.MAX_VALUE;
break;
default:
v = TestUtil.nextInt(random(), -10, 10);
break;
}
values[i] = v;
if (v == 0 && random().nextBoolean()) {
// missing
iw.addDocument(new Document());
} else {
field.setIntValue(v);
iw.addDocument(doc);
}
}
iw.forceMerge(1);
final DirectoryReader reader = iw.getReader();
final NumericDocValues ints = FieldCache.DEFAULT.getNumerics(getOnlyLeafReader(reader), "f", FieldCache.LEGACY_INT_PARSER, false);
for (int i = 0; i < values.length; ++i) {
assertEquals(values[i], ints.get(i));
}
reader.close();
iw.close();
dir.close();
}
}

View File

@ -0,0 +1,156 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.uninverting;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.LegacyIntField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LegacyNumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.uninverting.UninvertingReader.Type;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
public class TestNumericTerms32 extends LuceneTestCase {
// distance of entries
private static int distance;
// shift the starting of the values to the left, to also have negative values:
private static final int startOffset = - 1 << 15;
// number of docs to generate for testing
private static int noDocs;
private static Directory directory = null;
private static IndexReader reader = null;
private static IndexSearcher searcher = null;
@BeforeClass
public static void beforeClass() throws Exception {
noDocs = atLeast(4096);
distance = (1 << 30) / noDocs;
directory = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
newIndexWriterConfig(new MockAnalyzer(random()))
.setMaxBufferedDocs(TestUtil.nextInt(random(), 100, 1000))
.setMergePolicy(newLogMergePolicy()));
final FieldType storedInt = new FieldType(LegacyIntField.TYPE_NOT_STORED);
storedInt.setStored(true);
storedInt.freeze();
final FieldType storedInt8 = new FieldType(storedInt);
storedInt8.setNumericPrecisionStep(8);
final FieldType storedInt4 = new FieldType(storedInt);
storedInt4.setNumericPrecisionStep(4);
final FieldType storedInt2 = new FieldType(storedInt);
storedInt2.setNumericPrecisionStep(2);
LegacyIntField
field8 = new LegacyIntField("field8", 0, storedInt8),
field4 = new LegacyIntField("field4", 0, storedInt4),
field2 = new LegacyIntField("field2", 0, storedInt2);
Document doc = new Document();
// add fields, that have a distance to test general functionality
doc.add(field8); doc.add(field4); doc.add(field2);
// Add a series of noDocs docs with increasing int values
for (int l=0; l<noDocs; l++) {
int val=distance*l+startOffset;
field8.setIntValue(val);
field4.setIntValue(val);
field2.setIntValue(val);
val=l-(noDocs/2);
writer.addDocument(doc);
}
Map<String,Type> map = new HashMap<>();
map.put("field2", Type.LEGACY_INTEGER);
map.put("field4", Type.LEGACY_INTEGER);
map.put("field8", Type.LEGACY_INTEGER);
reader = UninvertingReader.wrap(writer.getReader(), map);
searcher=newSearcher(reader);
writer.close();
}
@AfterClass
public static void afterClass() throws Exception {
searcher = null;
TestUtil.checkReader(reader);
reader.close();
reader = null;
directory.close();
directory = null;
}
private void testSorting(int precisionStep) throws Exception {
String field="field"+precisionStep;
// 10 random tests, the index order is ascending,
// so using a reverse sort field should retun descending documents
int num = TestUtil.nextInt(random(), 10, 20);
for (int i = 0; i < num; i++) {
int lower=(int)(random().nextDouble()*noDocs*distance)+startOffset;
int upper=(int)(random().nextDouble()*noDocs*distance)+startOffset;
if (lower>upper) {
int a=lower; lower=upper; upper=a;
}
Query tq= LegacyNumericRangeQuery.newIntRange(field, precisionStep, lower, upper, true, true);
TopDocs topDocs = searcher.search(tq, noDocs, new Sort(new SortField(field, SortField.Type.INT, true)));
if (topDocs.totalHits==0) continue;
ScoreDoc[] sd = topDocs.scoreDocs;
assertNotNull(sd);
int last = searcher.doc(sd[0].doc).getField(field).numericValue().intValue();
for (int j=1; j<sd.length; j++) {
int act = searcher.doc(sd[j].doc).getField(field).numericValue().intValue();
assertTrue("Docs should be sorted backwards", last>act );
last=act;
}
}
}
@Test
public void testSorting_8bit() throws Exception {
testSorting(8);
}
@Test
public void testSorting_4bit() throws Exception {
testSorting(4);
}
@Test
public void testSorting_2bit() throws Exception {
testSorting(2);
}
}

View File

@ -0,0 +1,166 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.uninverting;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.LegacyLongField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LegacyNumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.uninverting.UninvertingReader.Type;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
public class TestNumericTerms64 extends LuceneTestCase {
// distance of entries
private static long distance;
// shift the starting of the values to the left, to also have negative values:
private static final long startOffset = - 1L << 31;
// number of docs to generate for testing
private static int noDocs;
private static Directory directory = null;
private static IndexReader reader = null;
private static IndexSearcher searcher = null;
@BeforeClass
public static void beforeClass() throws Exception {
noDocs = atLeast(4096);
distance = (1L << 60) / noDocs;
directory = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
newIndexWriterConfig(new MockAnalyzer(random()))
.setMaxBufferedDocs(TestUtil.nextInt(random(), 100, 1000))
.setMergePolicy(newLogMergePolicy()));
final FieldType storedLong = new FieldType(LegacyLongField.TYPE_NOT_STORED);
storedLong.setStored(true);
storedLong.freeze();
final FieldType storedLong8 = new FieldType(storedLong);
storedLong8.setNumericPrecisionStep(8);
final FieldType storedLong4 = new FieldType(storedLong);
storedLong4.setNumericPrecisionStep(4);
final FieldType storedLong6 = new FieldType(storedLong);
storedLong6.setNumericPrecisionStep(6);
final FieldType storedLong2 = new FieldType(storedLong);
storedLong2.setNumericPrecisionStep(2);
LegacyLongField
field8 = new LegacyLongField("field8", 0L, storedLong8),
field6 = new LegacyLongField("field6", 0L, storedLong6),
field4 = new LegacyLongField("field4", 0L, storedLong4),
field2 = new LegacyLongField("field2", 0L, storedLong2);
Document doc = new Document();
// add fields, that have a distance to test general functionality
doc.add(field8); doc.add(field6); doc.add(field4); doc.add(field2);
// Add a series of noDocs docs with increasing long values, by updating the fields
for (int l=0; l<noDocs; l++) {
long val=distance*l+startOffset;
field8.setLongValue(val);
field6.setLongValue(val);
field4.setLongValue(val);
field2.setLongValue(val);
val=l-(noDocs/2);
writer.addDocument(doc);
}
Map<String,Type> map = new HashMap<>();
map.put("field2", Type.LEGACY_LONG);
map.put("field4", Type.LEGACY_LONG);
map.put("field6", Type.LEGACY_LONG);
map.put("field8", Type.LEGACY_LONG);
reader = UninvertingReader.wrap(writer.getReader(), map);
searcher=newSearcher(reader);
writer.close();
}
@AfterClass
public static void afterClass() throws Exception {
searcher = null;
TestUtil.checkReader(reader);
reader.close();
reader = null;
directory.close();
directory = null;
}
private void testSorting(int precisionStep) throws Exception {
String field="field"+precisionStep;
// 10 random tests, the index order is ascending,
// so using a reverse sort field should retun descending documents
int num = TestUtil.nextInt(random(), 10, 20);
for (int i = 0; i < num; i++) {
long lower=(long)(random().nextDouble()*noDocs*distance)+startOffset;
long upper=(long)(random().nextDouble()*noDocs*distance)+startOffset;
if (lower>upper) {
long a=lower; lower=upper; upper=a;
}
Query tq= LegacyNumericRangeQuery.newLongRange(field, precisionStep, lower, upper, true, true);
TopDocs topDocs = searcher.search(tq, noDocs, new Sort(new SortField(field, SortField.Type.LONG, true)));
if (topDocs.totalHits==0) continue;
ScoreDoc[] sd = topDocs.scoreDocs;
assertNotNull(sd);
long last=searcher.doc(sd[0].doc).getField(field).numericValue().longValue();
for (int j=1; j<sd.length; j++) {
long act=searcher.doc(sd[j].doc).getField(field).numericValue().longValue();
assertTrue("Docs should be sorted backwards", last>act );
last=act;
}
}
}
@Test
public void testSorting_8bit() throws Exception {
testSorting(8);
}
@Test
public void testSorting_6bit() throws Exception {
testSorting(6);
}
@Test
public void testSorting_4bit() throws Exception {
testSorting(4);
}
@Test
public void testSorting_2bit() throws Exception {
testSorting(2);
}
}

View File

@ -0,0 +1,395 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.uninverting;
import java.io.IOException;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.LegacyIntField;
import org.apache.lucene.document.LegacyLongField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LegacyNumericUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.index.SlowCompositeReaderWrapper;
import org.apache.solr.uninverting.UninvertingReader.Type;
public class TestUninvertingReader extends LuceneTestCase {
public void testSortedSetInteger() throws IOException {
Directory dir = newDirectory();
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
Document doc = new Document();
doc.add(new LegacyIntField("foo", 5, Field.Store.NO));
iw.addDocument(doc);
doc = new Document();
doc.add(new LegacyIntField("foo", 5, Field.Store.NO));
doc.add(new LegacyIntField("foo", -3, Field.Store.NO));
iw.addDocument(doc);
iw.forceMerge(1);
iw.close();
DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir),
Collections.singletonMap("foo", Type.SORTED_SET_INTEGER));
LeafReader ar = ir.leaves().get(0).reader();
SortedSetDocValues v = ar.getSortedSetDocValues("foo");
assertEquals(2, v.getValueCount());
v.setDocument(0);
assertEquals(1, v.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
v.setDocument(1);
assertEquals(0, v.nextOrd());
assertEquals(1, v.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
BytesRef value = v.lookupOrd(0);
assertEquals(-3, LegacyNumericUtils.prefixCodedToInt(value));
value = v.lookupOrd(1);
assertEquals(5, LegacyNumericUtils.prefixCodedToInt(value));
TestUtil.checkReader(ir);
ir.close();
dir.close();
}
public void testSortedSetFloat() throws IOException {
Directory dir = newDirectory();
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
Document doc = new Document();
doc.add(new LegacyIntField("foo", Float.floatToRawIntBits(5f), Field.Store.NO));
iw.addDocument(doc);
doc = new Document();
doc.add(new LegacyIntField("foo", Float.floatToRawIntBits(5f), Field.Store.NO));
doc.add(new LegacyIntField("foo", Float.floatToRawIntBits(-3f), Field.Store.NO));
iw.addDocument(doc);
iw.forceMerge(1);
iw.close();
DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir),
Collections.singletonMap("foo", Type.SORTED_SET_FLOAT));
LeafReader ar = ir.leaves().get(0).reader();
SortedSetDocValues v = ar.getSortedSetDocValues("foo");
assertEquals(2, v.getValueCount());
v.setDocument(0);
assertEquals(1, v.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
v.setDocument(1);
assertEquals(0, v.nextOrd());
assertEquals(1, v.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
BytesRef value = v.lookupOrd(0);
assertEquals(Float.floatToRawIntBits(-3f), LegacyNumericUtils.prefixCodedToInt(value));
value = v.lookupOrd(1);
assertEquals(Float.floatToRawIntBits(5f), LegacyNumericUtils.prefixCodedToInt(value));
TestUtil.checkReader(ir);
ir.close();
dir.close();
}
public void testSortedSetLong() throws IOException {
Directory dir = newDirectory();
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
Document doc = new Document();
doc.add(new LegacyLongField("foo", 5, Field.Store.NO));
iw.addDocument(doc);
doc = new Document();
doc.add(new LegacyLongField("foo", 5, Field.Store.NO));
doc.add(new LegacyLongField("foo", -3, Field.Store.NO));
iw.addDocument(doc);
iw.forceMerge(1);
iw.close();
DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir),
Collections.singletonMap("foo", Type.SORTED_SET_LONG));
LeafReader ar = ir.leaves().get(0).reader();
SortedSetDocValues v = ar.getSortedSetDocValues("foo");
assertEquals(2, v.getValueCount());
v.setDocument(0);
assertEquals(1, v.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
v.setDocument(1);
assertEquals(0, v.nextOrd());
assertEquals(1, v.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
BytesRef value = v.lookupOrd(0);
assertEquals(-3, LegacyNumericUtils.prefixCodedToLong(value));
value = v.lookupOrd(1);
assertEquals(5, LegacyNumericUtils.prefixCodedToLong(value));
TestUtil.checkReader(ir);
ir.close();
dir.close();
}
public void testSortedSetDouble() throws IOException {
Directory dir = newDirectory();
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
Document doc = new Document();
doc.add(new LegacyLongField("foo", Double.doubleToRawLongBits(5d), Field.Store.NO));
iw.addDocument(doc);
doc = new Document();
doc.add(new LegacyLongField("foo", Double.doubleToRawLongBits(5d), Field.Store.NO));
doc.add(new LegacyLongField("foo", Double.doubleToRawLongBits(-3d), Field.Store.NO));
iw.addDocument(doc);
iw.forceMerge(1);
iw.close();
DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir),
Collections.singletonMap("foo", Type.SORTED_SET_DOUBLE));
LeafReader ar = ir.leaves().get(0).reader();
SortedSetDocValues v = ar.getSortedSetDocValues("foo");
assertEquals(2, v.getValueCount());
v.setDocument(0);
assertEquals(1, v.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
v.setDocument(1);
assertEquals(0, v.nextOrd());
assertEquals(1, v.nextOrd());
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
BytesRef value = v.lookupOrd(0);
assertEquals(Double.doubleToRawLongBits(-3d), LegacyNumericUtils.prefixCodedToLong(value));
value = v.lookupOrd(1);
assertEquals(Double.doubleToRawLongBits(5d), LegacyNumericUtils.prefixCodedToLong(value));
TestUtil.checkReader(ir);
ir.close();
dir.close();
}
/** Tests {@link Type#SORTED_SET_INTEGER} using Integer based fields, with and w/o precision steps */
public void testSortedSetIntegerManyValues() throws IOException {
final Directory dir = newDirectory();
final IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
final FieldType NO_TRIE_TYPE = new FieldType(LegacyIntField.TYPE_NOT_STORED);
NO_TRIE_TYPE.setNumericPrecisionStep(Integer.MAX_VALUE);
final Map<String,Type> UNINVERT_MAP = new LinkedHashMap<String,Type>();
UNINVERT_MAP.put("notrie_single", Type.SORTED_SET_INTEGER);
UNINVERT_MAP.put("notrie_multi", Type.SORTED_SET_INTEGER);
UNINVERT_MAP.put("trie_single", Type.SORTED_SET_INTEGER);
UNINVERT_MAP.put("trie_multi", Type.SORTED_SET_INTEGER);
final Set<String> MULTI_VALUES = new LinkedHashSet<String>();
MULTI_VALUES.add("trie_multi");
MULTI_VALUES.add("notrie_multi");
final int NUM_DOCS = TestUtil.nextInt(random(), 200, 1500);
final int MIN = TestUtil.nextInt(random(), 10, 100);
final int MAX = MIN + TestUtil.nextInt(random(), 10, 100);
final long EXPECTED_VALSET_SIZE = 1 + MAX - MIN;
{ // (at least) one doc should have every value, so that at least one segment has every value
final Document doc = new Document();
for (int i = MIN; i <= MAX; i++) {
doc.add(new LegacyIntField("trie_multi", i, Field.Store.NO));
doc.add(new LegacyIntField("notrie_multi", i, NO_TRIE_TYPE));
}
iw.addDocument(doc);
}
// now add some more random docs (note: starting at i=1 because of previously added doc)
for (int i = 1; i < NUM_DOCS; i++) {
final Document doc = new Document();
if (0 != TestUtil.nextInt(random(), 0, 9)) {
int val = TestUtil.nextInt(random(), MIN, MAX);
doc.add(new LegacyIntField("trie_single", val, Field.Store.NO));
doc.add(new LegacyIntField("notrie_single", val, NO_TRIE_TYPE));
}
if (0 != TestUtil.nextInt(random(), 0, 9)) {
int numMulti = atLeast(1);
while (0 < numMulti--) {
int val = TestUtil.nextInt(random(), MIN, MAX);
doc.add(new LegacyIntField("trie_multi", val, Field.Store.NO));
doc.add(new LegacyIntField("notrie_multi", val, NO_TRIE_TYPE));
}
}
iw.addDocument(doc);
}
iw.close();
final DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir), UNINVERT_MAP);
TestUtil.checkReader(ir);
final int NUM_LEAVES = ir.leaves().size();
// check the leaves: no more then total set size
for (LeafReaderContext rc : ir.leaves()) {
final LeafReader ar = rc.reader();
for (String f : UNINVERT_MAP.keySet()) {
final SortedSetDocValues v = DocValues.getSortedSet(ar, f);
final long valSetSize = v.getValueCount();
assertTrue(f + ": Expected no more then " + EXPECTED_VALSET_SIZE + " values per segment, got " +
valSetSize + " from: " + ar.toString(),
valSetSize <= EXPECTED_VALSET_SIZE);
if (1 == NUM_LEAVES && MULTI_VALUES.contains(f)) {
// tighter check on multi fields in single segment index since we know one doc has all of them
assertEquals(f + ": Single segment LeafReader's value set should have had exactly expected size",
EXPECTED_VALSET_SIZE, valSetSize);
}
}
}
// check the composite of all leaves: exact expectation of set size
final LeafReader composite = SlowCompositeReaderWrapper.wrap(ir);
TestUtil.checkReader(composite);
for (String f : MULTI_VALUES) {
final SortedSetDocValues v = composite.getSortedSetDocValues(f);
final long valSetSize = v.getValueCount();
assertEquals(f + ": Composite reader value set should have had exactly expected size",
EXPECTED_VALSET_SIZE, valSetSize);
}
ir.close();
dir.close();
}
public void testSortedSetEmptyIndex() throws IOException {
final Directory dir = newDirectory();
final IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
iw.close();
final Map<String,Type> UNINVERT_MAP = new LinkedHashMap<String,Type>();
for (Type t : EnumSet.allOf(Type.class)) {
UNINVERT_MAP.put(t.name(), t);
}
final DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir), UNINVERT_MAP);
TestUtil.checkReader(ir);
final LeafReader composite = SlowCompositeReaderWrapper.wrap(ir);
TestUtil.checkReader(composite);
for (String f : UNINVERT_MAP.keySet()) {
// check the leaves
// (normally there are none for an empty index, so this is really just future
// proofing in case that changes for some reason)
for (LeafReaderContext rc : ir.leaves()) {
final LeafReader ar = rc.reader();
assertNull(f + ": Expected no doc values from empty index (leaf)",
ar.getSortedSetDocValues(f));
}
// check the composite
assertNull(f + ": Expected no doc values from empty index (composite)",
composite.getSortedSetDocValues(f));
}
ir.close();
dir.close();
}
public void testFieldInfos() throws IOException {
Directory dir = newDirectory();
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
Document doc = new Document();
BytesRef idBytes = new BytesRef("id");
doc.add(new StringField("id", idBytes, Store.YES));
doc.add(new LegacyIntField("int", 5, Store.YES));
doc.add(new NumericDocValuesField("dv", 5));
doc.add(new IntPoint("dint", 5));
doc.add(new StoredField("stored", 5)); // not indexed
iw.addDocument(doc);
iw.forceMerge(1);
iw.close();
Map<String, Type> uninvertingMap = new HashMap<>();
uninvertingMap.put("int", Type.LEGACY_INTEGER);
uninvertingMap.put("dv", Type.LEGACY_INTEGER);
uninvertingMap.put("dint", Type.INTEGER_POINT);
DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir),
uninvertingMap);
LeafReader leafReader = ir.leaves().get(0).reader();
FieldInfo intFInfo = leafReader.getFieldInfos().fieldInfo("int");
assertEquals(DocValuesType.NUMERIC, intFInfo.getDocValuesType());
assertEquals(0, intFInfo.getPointDimensionCount());
assertEquals(0, intFInfo.getPointNumBytes());
FieldInfo dintFInfo = leafReader.getFieldInfos().fieldInfo("dint");
assertEquals(DocValuesType.NUMERIC, dintFInfo.getDocValuesType());
assertEquals(1, dintFInfo.getPointDimensionCount());
assertEquals(4, dintFInfo.getPointNumBytes());
FieldInfo dvFInfo = leafReader.getFieldInfos().fieldInfo("dv");
assertEquals(DocValuesType.NUMERIC, dvFInfo.getDocValuesType());
FieldInfo storedFInfo = leafReader.getFieldInfos().fieldInfo("stored");
assertEquals(DocValuesType.NONE, storedFInfo.getDocValuesType());
TestUtil.checkReader(ir);
ir.close();
dir.close();
}
}

View File

@ -22,25 +22,24 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.search.similarities.ClassicSimilarity; import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.util.TestUtil; import org.apache.lucene.util.TestUtil;
import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField; import org.apache.solr.common.SolrInputField;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrCore;
import org.apache.solr.index.SlowCompositeReaderWrapper;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.ResultContext; import org.apache.solr.response.ResultContext;
import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.search.DocList;
import org.apache.solr.schema.CopyField; import org.apache.solr.schema.CopyField;
import org.apache.solr.schema.FieldType; import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.IndexSchema;
import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.search.DocList;
import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.search.SolrIndexSearcher;
import org.junit.BeforeClass; import org.junit.BeforeClass;
import org.junit.Test; import org.junit.Test;