mirror of https://github.com/apache/lucene.git

SOLR-9160: Sync 6x and 7.0 move of UninvertingReader, SlowCompositeReaderWrapper for Solr (LUCENE-7283)
commit 5525f42928 (parent aec3654fb8)
@@ -661,7 +661,7 @@ public class MultiDocValues {
     public final OrdinalMap mapping;
 
     /** Creates a new MultiSortedDocValues over <code>values</code> */
-    MultiSortedDocValues(SortedDocValues values[], int docStarts[], OrdinalMap mapping) throws IOException {
+    public MultiSortedDocValues(SortedDocValues values[], int docStarts[], OrdinalMap mapping) throws IOException {
       assert docStarts.length == values.length + 1;
       this.values = values;
       this.docStarts = docStarts;
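Note on the hunk above: making this constructor public matters for the rest of the commit, because the copy of SlowCompositeReaderWrapper added below lives in org.apache.solr.index, outside Lucene's package, and has to build the merged sorted-doc-values view itself. The following is only a sketch of such a call site (a hypothetical helper that mirrors the loop in the new Solr class; the reader, field, and ordinal map are assumed to be supplied by the caller):

import java.io.IOException;
import org.apache.lucene.index.CompositeReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues.MultiSortedDocValues;
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
import org.apache.lucene.index.SortedDocValues;

class MergedSortedDocValuesSketch {
  // Builds a composite-wide SortedDocValues view for one field; code outside
  // org.apache.lucene.index can only do this now that the constructor is public.
  static SortedDocValues merge(CompositeReader reader, String field, OrdinalMap map) throws IOException {
    int size = reader.leaves().size();
    SortedDocValues[] values = new SortedDocValues[size];
    int[] starts = new int[size + 1];
    for (int i = 0; i < size; i++) {
      LeafReaderContext context = reader.leaves().get(i);
      SortedDocValues v = context.reader().getSortedDocValues(field);
      values[i] = (v == null) ? DocValues.emptySorted() : v; // empty instance for leaves missing the field
      starts[i] = context.docBase;                           // doc id offset of this leaf
    }
    starts[size] = reader.maxDoc();
    return new MultiSortedDocValues(values, starts, map);
  }
}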
@@ -303,6 +303,9 @@ Other Changes
 
 * SOLR-9110: Move JoinFromCollection- SubQueryTransformer- BlockJoinFacet- Distrib Tests to SolrCloudTestCase (Mikhail Khludnev)
 
+* SOLR-9160: Sync 6x and 7.0 move of UninvertingReader, SlowCompositeReaderWrapper for Solr (LUCENE-7283)
+  (yonik)
+
 ================== 6.0.1 ==================
 (No Changes)
 
@@ -36,12 +36,12 @@ import org.apache.lucene.search.DocValuesRangeQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.search.TermRangeQuery;
-import org.apache.lucene.uninverting.UninvertingReader.Type;
 import org.apache.lucene.util.BytesRef;
-import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrException.ErrorCode;
+import org.apache.solr.common.SolrException;
 import org.apache.solr.response.TextResponseWriter;
 import org.apache.solr.search.QParser;
+import org.apache.solr.uninverting.UninvertingReader.Type;
 
 import com.ibm.icu.text.Collator;
 import com.ibm.icu.text.RuleBasedCollator;
@@ -26,16 +26,6 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 
-import com.carrotsearch.hppc.IntHashSet;
-import com.carrotsearch.hppc.IntObjectHashMap;
-import com.carrotsearch.hppc.LongHashSet;
-import com.carrotsearch.hppc.LongObjectHashMap;
-import com.carrotsearch.hppc.LongObjectMap;
-import com.carrotsearch.hppc.cursors.IntObjectCursor;
-import com.carrotsearch.hppc.cursors.LongCursor;
-import com.carrotsearch.hppc.cursors.LongObjectCursor;
-import com.carrotsearch.hppc.cursors.ObjectCursor;
-
 import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.index.FieldInfo;

@@ -53,7 +43,6 @@ import org.apache.lucene.search.Collector;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.LeafCollector;
 import org.apache.lucene.search.Query;
-import org.apache.solr.search.QueryWrapperFilter;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.Sort;

@@ -61,7 +50,6 @@ import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.search.TopDocsCollector;
 import org.apache.lucene.search.TopFieldCollector;
 import org.apache.lucene.search.TopScoreDocCollector;
-import org.apache.lucene.uninverting.UninvertingReader;
 import org.apache.lucene.util.BitSetIterator;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;

@@ -87,12 +75,24 @@ import org.apache.solr.search.DocIterator;
 import org.apache.solr.search.DocList;
 import org.apache.solr.search.DocSlice;
 import org.apache.solr.search.QParser;
+import org.apache.solr.search.QueryWrapperFilter;
 import org.apache.solr.search.SolrConstantScoreQuery;
 import org.apache.solr.search.SolrIndexSearcher;
 import org.apache.solr.search.SortSpecParsing;
+import org.apache.solr.uninverting.UninvertingReader;
 import org.apache.solr.util.plugin.PluginInfoInitialized;
 import org.apache.solr.util.plugin.SolrCoreAware;
 
+import com.carrotsearch.hppc.IntHashSet;
+import com.carrotsearch.hppc.IntObjectHashMap;
+import com.carrotsearch.hppc.LongHashSet;
+import com.carrotsearch.hppc.LongObjectHashMap;
+import com.carrotsearch.hppc.LongObjectMap;
+import com.carrotsearch.hppc.cursors.IntObjectCursor;
+import com.carrotsearch.hppc.cursors.LongCursor;
+import com.carrotsearch.hppc.cursors.LongObjectCursor;
+import com.carrotsearch.hppc.cursors.ObjectCursor;
+
 /**
  * The ExpandComponent is designed to work with the CollapsingPostFilter.
  * The CollapsingPostFilter collapses a result set on a field.
@@ -0,0 +1,296 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.index;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.CompositeReader;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.DocValuesType;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReader.CoreClosedListener;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.MultiDocValues.MultiSortedDocValues;
+import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues;
+import org.apache.lucene.index.MultiDocValues.OrdinalMap;
+import org.apache.lucene.index.MultiDocValues;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.MultiReader;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.PointValues;
+import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.index.SortedNumericDocValues;
+import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.index.StoredFieldVisitor;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.util.Bits;
+
+/**
+ * This class forces a composite reader (eg a {@link
+ * MultiReader} or {@link DirectoryReader}) to emulate a
+ * {@link LeafReader}.  This requires implementing the postings
+ * APIs on-the-fly, using the static methods in {@link
+ * MultiFields}, {@link MultiDocValues}, by stepping through
+ * the sub-readers to merge fields/terms, appending docs, etc.
+ *
+ * <p><b>NOTE</b>: this class almost always results in a
+ * performance hit.  If this is important to your use case,
+ * you'll get better performance by gathering the sub readers using
+ * {@link IndexReader#getContext()} to get the
+ * leaves and then operate per-LeafReader,
+ * instead of using this class.
+ */
+
+public final class SlowCompositeReaderWrapper extends LeafReader {
+
+  private final CompositeReader in;
+  private final Fields fields;
+  private final boolean merging;
+
+  /** This method is sugar for getting an {@link LeafReader} from
+   * an {@link IndexReader} of any kind. If the reader is already atomic,
+   * it is returned unchanged, otherwise wrapped by this class.
+   */
+  public static LeafReader wrap(IndexReader reader) throws IOException {
+    if (reader instanceof CompositeReader) {
+      return new SlowCompositeReaderWrapper((CompositeReader) reader, false);
+    } else {
+      assert reader instanceof LeafReader;
+      return (LeafReader) reader;
+    }
+  }
+
+  SlowCompositeReaderWrapper(CompositeReader reader, boolean merging) throws IOException {
+    super();
+    in = reader;
+    if (getFieldInfos().hasPointValues()) {
+      throw new IllegalArgumentException("cannot wrap points");
+    }
+    fields = MultiFields.getFields(in);
+    in.registerParentReader(this);
+    this.merging = merging;
+  }
+
+  @Override
+  public String toString() {
+    return "SlowCompositeReaderWrapper(" + in + ")";
+  }
+
+  @Override
+  public void addCoreClosedListener(CoreClosedListener listener) {
+    addCoreClosedListenerAsReaderClosedListener(in, listener);
+  }
+
+  @Override
+  public void removeCoreClosedListener(CoreClosedListener listener) {
+    removeCoreClosedListenerAsReaderClosedListener(in, listener);
+  }
+
+  @Override
+  public Fields fields() {
+    ensureOpen();
+    return fields;
+  }
+
+  @Override
+  public NumericDocValues getNumericDocValues(String field) throws IOException {
+    ensureOpen();
+    return MultiDocValues.getNumericValues(in, field);
+  }
+
+  @Override
+  public Bits getDocsWithField(String field) throws IOException {
+    ensureOpen();
+    return MultiDocValues.getDocsWithField(in, field);
+  }
+
+  @Override
+  public BinaryDocValues getBinaryDocValues(String field) throws IOException {
+    ensureOpen();
+    return MultiDocValues.getBinaryValues(in, field);
+  }
+
+  @Override
+  public SortedNumericDocValues getSortedNumericDocValues(String field) throws IOException {
+    ensureOpen();
+    return MultiDocValues.getSortedNumericValues(in, field);
+  }
+
+  @Override
+  public SortedDocValues getSortedDocValues(String field) throws IOException {
+    ensureOpen();
+    OrdinalMap map = null;
+    synchronized (cachedOrdMaps) {
+      map = cachedOrdMaps.get(field);
+      if (map == null) {
+        // uncached, or not a multi dv
+        SortedDocValues dv = MultiDocValues.getSortedValues(in, field);
+        if (dv instanceof MultiSortedDocValues) {
+          map = ((MultiSortedDocValues)dv).mapping;
+          if (map.owner == getCoreCacheKey() && merging == false) {
+            cachedOrdMaps.put(field, map);
+          }
+        }
+        return dv;
+      }
+    }
+    int size = in.leaves().size();
+    final SortedDocValues[] values = new SortedDocValues[size];
+    final int[] starts = new int[size+1];
+    for (int i = 0; i < size; i++) {
+      LeafReaderContext context = in.leaves().get(i);
+      final LeafReader reader = context.reader();
+      final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
+      if (fieldInfo != null && fieldInfo.getDocValuesType() != DocValuesType.SORTED) {
+        return null;
+      }
+      SortedDocValues v = reader.getSortedDocValues(field);
+      if (v == null) {
+        v = DocValues.emptySorted();
+      }
+      values[i] = v;
+      starts[i] = context.docBase;
+    }
+    starts[size] = maxDoc();
+    return new MultiSortedDocValues(values, starts, map);
+  }
+
+  @Override
+  public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
+    ensureOpen();
+    OrdinalMap map = null;
+    synchronized (cachedOrdMaps) {
+      map = cachedOrdMaps.get(field);
+      if (map == null) {
+        // uncached, or not a multi dv
+        SortedSetDocValues dv = MultiDocValues.getSortedSetValues(in, field);
+        if (dv instanceof MultiSortedSetDocValues) {
+          map = ((MultiSortedSetDocValues)dv).mapping;
+          if (map.owner == getCoreCacheKey() && merging == false) {
+            cachedOrdMaps.put(field, map);
+          }
+        }
+        return dv;
+      }
+    }
+
+    assert map != null;
+    int size = in.leaves().size();
+    final SortedSetDocValues[] values = new SortedSetDocValues[size];
+    final int[] starts = new int[size+1];
+    for (int i = 0; i < size; i++) {
+      LeafReaderContext context = in.leaves().get(i);
+      final LeafReader reader = context.reader();
+      final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
+      if(fieldInfo != null && fieldInfo.getDocValuesType() != DocValuesType.SORTED_SET){
+        return null;
+      }
+      SortedSetDocValues v = reader.getSortedSetDocValues(field);
+      if (v == null) {
+        v = DocValues.emptySortedSet();
+      }
+      values[i] = v;
+      starts[i] = context.docBase;
+    }
+    starts[size] = maxDoc();
+    return new MultiSortedSetDocValues(values, starts, map);
+  }
+
+  // TODO: this could really be a weak map somewhere else on the coreCacheKey,
+  // but do we really need to optimize slow-wrapper any more?
+  private final Map<String,OrdinalMap> cachedOrdMaps = new HashMap<>();
+
+  @Override
+  public NumericDocValues getNormValues(String field) throws IOException {
+    ensureOpen();
+    return MultiDocValues.getNormValues(in, field);
+  }
+
+  @Override
+  public Fields getTermVectors(int docID) throws IOException {
+    ensureOpen();
+    return in.getTermVectors(docID);
+  }
+
+  @Override
+  public int numDocs() {
+    // Don't call ensureOpen() here (it could affect performance)
+    return in.numDocs();
+  }
+
+  @Override
+  public int maxDoc() {
+    // Don't call ensureOpen() here (it could affect performance)
+    return in.maxDoc();
+  }
+
+  @Override
+  public void document(int docID, StoredFieldVisitor visitor) throws IOException {
+    ensureOpen();
+    in.document(docID, visitor);
+  }
+
+  @Override
+  public Bits getLiveDocs() {
+    ensureOpen();
+    return MultiFields.getLiveDocs(in);
+  }
+
+  @Override
+  public PointValues getPointValues() {
+    ensureOpen();
+    return null;
+  }
+
+  @Override
+  public FieldInfos getFieldInfos() {
+    ensureOpen();
+    return MultiFields.getMergedFieldInfos(in);
+  }
+
+  @Override
+  public Object getCoreCacheKey() {
+    return in.getCoreCacheKey();
+  }
+
+  @Override
+  public Object getCombinedCoreAndDeletesKey() {
+    return in.getCombinedCoreAndDeletesKey();
+  }
+
+  @Override
+  protected void doClose() throws IOException {
+    // TODO: as this is a wrapper, should we really close the delegate?
+    in.close();
+  }
+
+  @Override
+  public void checkIntegrity() throws IOException {
+    ensureOpen();
+    for (LeafReaderContext ctx : in.leaves()) {
+      ctx.reader().checkIntegrity();
+    }
+  }
+}
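The class javadoc above already warns that this wrapper trades performance for convenience and that per-leaf access is preferred. For orientation, here is a hedged usage sketch of the Solr copy; the index path and the "category" field are placeholders, not taken from this commit:

import java.io.IOException;
import java.nio.file.Paths;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.solr.index.SlowCompositeReaderWrapper;

public class SlowWrapperUsageSketch {
  public static void main(String[] args) throws IOException {
    try (Directory dir = FSDirectory.open(Paths.get("/path/to/index"));
         DirectoryReader reader = DirectoryReader.open(dir)) {
      // Emulate a single LeafReader over all segments; convenient but slow,
      // so per the NOTE in the javadoc prefer iterating reader.leaves() when possible.
      LeafReader leaf = SlowCompositeReaderWrapper.wrap(reader);
      SortedDocValues dv = leaf.getSortedDocValues("category");
      if (dv != null) {
        System.out.println("global terms in 'category': " + dv.getValueCount());
      }
    }
  }
}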
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.index;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.MergePolicy;
+import org.apache.lucene.index.MergePolicyWrapper;
+import org.apache.lucene.index.MergeState;
+import org.apache.lucene.index.MergeTrigger;
+import org.apache.lucene.index.MultiReader;
+import org.apache.lucene.index.SegmentCommitInfo;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.SegmentInfos;
+import org.apache.lucene.index.SegmentReader;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.InfoStream;
+import org.apache.lucene.util.packed.PackedInts;
+import org.apache.lucene.util.packed.PackedLongValues;
+import org.apache.solr.index.SlowCompositeReaderWrapper;
+
+// TODO: remove this and add indexSort specification directly to solrconfig.xml? But for BWC, also accept SortingMergePolicy specification?
+
+public final class SortingMergePolicy extends MergePolicyWrapper {
+
+  private final Sort sort;
+
+  /** Create a new {@code MergePolicy} that sorts documents with the given {@code sort}. */
+  public SortingMergePolicy(MergePolicy in, Sort sort) {
+    super(in);
+    this.sort = sort;
+  }
+
+  /** Return the {@link Sort} order that is used to sort segments when merging. */
+  public Sort getSort() {
+    return sort;
+  }
+
+  @Override
+  public String toString() {
+    return "SortingMergePolicy(" + in + ", sort=" + sort + ")";
+  }
+}
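As the TODO in the new file notes, this class mainly lets Solr keep accepting a SortingMergePolicy configuration: it wraps another MergePolicy and records the Sort, which callers can retrieve via getSort(). A hedged sketch of wiring it into an IndexWriterConfig follows; the wrapped TieredMergePolicy and the "timestamp" sort field are assumptions for illustration only, not part of this commit:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.solr.index.SortingMergePolicy;

public class SortingMergePolicyUsageSketch {
  public static IndexWriterConfig newConfig() {
    // Hypothetical index-time sort: newest-first on a numeric doc-values field.
    Sort sort = new Sort(new SortField("timestamp", SortField.Type.LONG, true));
    IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
    // Wrap a default policy; consumers can later ask the installed policy for getSort().
    iwc.setMergePolicy(new SortingMergePolicy(new TieredMergePolicy(), sort));
    return iwc;
  }
}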
@@ -28,6 +28,7 @@ import java.util.Set;
 import java.util.TreeSet;
 import java.util.concurrent.Callable;
 import java.util.concurrent.ExecutionException;
+
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.StoredField;
 import org.apache.lucene.index.IndexableField;

@@ -41,22 +42,16 @@ import org.apache.lucene.spatial.SpatialStrategy;
 import org.apache.lucene.spatial.query.SpatialArgs;
 import org.apache.lucene.spatial.query.SpatialArgsParser;
 import org.apache.lucene.spatial.query.SpatialOperation;
-import org.apache.lucene.uninverting.UninvertingReader.Type;
-import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrException.ErrorCode;
+import org.apache.solr.common.SolrException;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.response.TextResponseWriter;
 import org.apache.solr.search.QParser;
 import org.apache.solr.search.SpatialOptions;
+import org.apache.solr.uninverting.UninvertingReader.Type;
 import org.apache.solr.util.DistanceUnits;
 import org.apache.solr.util.MapListener;
 import org.apache.solr.util.SpatialUtils;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.google.common.base.Throwables;
-import com.google.common.cache.Cache;
-import com.google.common.cache.CacheBuilder;
 import org.locationtech.spatial4j.context.SpatialContext;
 import org.locationtech.spatial4j.context.SpatialContextFactory;
 import org.locationtech.spatial4j.distance.DistanceUtils;

@@ -66,6 +61,12 @@ import org.locationtech.spatial4j.io.SupportedFormats;
 import org.locationtech.spatial4j.shape.Point;
 import org.locationtech.spatial4j.shape.Rectangle;
 import org.locationtech.spatial4j.shape.Shape;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Throwables;
+import com.google.common.cache.Cache;
+import com.google.common.cache.CacheBuilder;
 
 /**
  * Abstract base class for Solr FieldTypes based on a Lucene 4 {@link SpatialStrategy}.
@@ -23,10 +23,10 @@ import java.nio.ByteBuffer;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.search.SortField;
-import org.apache.lucene.uninverting.UninvertingReader.Type;
 import org.apache.lucene.util.BytesRef;
 import org.apache.solr.common.util.Base64;
 import org.apache.solr.response.TextResponseWriter;
+import org.apache.solr.uninverting.UninvertingReader.Type;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -22,15 +22,14 @@ import java.util.Map;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.DocValues;
-import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.queries.function.FunctionValues;
 import org.apache.lucene.queries.function.ValueSource;
 import org.apache.lucene.queries.function.docvalues.BoolDocValues;
 import org.apache.lucene.search.SortField;
-import org.apache.lucene.uninverting.UninvertingReader.Type;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.CharsRefBuilder;

@@ -40,6 +39,7 @@ import org.apache.solr.analysis.SolrAnalyzer;
 import org.apache.solr.response.TextResponseWriter;
 import org.apache.solr.search.QParser;
 import org.apache.solr.search.function.OrdFieldSource;
+import org.apache.solr.uninverting.UninvertingReader.Type;
 /**
  *
  */
@@ -40,12 +40,12 @@ import org.apache.lucene.search.DocValuesRangeQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.search.TermRangeQuery;
-import org.apache.lucene.uninverting.UninvertingReader.Type;
 import org.apache.lucene.util.BytesRef;
-import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrException.ErrorCode;
+import org.apache.solr.common.SolrException;
 import org.apache.solr.response.TextResponseWriter;
 import org.apache.solr.search.QParser;
+import org.apache.solr.uninverting.UninvertingReader.Type;
 
 /**
  * Field for collated sort keys.
@@ -44,7 +44,7 @@ import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.FieldValueQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.SortField;
-import org.apache.lucene.uninverting.UninvertingReader.Type;
+import org.apache.solr.uninverting.UninvertingReader.Type;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrException.ErrorCode;
 import org.apache.solr.response.TextResponseWriter;
@@ -16,12 +16,6 @@
  */
 package org.apache.solr.schema;
 
-import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.parsers.ParserConfigurationException;
-import javax.xml.xpath.XPath;
-import javax.xml.xpath.XPathConstants;
-import javax.xml.xpath.XPathExpressionException;
-import javax.xml.xpath.XPathFactory;
 import java.io.IOException;
 import java.io.InputStream;
 import java.lang.invoke.MethodHandles;

@@ -31,6 +25,12 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.xpath.XPath;
+import javax.xml.xpath.XPathConstants;
+import javax.xml.xpath.XPathExpressionException;
+import javax.xml.xpath.XPathFactory;
 
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.LegacyIntField;

@@ -45,7 +45,6 @@ import org.apache.lucene.search.DocValuesRangeQuery;
 import org.apache.lucene.search.LegacyNumericRangeQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.SortField;
-import org.apache.lucene.uninverting.UninvertingReader.Type;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
 import org.apache.lucene.util.CharsRef;

@@ -55,6 +54,7 @@ import org.apache.solr.common.EnumFieldValue;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.response.TextResponseWriter;
 import org.apache.solr.search.QParser;
+import org.apache.solr.uninverting.UninvertingReader.Type;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.w3c.dom.Document;
@@ -16,17 +16,17 @@
  */
 package org.apache.solr.schema;
 
+import java.io.IOException;
+import java.util.Map;
+
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.queries.function.ValueSource;
 import org.apache.lucene.search.SortField;
-import org.apache.lucene.uninverting.UninvertingReader.Type;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.response.TextResponseWriter;
 import org.apache.solr.search.QParser;
 import org.apache.solr.search.function.FileFloatSource;
+import org.apache.solr.uninverting.UninvertingReader.Type;
-import java.io.IOException;
-import java.util.Map;
 
 /** Get values from an external file instead of the index.
  *
@@ -49,7 +49,6 @@ import org.apache.lucene.search.SortedNumericSelector;
 import org.apache.lucene.search.SortedSetSelector;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.similarities.Similarity;
-import org.apache.lucene.uninverting.UninvertingReader;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
 import org.apache.lucene.util.CharsRef;

@@ -66,6 +65,7 @@ import org.apache.solr.query.SolrRangeQuery;
 import org.apache.solr.response.TextResponseWriter;
 import org.apache.solr.search.QParser;
 import org.apache.solr.search.Sorting;
+import org.apache.solr.uninverting.UninvertingReader;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -18,23 +18,23 @@ package org.apache.solr.schema;
 
 import java.io.IOException;
 
-import org.locationtech.spatial4j.context.SpatialContext;
-import org.locationtech.spatial4j.distance.DistanceUtils;
-import org.locationtech.spatial4j.io.GeohashUtils;
-import org.locationtech.spatial4j.shape.Point;
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.queries.function.ValueSource;
 import org.apache.lucene.queries.function.valuesource.LiteralValueSource;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.SortField;
-import org.apache.lucene.uninverting.UninvertingReader.Type;
 import org.apache.solr.response.TextResponseWriter;
 import org.apache.solr.search.QParser;
 import org.apache.solr.search.SolrConstantScoreQuery;
 import org.apache.solr.search.SpatialOptions;
 import org.apache.solr.search.function.ValueSourceRangeFilter;
 import org.apache.solr.search.function.distance.GeohashHaversineFunction;
+import org.apache.solr.uninverting.UninvertingReader.Type;
 import org.apache.solr.util.SpatialUtils;
+import org.locationtech.spatial4j.context.SpatialContext;
+import org.locationtech.spatial4j.distance.DistanceUtils;
+import org.locationtech.spatial4j.io.GeohashUtils;
+import org.locationtech.spatial4j.shape.Point;
 
 /**
  * This is a class that represents a <a
@@ -51,7 +51,7 @@ import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.search.similarities.Similarity;
-import org.apache.lucene.uninverting.UninvertingReader;
+import org.apache.solr.uninverting.UninvertingReader;
 import org.apache.lucene.util.Version;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrException.ErrorCode;
@@ -22,8 +22,8 @@ import java.util.Map;
 
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.queries.function.FunctionValues;
 import org.apache.lucene.queries.function.ValueSource;
 import org.apache.lucene.queries.function.valuesource.VectorValueSource;

@@ -37,7 +37,6 @@ import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.search.Weight;
-import org.apache.lucene.uninverting.UninvertingReader.Type;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.response.TextResponseWriter;
 import org.apache.solr.search.DelegatingCollector;

@@ -45,8 +44,8 @@ import org.apache.solr.search.ExtendedQueryBase;
 import org.apache.solr.search.PostFilter;
 import org.apache.solr.search.QParser;
 import org.apache.solr.search.SpatialOptions;
+import org.apache.solr.uninverting.UninvertingReader.Type;
 import org.apache.solr.util.SpatialUtils;
 
 import org.locationtech.spatial4j.context.SpatialContext;
 import org.locationtech.spatial4j.distance.DistanceUtils;
 import org.locationtech.spatial4j.shape.Point;
@@ -21,7 +21,6 @@ import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
 
-import org.locationtech.spatial4j.distance.DistanceUtils;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.queries.function.ValueSource;

@@ -30,13 +29,14 @@ import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.SortField;
-import org.apache.lucene.uninverting.UninvertingReader.Type;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.params.MapSolrParams;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.response.TextResponseWriter;
 import org.apache.solr.search.QParser;
 import org.apache.solr.search.SpatialOptions;
+import org.apache.solr.uninverting.UninvertingReader.Type;
+import org.locationtech.spatial4j.distance.DistanceUtils;
 
 /**
  * A point type that indexes a point in an n-dimensional space as separate fields and supports range queries.
@@ -33,14 +33,14 @@ import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.queries.function.ValueSource;
 import org.apache.lucene.queries.function.valuesource.SortedSetFieldSource;
 import org.apache.lucene.search.SortField;
-import org.apache.lucene.uninverting.UninvertingReader.Type;
 import org.apache.lucene.util.AttributeFactory;
-import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.AttributeSource.State;
+import org.apache.lucene.util.AttributeSource;
 import org.apache.solr.analysis.SolrAnalyzer;
 import org.apache.solr.response.TextResponseWriter;
 import org.apache.solr.search.QParser;
 import org.apache.solr.search.Sorting;
+import org.apache.solr.uninverting.UninvertingReader.Type;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -20,16 +20,16 @@ import java.io.IOException;
 import java.util.Map;
 
 import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.ReaderUtil;
-import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.queries.function.FunctionValues;
 import org.apache.lucene.queries.function.ValueSource;
 import org.apache.lucene.queries.function.docvalues.IntDocValues;
 import org.apache.lucene.search.*;
-import org.apache.lucene.uninverting.UninvertingReader.Type;
 import org.apache.solr.response.TextResponseWriter;
 import org.apache.solr.search.QParser;
+import org.apache.solr.uninverting.UninvertingReader.Type;
 
 /**
  * Utility Field used for random sorting. It should not be passed a value.
@@ -27,10 +27,10 @@ import org.apache.lucene.document.SortedSetDocValuesField;
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.queries.function.ValueSource;
 import org.apache.lucene.search.SortField;
-import org.apache.lucene.uninverting.UninvertingReader.Type;
 import org.apache.lucene.util.BytesRef;
 import org.apache.solr.response.TextResponseWriter;
 import org.apache.solr.search.QParser;
+import org.apache.solr.uninverting.UninvertingReader.Type;
 
 public class StrField extends PrimitiveFieldType {
 
@@ -16,14 +16,16 @@
  */
 package org.apache.solr.schema;
 
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
+import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.queries.function.ValueSource;
 import org.apache.lucene.queries.function.valuesource.SortedSetFieldSource;
 import org.apache.lucene.search.*;
-import org.apache.lucene.index.IndexableField;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.uninverting.UninvertingReader.Type;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.QueryBuilder;
 import org.apache.solr.common.SolrException;

@@ -31,9 +33,7 @@ import org.apache.solr.query.SolrRangeQuery;
 import org.apache.solr.response.TextResponseWriter;
 import org.apache.solr.search.QParser;
 import org.apache.solr.search.Sorting;
+import org.apache.solr.uninverting.UninvertingReader.Type;
-import java.util.Map;
-import java.io.IOException;
 
 /** <code>TextField</code> is the basic type for configurable text analysis.
  * Analyzers for field types using this implementation should be defined in the schema.
@@ -26,8 +26,8 @@ import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 
-import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.FieldType.LegacyNumericType;
+import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.LegacyDoubleField;
 import org.apache.lucene.document.LegacyFloatField;
 import org.apache.lucene.document.LegacyIntField;

@@ -47,7 +47,6 @@ import org.apache.lucene.search.LegacyNumericRangeQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.search.SortedSetSelector;
-import org.apache.lucene.uninverting.UninvertingReader.Type;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
 import org.apache.lucene.util.CharsRef;

@@ -61,6 +60,7 @@ import org.apache.solr.response.TextResponseWriter;
 import org.apache.solr.search.FunctionRangeQuery;
 import org.apache.solr.search.QParser;
 import org.apache.solr.search.function.ValueSourceRangeFilter;
+import org.apache.solr.uninverting.UninvertingReader.Type;
 import org.apache.solr.util.DateMathParser;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -25,15 +25,7 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 
-import com.carrotsearch.hppc.FloatArrayList;
-import com.carrotsearch.hppc.IntArrayList;
-import com.carrotsearch.hppc.IntIntHashMap;
-import com.carrotsearch.hppc.IntLongHashMap;
-import com.carrotsearch.hppc.cursors.IntIntCursor;
-import com.carrotsearch.hppc.cursors.IntLongCursor;
-
 import org.apache.commons.lang.StringUtils;
-
 import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.index.FieldInfo;

@@ -49,13 +41,12 @@ import org.apache.lucene.queries.function.FunctionValues;
 import org.apache.lucene.queries.function.ValueSource;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.FieldComparator;
-import org.apache.lucene.search.LeafFieldComparator;
 import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.LeafFieldComparator;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.SortField;
-import org.apache.lucene.uninverting.UninvertingReader;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BitSetIterator;
 import org.apache.lucene.util.BytesRef;

@@ -65,8 +56,8 @@ import org.apache.solr.common.SolrException;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.util.NamedList;
-import org.apache.solr.handler.component.ResponseBuilder;
 import org.apache.solr.handler.component.QueryElevationComponent;
+import org.apache.solr.handler.component.ResponseBuilder;
 import org.apache.solr.request.LocalSolrQueryRequest;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.request.SolrRequestInfo;

@@ -75,6 +66,14 @@ import org.apache.solr.schema.StrField;
 import org.apache.solr.schema.TrieFloatField;
 import org.apache.solr.schema.TrieIntField;
 import org.apache.solr.schema.TrieLongField;
+import org.apache.solr.uninverting.UninvertingReader;
+
+import com.carrotsearch.hppc.FloatArrayList;
+import com.carrotsearch.hppc.IntArrayList;
+import com.carrotsearch.hppc.IntIntHashMap;
+import com.carrotsearch.hppc.IntLongHashMap;
+import com.carrotsearch.hppc.cursors.IntIntCursor;
+import com.carrotsearch.hppc.cursors.IntLongCursor;
 
 /**
@@ -20,16 +20,16 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
 
-import org.apache.lucene.index.FilterLeafReader;
-import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.FilterLeafReader;
+import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.index.SortedSetDocValues;
-import org.apache.lucene.uninverting.UninvertingReader;
+import org.apache.solr.uninverting.UninvertingReader;
 
 /**
  * Lucene 5.0 removes "accidental" insanity, so you must explicitly
@@ -18,13 +18,12 @@ package org.apache.solr.search;
 
 import java.net.URL;
 
-import org.apache.lucene.uninverting.UninvertingReader;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.util.SimpleOrderedMap;
+import org.apache.solr.core.JmxMonitoredMap.JmxAugmentedSolrInfoMBean;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.core.SolrInfoMBean;
-import org.apache.solr.core.JmxMonitoredMap.JmxAugmentedSolrInfoMBean;
+import org.apache.solr.uninverting.UninvertingReader;
 
 /**
  * A SolrInfoMBean that provides introspection of the Solr FieldCache
@@ -53,7 +53,6 @@ import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.MultiPostingsEnum;
 import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.PostingsEnum;
-import org.apache.lucene.index.SlowCompositeReaderWrapper;
 import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.index.StoredFieldVisitor;
@@ -94,7 +93,6 @@ import org.apache.lucene.search.TopScoreDocCollector;
 import org.apache.lucene.search.TotalHitCountCollector;
 import org.apache.lucene.search.Weight;
 import org.apache.lucene.store.Directory;
-import org.apache.lucene.uninverting.UninvertingReader;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.FixedBitSet;
@@ -109,6 +107,7 @@ import org.apache.solr.core.DirectoryFactory.DirContext;
 import org.apache.solr.core.SolrConfig;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.core.SolrInfoMBean;
+import org.apache.solr.index.SlowCompositeReaderWrapper;
 import org.apache.solr.request.LocalSolrQueryRequest;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.request.SolrRequestInfo;
@@ -122,6 +121,7 @@ import org.apache.solr.schema.TrieFloatField;
 import org.apache.solr.schema.TrieIntField;
 import org.apache.solr.search.facet.UnInvertedField;
 import org.apache.solr.search.stats.StatsSource;
+import org.apache.solr.uninverting.UninvertingReader;
 import org.apache.solr.update.IndexFingerprint;
 import org.apache.solr.update.SolrIndexConfig;
 import org.slf4j.Logger;
@@ -27,17 +27,16 @@ import java.util.concurrent.atomic.AtomicLong;
 
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.SlowCompositeReaderWrapper;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.uninverting.DocTermOrds;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CharsRefBuilder;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.core.SolrCore;
+import org.apache.solr.index.SlowCompositeReaderWrapper;
 import org.apache.solr.schema.FieldType;
 import org.apache.solr.schema.TrieField;
 import org.apache.solr.search.BitDocSet;
@@ -45,6 +44,7 @@ import org.apache.solr.search.DocIterator;
 import org.apache.solr.search.DocSet;
 import org.apache.solr.search.SolrCache;
 import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.uninverting.DocTermOrds;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -20,13 +20,12 @@ import java.io.IOException;
 import java.util.List;
 import java.util.Map;
 
-import org.apache.lucene.index.LeafReader;
-import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.MultiReader;
 import org.apache.lucene.index.ReaderUtil;
-import org.apache.lucene.index.SlowCompositeReaderWrapper;
 import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.queries.function.FunctionValues;
 import org.apache.lucene.queries.function.ValueSource;
@@ -34,6 +33,7 @@ import org.apache.lucene.queries.function.docvalues.IntDocValues;
 import org.apache.lucene.search.SortedSetSelector;
 import org.apache.lucene.util.mutable.MutableValue;
 import org.apache.lucene.util.mutable.MutableValueInt;
+import org.apache.solr.index.SlowCompositeReaderWrapper;
 import org.apache.solr.schema.SchemaField;
 import org.apache.solr.search.Insanity;
 import org.apache.solr.search.SolrIndexSearcher;
@@ -20,18 +20,18 @@ import java.io.IOException;
 import java.util.List;
 import java.util.Map;
 
-import org.apache.lucene.index.LeafReader;
-import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.MultiReader;
 import org.apache.lucene.index.ReaderUtil;
-import org.apache.lucene.index.SlowCompositeReaderWrapper;
 import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.queries.function.FunctionValues;
 import org.apache.lucene.queries.function.ValueSource;
 import org.apache.lucene.queries.function.docvalues.IntDocValues;
 import org.apache.lucene.search.SortedSetSelector;
+import org.apache.solr.index.SlowCompositeReaderWrapper;
 import org.apache.solr.schema.SchemaField;
 import org.apache.solr.search.Insanity;
 import org.apache.solr.search.SolrIndexSearcher;
@@ -25,7 +25,6 @@ import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.join.JoinUtil;
 import org.apache.lucene.search.join.ScoreMode;
-import org.apache.lucene.uninverting.UninvertingReader;
 import org.apache.solr.cloud.ZkController;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.cloud.Aliases;
@@ -45,6 +44,7 @@ import org.apache.solr.search.QParser;
 import org.apache.solr.search.QParserPlugin;
 import org.apache.solr.search.SolrIndexSearcher;
 import org.apache.solr.search.SyntaxError;
+import org.apache.solr.uninverting.UninvertingReader;
 import org.apache.solr.util.RefCounted;
 
 /**
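The hunks above only retarget imports: Solr code now pulls UninvertingReader and DocTermOrds from org.apache.solr.uninverting, and SlowCompositeReaderWrapper from org.apache.solr.index, instead of the old org.apache.lucene packages. A minimal sketch of what a caller looks like after the move, assuming the Solr copies keep the same static wrap(...) entry points as the Lucene originals; the field name and Type mapping below are hypothetical, not taken from the patch.

// Sketch only, not part of the patch. Assumes the Solr copies expose the same
// wrap(...) methods as the Lucene originals; "category_s" is a hypothetical field.
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.LeafReader;
import org.apache.solr.index.SlowCompositeReaderWrapper;
import org.apache.solr.uninverting.UninvertingReader;

class UninvertingWrapExample {
  static LeafReader openUninvertingView(DirectoryReader in) throws IOException {
    Map<String, UninvertingReader.Type> mapping = new HashMap<>();
    mapping.put("category_s", UninvertingReader.Type.SORTED); // uninvert indexed terms as SORTED doc values
    DirectoryReader uninverting = UninvertingReader.wrap(in, mapping);
    // single-segment view over the whole index, as SolrIndexSearcher builds internally
    return SlowCompositeReaderWrapper.wrap(uninverting);
  }
}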
@@ -0,0 +1,887 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.uninverting;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.TimeUnit;

import org.apache.lucene.codecs.PostingsFormat; // javadocs
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.StringHelper;

/**
 * This class enables fast access to multiple term ords for
 * a specified field across all docIDs.
 *
 * Like FieldCache, it uninverts the index and holds a
 * packed data structure in RAM to enable fast access.
 * Unlike FieldCache, it can handle multi-valued fields,
 * and, it does not hold the term bytes in RAM.  Rather, you
 * must obtain a TermsEnum from the {@link #getOrdTermsEnum}
 * method, and then seek-by-ord to get the term's bytes.
 *
 * While normally term ords are type long, in this API they are
 * int as the internal representation here cannot address
 * more than MAX_INT unique terms.  Also, typically this
 * class is used on fields with relatively few unique terms
 * vs the number of documents.  In addition, there is an
 * internal limit (16 MB) on how many bytes each chunk of
 * documents may consume.  If you trip this limit you'll hit
 * an IllegalStateException.
 *
 * Deleted documents are skipped during uninversion, and if
 * you look them up you'll get 0 ords.
 *
 * The returned per-document ords do not retain their
 * original order in the document.  Instead they are returned
 * in sorted (by ord, ie term's BytesRef comparator) order.  They
 * are also de-dup'd (ie if doc has same term more than once
 * in this field, you'll only get that ord back once).
 *
 * This class
 * will create its own term index internally, allowing to
 * create a wrapped TermsEnum that can handle ord.  The
 * {@link #getOrdTermsEnum} method then provides this
 * wrapped enum.
 *
 * The RAM consumption of this class can be high!
 *
 * @lucene.experimental
 */

/*
 * Final form of the un-inverted field:
 *   Each document points to a list of term numbers that are contained in that document.
 *
 *   Term numbers are in sorted order, and are encoded as variable-length deltas from the
 *   previous term number.  Real term numbers start at 2 since 0 and 1 are reserved.  A
 *   term number of 0 signals the end of the termNumber list.
 *
 *   There is a single int[maxDoc()] which either contains a pointer into a byte[] for
 *   the termNumber lists, or directly contains the termNumber list if it fits in the 4
 *   bytes of an integer.  If the first byte in the integer is 1, the next 3 bytes
 *   are a pointer into a byte[] where the termNumber list starts.
 *
 *   There are actually 256 byte arrays, to compensate for the fact that the pointers
 *   into the byte arrays are only 3 bytes long.  The correct byte array for a document
 *   is a function of its id.
 *
 *   To save space and speed up faceting, any term that matches enough documents will
 *   not be un-inverted... it will be skipped while building the un-inverted field structure,
 *   and will use a set intersection method during faceting.
 *
 *   To further save memory, the terms (the actual string values) are not all stored in
 *   memory, but a TermIndex is used to convert term numbers to term values only
 *   for the terms needed after faceting has completed.  Only every 128th term value
 *   is stored, along with its corresponding term number, and this is used as an
 *   index to find the closest term and iterate until the desired number is hit (very
 *   much like Lucene's own internal term index).
 *
 */
public class DocTermOrds implements Accountable {

  // Term ords are shifted by this, internally, to reserve
  // values 0 (end term) and 1 (index is a pointer into byte array)
  private final static int TNUM_OFFSET = 2;

  /** Every 128th term is indexed, by default. */
  public final static int DEFAULT_INDEX_INTERVAL_BITS = 7; // decrease to a low number like 2 for testing

  private int indexIntervalBits;
  private int indexIntervalMask;
  private int indexInterval;

  /** Don't uninvert terms that exceed this count. */
  protected final int maxTermDocFreq;

  /** Field we are uninverting. */
  protected final String field;

  /** Number of terms in the field. */
  protected int numTermsInField;

  /** Total number of references to term numbers. */
  protected long termInstances;
  private long memsz;

  /** Total time to uninvert the field. */
  protected int total_time;

  /** Time for phase1 of the uninvert process. */
  protected int phase1_time;

  /** Holds the per-document ords or a pointer to the ords. */
  protected int[] index;

  /** Holds term ords for documents. */
  protected byte[][] tnums = new byte[256][];

  /** Total bytes (sum of term lengths) for all indexed terms.*/
  protected long sizeOfIndexedStrings;

  /** Holds the indexed (by default every 128th) terms. */
  protected BytesRef[] indexedTermsArray = new BytesRef[0];

  /** If non-null, only terms matching this prefix were
   *  indexed. */
  protected BytesRef prefix;

  /** Ordinal of the first term in the field, or 0 if the
   *  {@link PostingsFormat} does not implement {@link
   *  TermsEnum#ord}. */
  protected int ordBase;

  /** Used while uninverting. */
  protected PostingsEnum postingsEnum;

  /** If true, check and throw an exception if the field has docValues enabled.
   * Normally, docValues should be used in preference to DocTermOrds. */
  protected boolean checkForDocValues = true;

  /** Returns total bytes used. */
  public long ramBytesUsed() {
    // can cache the mem size since it shouldn't change
    if (memsz!=0) return memsz;
    long sz = 8*8 + 32; // local fields
    if (index != null) sz += index.length * 4;
    if (tnums!=null) {
      for (byte[] arr : tnums)
        if (arr != null) sz += arr.length;
    }
    memsz = sz;
    return sz;
  }

|
||||||
|
public DocTermOrds(LeafReader reader, Bits liveDocs, String field) throws IOException {
|
||||||
|
this(reader, liveDocs, field, null, Integer.MAX_VALUE);
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: instead of all these ctors and options, take termsenum!
|
||||||
|
|
||||||
|
/** Inverts only terms starting w/ prefix */
|
||||||
|
public DocTermOrds(LeafReader reader, Bits liveDocs, String field, BytesRef termPrefix) throws IOException {
|
||||||
|
this(reader, liveDocs, field, termPrefix, Integer.MAX_VALUE);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Inverts only terms starting w/ prefix, and only terms
|
||||||
|
* whose docFreq (not taking deletions into account) is
|
||||||
|
* <= maxTermDocFreq */
|
||||||
|
public DocTermOrds(LeafReader reader, Bits liveDocs, String field, BytesRef termPrefix, int maxTermDocFreq) throws IOException {
|
||||||
|
this(reader, liveDocs, field, termPrefix, maxTermDocFreq, DEFAULT_INDEX_INTERVAL_BITS);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Inverts only terms starting w/ prefix, and only terms
|
||||||
|
* whose docFreq (not taking deletions into account) is
|
||||||
|
* <= maxTermDocFreq, with a custom indexing interval
|
||||||
|
* (default is every 128nd term). */
|
||||||
|
public DocTermOrds(LeafReader reader, Bits liveDocs, String field, BytesRef termPrefix, int maxTermDocFreq, int indexIntervalBits) throws IOException {
|
||||||
|
this(field, maxTermDocFreq, indexIntervalBits);
|
||||||
|
uninvert(reader, liveDocs, termPrefix);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Subclass inits w/ this, but be sure you then call
|
||||||
|
* uninvert, only once */
|
||||||
|
protected DocTermOrds(String field, int maxTermDocFreq, int indexIntervalBits) {
|
||||||
|
//System.out.println("DTO init field=" + field + " maxTDFreq=" + maxTermDocFreq);
|
||||||
|
this.field = field;
|
||||||
|
this.maxTermDocFreq = maxTermDocFreq;
|
||||||
|
this.indexIntervalBits = indexIntervalBits;
|
||||||
|
indexIntervalMask = 0xffffffff >>> (32-indexIntervalBits);
|
||||||
|
indexInterval = 1 << indexIntervalBits;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a TermsEnum that implements ord, or null if no terms in field.
|
||||||
|
* <p>
|
||||||
|
* we build a "private" terms
|
||||||
|
* index internally (WARNING: consumes RAM) and use that
|
||||||
|
* index to implement ord. This also enables ord on top
|
||||||
|
* of a composite reader. The returned TermsEnum is
|
||||||
|
* unpositioned. This returns null if there are no terms.
|
||||||
|
* </p>
|
||||||
|
* <p><b>NOTE</b>: you must pass the same reader that was
|
||||||
|
* used when creating this class
|
||||||
|
*/
|
||||||
|
public TermsEnum getOrdTermsEnum(LeafReader reader) throws IOException {
|
||||||
|
// NOTE: see LUCENE-6529 before attempting to optimize this method to
|
||||||
|
// return a TermsEnum directly from the reader if it already supports ord().
|
||||||
|
|
||||||
|
assert null != indexedTermsArray;
|
||||||
|
|
||||||
|
if (0 == indexedTermsArray.length) {
|
||||||
|
return null;
|
||||||
|
} else {
|
||||||
|
return new OrdWrappedTermsEnum(reader);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the number of terms in this field
|
||||||
|
*/
|
||||||
|
public int numTerms() {
|
||||||
|
return numTermsInField;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns {@code true} if no terms were indexed.
|
||||||
|
*/
|
||||||
|
public boolean isEmpty() {
|
||||||
|
return index == null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Subclass can override this */
|
||||||
|
protected void visitTerm(TermsEnum te, int termNum) throws IOException {
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Invoked during {@link #uninvert(org.apache.lucene.index.LeafReader,Bits,BytesRef)}
|
||||||
|
* to record the document frequency for each uninverted
|
||||||
|
* term. */
|
||||||
|
protected void setActualDocFreq(int termNum, int df) throws IOException {
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Call this only once (if you subclass!) */
|
||||||
|
protected void uninvert(final LeafReader reader, Bits liveDocs, final BytesRef termPrefix) throws IOException {
|
||||||
|
final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
|
||||||
|
if (checkForDocValues && info != null && info.getDocValuesType() != DocValuesType.NONE) {
|
||||||
|
throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
|
||||||
|
}
|
||||||
|
//System.out.println("DTO uninvert field=" + field + " prefix=" + termPrefix);
|
||||||
|
final long startTime = System.nanoTime();
|
||||||
|
prefix = termPrefix == null ? null : BytesRef.deepCopyOf(termPrefix);
|
||||||
|
|
||||||
|
final int maxDoc = reader.maxDoc();
|
||||||
|
final int[] index = new int[maxDoc]; // immediate term numbers, or the index into the byte[] representing the last number
|
||||||
|
final int[] lastTerm = new int[maxDoc]; // last term we saw for this document
|
||||||
|
final byte[][] bytes = new byte[maxDoc][]; // list of term numbers for the doc (delta encoded vInts)
|
||||||
|
|
||||||
|
final Terms terms = reader.terms(field);
|
||||||
|
if (terms == null) {
|
||||||
|
// No terms
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
final TermsEnum te = terms.iterator();
|
||||||
|
final BytesRef seekStart = termPrefix != null ? termPrefix : new BytesRef();
|
||||||
|
//System.out.println("seekStart=" + seekStart.utf8ToString());
|
||||||
|
if (te.seekCeil(seekStart) == TermsEnum.SeekStatus.END) {
|
||||||
|
// No terms match
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// For our "term index wrapper"
|
||||||
|
final List<BytesRef> indexedTerms = new ArrayList<>();
|
||||||
|
final PagedBytes indexedTermsBytes = new PagedBytes(15);
|
||||||
|
|
||||||
|
// we need a minimum of 9 bytes, but round up to 12 since the space would
|
||||||
|
// be wasted with most allocators anyway.
|
||||||
|
byte[] tempArr = new byte[12];
|
||||||
|
|
||||||
|
//
|
||||||
|
// enumerate all terms, and build an intermediate form of the un-inverted field.
|
||||||
|
//
|
||||||
|
// During this intermediate form, every document has a (potential) byte[]
|
||||||
|
// and the int[maxDoc()] array either contains the termNumber list directly
|
||||||
|
// or the *end* offset of the termNumber list in its byte array (for faster
|
||||||
|
// appending and faster creation of the final form).
|
||||||
|
//
|
||||||
|
// idea... if things are too large while building, we could do a range of docs
|
||||||
|
// at a time (but it would be a fair amount slower to build)
|
||||||
|
// could also do ranges in parallel to take advantage of multiple CPUs
|
||||||
|
|
||||||
|
// OPTIONAL: remap the largest df terms to the lowest 128 (single byte)
|
||||||
|
// values. This requires going over the field first to find the most
|
||||||
|
// frequent terms ahead of time.
|
||||||
|
|
||||||
|
int termNum = 0;
|
||||||
|
postingsEnum = null;
|
||||||
|
|
||||||
|
// Loop begins with te positioned to first term (we call
|
||||||
|
// seek above):
|
||||||
|
for (;;) {
|
||||||
|
final BytesRef t = te.term();
|
||||||
|
if (t == null || (termPrefix != null && !StringHelper.startsWith(t, termPrefix))) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
//System.out.println("visit term=" + t.utf8ToString() + " " + t + " termNum=" + termNum);
|
||||||
|
|
||||||
|
visitTerm(te, termNum);
|
||||||
|
|
||||||
|
if ((termNum & indexIntervalMask) == 0) {
|
||||||
|
// Index this term
|
||||||
|
sizeOfIndexedStrings += t.length;
|
||||||
|
BytesRef indexedTerm = new BytesRef();
|
||||||
|
indexedTermsBytes.copy(t, indexedTerm);
|
||||||
|
// TODO: really should 1) strip off useless suffix,
|
||||||
|
// and 2) use FST not array/PagedBytes
|
||||||
|
indexedTerms.add(indexedTerm);
|
||||||
|
}
|
||||||
|
|
||||||
|
final int df = te.docFreq();
|
||||||
|
if (df <= maxTermDocFreq) {
|
||||||
|
|
||||||
|
postingsEnum = te.postings(postingsEnum, PostingsEnum.NONE);
|
||||||
|
|
||||||
|
// dF, but takes deletions into account
|
||||||
|
int actualDF = 0;
|
||||||
|
|
||||||
|
for (;;) {
|
||||||
|
int doc = postingsEnum.nextDoc();
|
||||||
|
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
//System.out.println(" chunk=" + chunk + " docs");
|
||||||
|
|
||||||
|
actualDF ++;
|
||||||
|
termInstances++;
|
||||||
|
|
||||||
|
//System.out.println(" docID=" + doc);
|
||||||
|
// add TNUM_OFFSET to the term number to make room for special reserved values:
|
||||||
|
// 0 (end term) and 1 (index into byte array follows)
|
||||||
|
int delta = termNum - lastTerm[doc] + TNUM_OFFSET;
|
||||||
|
lastTerm[doc] = termNum;
|
||||||
|
int val = index[doc];
|
||||||
|
|
||||||
|
if ((val & 0xff)==1) {
|
||||||
|
// index into byte array (actually the end of
|
||||||
|
// the doc-specific byte[] when building)
|
||||||
|
int pos = val >>> 8;
|
||||||
|
int ilen = vIntSize(delta);
|
||||||
|
byte[] arr = bytes[doc];
|
||||||
|
int newend = pos+ilen;
|
||||||
|
if (newend > arr.length) {
|
||||||
|
// We avoid a doubling strategy to lower memory usage.
|
||||||
|
// this faceting method isn't for docs with many terms.
|
||||||
|
// In hotspot, objects have 2 words of overhead, then fields, rounded up to a 64-bit boundary.
|
||||||
|
// TODO: figure out what array lengths we can round up to w/o actually using more memory
|
||||||
|
// (how much space does a byte[] take up? Is data preceded by a 32 bit length only?
|
||||||
|
// It should be safe to round up to the nearest 32 bits in any case.
|
||||||
|
int newLen = (newend + 3) & 0xfffffffc; // 4 byte alignment
|
||||||
|
byte[] newarr = new byte[newLen];
|
||||||
|
System.arraycopy(arr, 0, newarr, 0, pos);
|
||||||
|
arr = newarr;
|
||||||
|
bytes[doc] = newarr;
|
||||||
|
}
|
||||||
|
pos = writeInt(delta, arr, pos);
|
||||||
|
index[doc] = (pos<<8) | 1; // update pointer to end index in byte[]
|
||||||
|
} else {
|
||||||
|
// OK, this int has data in it... find the end (a zero starting byte - not
|
||||||
|
// part of another number, hence not following a byte with the high bit set).
|
||||||
|
int ipos;
|
||||||
|
if (val==0) {
|
||||||
|
ipos=0;
|
||||||
|
} else if ((val & 0x0000ff80)==0) {
|
||||||
|
ipos=1;
|
||||||
|
} else if ((val & 0x00ff8000)==0) {
|
||||||
|
ipos=2;
|
||||||
|
} else if ((val & 0xff800000)==0) {
|
||||||
|
ipos=3;
|
||||||
|
} else {
|
||||||
|
ipos=4;
|
||||||
|
}
|
||||||
|
|
||||||
|
//System.out.println(" ipos=" + ipos);
|
||||||
|
|
||||||
|
int endPos = writeInt(delta, tempArr, ipos);
|
||||||
|
//System.out.println(" endpos=" + endPos);
|
||||||
|
if (endPos <= 4) {
|
||||||
|
//System.out.println(" fits!");
|
||||||
|
// value will fit in the integer... move bytes back
|
||||||
|
for (int j=ipos; j<endPos; j++) {
|
||||||
|
val |= (tempArr[j] & 0xff) << (j<<3);
|
||||||
|
}
|
||||||
|
index[doc] = val;
|
||||||
|
} else {
|
||||||
|
// value won't fit... move integer into byte[]
|
||||||
|
for (int j=0; j<ipos; j++) {
|
||||||
|
tempArr[j] = (byte)val;
|
||||||
|
val >>>=8;
|
||||||
|
}
|
||||||
|
// point at the end index in the byte[]
|
||||||
|
index[doc] = (endPos<<8) | 1;
|
||||||
|
bytes[doc] = tempArr;
|
||||||
|
tempArr = new byte[12];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
setActualDocFreq(termNum, actualDF);
|
||||||
|
}
|
||||||
|
|
||||||
|
termNum++;
|
||||||
|
if (te.next() == null) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
numTermsInField = termNum;
|
||||||
|
|
||||||
|
long midPoint = System.nanoTime();
|
||||||
|
|
||||||
|
if (termInstances == 0) {
|
||||||
|
// we didn't invert anything
|
||||||
|
// lower memory consumption.
|
||||||
|
tnums = null;
|
||||||
|
} else {
|
||||||
|
|
||||||
|
this.index = index;
|
||||||
|
|
||||||
|
//
|
||||||
|
// transform intermediate form into the final form, building a single byte[]
|
||||||
|
// at a time, and releasing the intermediate byte[]s as we go to avoid
|
||||||
|
// increasing the memory footprint.
|
||||||
|
//
|
||||||
|
|
||||||
|
for (int pass = 0; pass<256; pass++) {
|
||||||
|
byte[] target = tnums[pass];
|
||||||
|
int pos=0; // end in target;
|
||||||
|
if (target != null) {
|
||||||
|
pos = target.length;
|
||||||
|
} else {
|
||||||
|
target = new byte[4096];
|
||||||
|
}
|
||||||
|
|
||||||
|
// loop over documents, 0x00ppxxxx, 0x01ppxxxx, 0x02ppxxxx
|
||||||
|
// where pp is the pass (which array we are building), and xx is all values.
|
||||||
|
// each pass shares the same byte[] for termNumber lists.
|
||||||
|
for (int docbase = pass<<16; docbase<maxDoc; docbase+=(1<<24)) {
|
||||||
|
int lim = Math.min(docbase + (1<<16), maxDoc);
|
||||||
|
for (int doc=docbase; doc<lim; doc++) {
|
||||||
|
//System.out.println(" pass=" + pass + " process docID=" + doc);
|
||||||
|
int val = index[doc];
|
||||||
|
if ((val&0xff) == 1) {
|
||||||
|
int len = val >>> 8;
|
||||||
|
//System.out.println(" ptr pos=" + pos);
|
||||||
|
index[doc] = (pos<<8)|1; // change index to point to start of array
|
||||||
|
if ((pos & 0xff000000) != 0) {
|
||||||
|
// we only have 24 bits for the array index
|
||||||
|
throw new IllegalStateException("Too many values for UnInvertedField faceting on field "+field);
|
||||||
|
}
|
||||||
|
byte[] arr = bytes[doc];
|
||||||
|
/*
|
||||||
|
for(byte b : arr) {
|
||||||
|
//System.out.println(" b=" + Integer.toHexString((int) b));
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
bytes[doc] = null; // IMPORTANT: allow GC to avoid OOM
|
||||||
|
if (target.length <= pos + len) {
|
||||||
|
int newlen = target.length;
|
||||||
|
/*** we don't have to worry about the array getting too large
|
||||||
|
* since the "pos" param will overflow first (only 24 bits available)
|
||||||
|
if ((newlen<<1) <= 0) {
|
||||||
|
// overflow...
|
||||||
|
newlen = Integer.MAX_VALUE;
|
||||||
|
if (newlen <= pos + len) {
|
||||||
|
throw new SolrException(400,"Too many terms to uninvert field!");
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
while (newlen <= pos + len) newlen<<=1; // doubling strategy
|
||||||
|
}
|
||||||
|
****/
|
||||||
|
while (newlen <= pos + len) newlen<<=1; // doubling strategy
|
||||||
|
byte[] newtarget = new byte[newlen];
|
||||||
|
System.arraycopy(target, 0, newtarget, 0, pos);
|
||||||
|
target = newtarget;
|
||||||
|
}
|
||||||
|
System.arraycopy(arr, 0, target, pos, len);
|
||||||
|
pos += len + 1; // skip single byte at end and leave it 0 for terminator
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// shrink array
|
||||||
|
if (pos < target.length) {
|
||||||
|
byte[] newtarget = new byte[pos];
|
||||||
|
System.arraycopy(target, 0, newtarget, 0, pos);
|
||||||
|
target = newtarget;
|
||||||
|
}
|
||||||
|
|
||||||
|
tnums[pass] = target;
|
||||||
|
|
||||||
|
if ((pass << 16) > maxDoc)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
indexedTermsArray = indexedTerms.toArray(new BytesRef[indexedTerms.size()]);
|
||||||
|
|
||||||
|
long endTime = System.nanoTime();
|
||||||
|
|
||||||
|
total_time = (int) TimeUnit.MILLISECONDS.convert(endTime-startTime, TimeUnit.NANOSECONDS);
|
||||||
|
phase1_time = (int) TimeUnit.MILLISECONDS.convert(midPoint-startTime, TimeUnit.NANOSECONDS);
|
||||||
|
}
|
||||||
|
|
||||||
|
  /** Number of bytes to represent an unsigned int as a vint. */
  private static int vIntSize(int x) {
    if ((x & (0xffffffff << (7*1))) == 0 ) {
      return 1;
    }
    if ((x & (0xffffffff << (7*2))) == 0 ) {
      return 2;
    }
    if ((x & (0xffffffff << (7*3))) == 0 ) {
      return 3;
    }
    if ((x & (0xffffffff << (7*4))) == 0 ) {
      return 4;
    }
    return 5;
  }

  // todo: if we know the size of the vInt already, we could do
  // a single switch on the size
  private static int writeInt(int x, byte[] arr, int pos) {
    int a;
    a = (x >>> (7*4));
    if (a != 0) {
      arr[pos++] = (byte)(a | 0x80);
    }
    a = (x >>> (7*3));
    if (a != 0) {
      arr[pos++] = (byte)(a | 0x80);
    }
    a = (x >>> (7*2));
    if (a != 0) {
      arr[pos++] = (byte)(a | 0x80);
    }
    a = (x >>> (7*1));
    if (a != 0) {
      arr[pos++] = (byte)(a | 0x80);
    }
    arr[pos++] = (byte)(x & 0x7f);
    return pos;
  }

  /**
   * "wrap" our own terms index around the original IndexReader.
   * Only valid if there are terms for this field rom the original reader
   */
  private final class OrdWrappedTermsEnum extends TermsEnum {
    private final TermsEnum termsEnum;
    private BytesRef term;
    private long ord = -indexInterval-1;          // force "real" seek

    public OrdWrappedTermsEnum(LeafReader reader) throws IOException {
      assert indexedTermsArray != null;
      assert 0 != indexedTermsArray.length;
      termsEnum = reader.fields().terms(field).iterator();
    }

    @Override
    public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException {
      return termsEnum.postings(reuse, flags);
    }

    @Override
    public BytesRef term() {
      return term;
    }

    @Override
    public BytesRef next() throws IOException {
      if (++ord < 0) {
        ord = 0;
      }
      if (termsEnum.next() == null) {
        term = null;
        return null;
      }
      return setTerm();  // this is extra work if we know we are in bounds...
    }

    @Override
    public int docFreq() throws IOException {
      return termsEnum.docFreq();
    }

    @Override
    public long totalTermFreq() throws IOException {
      return termsEnum.totalTermFreq();
    }

    @Override
    public long ord() {
      return ordBase + ord;
    }

    @Override
    public SeekStatus seekCeil(BytesRef target) throws IOException {

      // already here
      if (term != null && term.equals(target)) {
        return SeekStatus.FOUND;
      }

      int startIdx = Arrays.binarySearch(indexedTermsArray, target);

      if (startIdx >= 0) {
        // we hit the term exactly... lucky us!
        TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(target);
        assert seekStatus == TermsEnum.SeekStatus.FOUND;
        ord = startIdx << indexIntervalBits;
        setTerm();
        assert term != null;
        return SeekStatus.FOUND;
      }

      // we didn't hit the term exactly
      startIdx = -startIdx-1;

      if (startIdx == 0) {
        // our target occurs *before* the first term
        TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(target);
        assert seekStatus == TermsEnum.SeekStatus.NOT_FOUND;
        ord = 0;
        setTerm();
        assert term != null;
        return SeekStatus.NOT_FOUND;
      }

      // back up to the start of the block
      startIdx--;

      if ((ord >> indexIntervalBits) == startIdx && term != null && term.compareTo(target) <= 0) {
        // we are already in the right block and the current term is before the term we want,
        // so we don't need to seek.
      } else {
        // seek to the right block
        TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(indexedTermsArray[startIdx]);
        assert seekStatus == TermsEnum.SeekStatus.FOUND;
        ord = startIdx << indexIntervalBits;
        setTerm();
        assert term != null;  // should be non-null since it's in the index
      }

      while (term != null && term.compareTo(target) < 0) {
        next();
      }

      if (term == null) {
        return SeekStatus.END;
      } else if (term.compareTo(target) == 0) {
        return SeekStatus.FOUND;
      } else {
        return SeekStatus.NOT_FOUND;
      }
    }

    @Override
    public void seekExact(long targetOrd) throws IOException {
      int delta = (int) (targetOrd - ordBase - ord);
      //System.out.println("  seek(ord) targetOrd=" + targetOrd + " delta=" + delta + " ord=" + ord + " ii=" + indexInterval);
      if (delta < 0 || delta > indexInterval) {
        final int idx = (int) (targetOrd >>> indexIntervalBits);
        final BytesRef base = indexedTermsArray[idx];
        //System.out.println("  do seek term=" + base.utf8ToString());
        ord = idx << indexIntervalBits;
        delta = (int) (targetOrd - ord);
        final TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(base);
        assert seekStatus == TermsEnum.SeekStatus.FOUND;
      } else {
        //System.out.println("seek w/in block");
      }

      while (--delta >= 0) {
        BytesRef br = termsEnum.next();
        if (br == null) {
          assert false;
          return;
        }
        ord++;
      }

      setTerm();
      assert term != null;
    }

    private BytesRef setTerm() throws IOException {
      term = termsEnum.term();
      //System.out.println("  setTerm() term=" + term.utf8ToString() + " vs prefix=" + (prefix == null ? "null" : prefix.utf8ToString()));
      if (prefix != null && !StringHelper.startsWith(term, prefix)) {
        term = null;
      }
      return term;
    }
  }

  /** Returns the term ({@link BytesRef}) corresponding to
   *  the provided ordinal. */
  public BytesRef lookupTerm(TermsEnum termsEnum, int ord) throws IOException {
    termsEnum.seekExact(ord);
    return termsEnum.term();
  }

  /** Returns a SortedSetDocValues view of this instance */
  public SortedSetDocValues iterator(LeafReader reader) throws IOException {
    if (isEmpty()) {
      return DocValues.emptySortedSet();
    } else {
      return new Iterator(reader);
    }
  }

  private class Iterator extends SortedSetDocValues {
    final LeafReader reader;
    final TermsEnum te;  // used internally for lookupOrd() and lookupTerm()
    // currently we read 5 at a time (using the logic of the old iterator)
    final int buffer[] = new int[5];
    int bufferUpto;
    int bufferLength;

    private int tnum;
    private int upto;
    private byte[] arr;

    Iterator(LeafReader reader) throws IOException {
      this.reader = reader;
      this.te = termsEnum();
    }

    @Override
    public long nextOrd() {
      while (bufferUpto == bufferLength) {
        if (bufferLength < buffer.length) {
          return NO_MORE_ORDS;
        } else {
          bufferLength = read(buffer);
          bufferUpto = 0;
        }
      }
      return buffer[bufferUpto++];
    }

    /** Buffer must be at least 5 ints long.  Returns number
     *  of term ords placed into buffer; if this count is
     *  less than buffer.length then that is the end. */
    int read(int[] buffer) {
      int bufferUpto = 0;
      if (arr == null) {
        // code is inlined into upto
        //System.out.println("inlined");
        int code = upto;
        int delta = 0;
        for (;;) {
          delta = (delta << 7) | (code & 0x7f);
          if ((code & 0x80)==0) {
            if (delta==0) break;
            tnum += delta - TNUM_OFFSET;
            buffer[bufferUpto++] = ordBase+tnum;
            //System.out.println("  tnum=" + tnum);
            delta = 0;
          }
          code >>>= 8;
        }
      } else {
        // code is a pointer
        for(;;) {
          int delta = 0;
          for(;;) {
            byte b = arr[upto++];
            delta = (delta << 7) | (b & 0x7f);
            //System.out.println("    cycle: upto=" + upto + " delta=" + delta + " b=" + b);
            if ((b & 0x80) == 0) break;
          }
          //System.out.println("  delta=" + delta);
          if (delta == 0) break;
          tnum += delta - TNUM_OFFSET;
          //System.out.println("  tnum=" + tnum);
          buffer[bufferUpto++] = ordBase+tnum;
          if (bufferUpto == buffer.length) {
            break;
          }
        }
      }

      return bufferUpto;
    }

    @Override
    public void setDocument(int docID) {
      tnum = 0;
      final int code = index[docID];
      if ((code & 0xff)==1) {
        // a pointer
        upto = code>>>8;
        //System.out.println("    pointer!  upto=" + upto);
        int whichArray = (docID >>> 16) & 0xff;
        arr = tnums[whichArray];
      } else {
        //System.out.println("    inline!");
        arr = null;
        upto = code;
      }
      bufferUpto = 0;
      bufferLength = read(buffer);
    }

    @Override
    public BytesRef lookupOrd(long ord) {
      try {
        return DocTermOrds.this.lookupTerm(te, (int) ord);
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }

    @Override
    public long getValueCount() {
      return numTerms();
    }

    @Override
    public long lookupTerm(BytesRef key) {
      try {
        switch (te.seekCeil(key)) {
          case FOUND:
            assert te.ord() >= 0;
            return te.ord();
          case NOT_FOUND:
            assert te.ord() >= 0;
            return -te.ord()-1;
          default: /* END */
            return -numTerms()-1;
        }
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }

    @Override
    public TermsEnum termsEnum() {
      try {
        return getOrdTermsEnum(reader);
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }
  }
}
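The class javadoc above describes the intended access pattern: uninvert a field once per reader, read the per-document ords, and seek-by-ord to recover the term bytes. A minimal usage sketch under those assumptions; the reader variable and the multi-valued field name "tags_ss" are hypothetical, not taken from the patch.

// Sketch only, not part of the patch; `leaf` and the field "tags_ss" are hypothetical.
import java.io.IOException;

import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.uninverting.DocTermOrds;

class DocTermOrdsExample {
  static void printOrds(LeafReader leaf, int docId) throws IOException {
    DocTermOrds dto = new DocTermOrds(leaf, leaf.getLiveDocs(), "tags_ss");
    TermsEnum te = dto.getOrdTermsEnum(leaf);     // ord-capable enum; null if the field has no terms
    if (te == null) {
      return;
    }
    SortedSetDocValues ords = dto.iterator(leaf); // per-document view of the uninverted ords
    ords.setDocument(docId);
    for (long ord = ords.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = ords.nextOrd()) {
      BytesRef term = dto.lookupTerm(te, (int) ord); // seek-by-ord to get the term bytes
      System.out.println(ord + " -> " + term.utf8ToString());
    }
  }
}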
@@ -0,0 +1,466 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.uninverting;

import java.io.IOException;
import java.io.PrintStream;

import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.IndexReader; // javadocs
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LegacyNumericUtils;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.RamUsageEstimator;

/**
 * Expert: Maintains caches of term values.
 *
 * <p>Created: May 19, 2004 11:13:14 AM
 *
 * @since   lucene 1.4
 * @see FieldCacheSanityChecker
 *
 * @lucene.internal
 */
interface FieldCache {

  /**
   * Placeholder indicating creation of this cache is currently in-progress.
   */
  public static final class CreationPlaceholder implements Accountable {
    Accountable value;

    @Override
    public long ramBytesUsed() {
      // don't call on the in-progress value, might make things angry.
      return RamUsageEstimator.NUM_BYTES_OBJECT_REF;
    }
  }

  /**
   * interface to all parsers. It is used to parse different numeric types.
   */
  public interface Parser {

    /**
     * Pulls a {@link TermsEnum} from the given {@link Terms}. This method allows certain parsers
     * to filter the actual TermsEnum before the field cache is filled.
     *
     * @param terms the {@link Terms} instance to create the {@link TermsEnum} from.
     * @return a possibly filtered {@link TermsEnum} instance, this method must not return <code>null</code>.
     * @throws IOException if an {@link IOException} occurs
     * @deprecated index with Points instead
     */
    @Deprecated
    public TermsEnum termsEnum(Terms terms) throws IOException;

    /** Parse's this field's value */
    public long parseValue(BytesRef term);
  }

  /**
   * Base class for points parsers. These parsers do not use the inverted index, but instead
   * uninvert point data.
   *
   * This abstraction can be cleaned up when Parser.termsEnum is removed.
   */
  public abstract class PointParser implements Parser {
    public final TermsEnum termsEnum(Terms terms) throws IOException {
      throw new UnsupportedOperationException("makes no sense for parsing points");
    }
  }

  /** Expert: The cache used internally by sorting and range query classes. */
  public static FieldCache DEFAULT = new FieldCacheImpl();

|
||||||
|
* A parser instance for int values encoded by {@link org.apache.lucene.util.NumericUtils}, e.g. when indexed
|
||||||
|
* via {@link org.apache.lucene.document.IntPoint}.
|
||||||
|
*/
|
||||||
|
public static final Parser INT_POINT_PARSER = new PointParser() {
|
||||||
|
@Override
|
||||||
|
public long parseValue(BytesRef point) {
|
||||||
|
return NumericUtils.sortableBytesToInt(point.bytes, point.offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return FieldCache.class.getName()+".INT_POINT_PARSER";
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A parser instance for long values encoded by {@link org.apache.lucene.util.NumericUtils}, e.g. when indexed
|
||||||
|
* via {@link org.apache.lucene.document.LongPoint}.
|
||||||
|
*/
|
||||||
|
public static final Parser LONG_POINT_PARSER = new PointParser() {
|
||||||
|
@Override
|
||||||
|
public long parseValue(BytesRef point) {
|
||||||
|
return NumericUtils.sortableBytesToLong(point.bytes, point.offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return FieldCache.class.getName()+".LONG_POINT_PARSER";
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A parser instance for float values encoded by {@link org.apache.lucene.util.NumericUtils}, e.g. when indexed
|
||||||
|
* via {@link org.apache.lucene.document.FloatPoint}.
|
||||||
|
*/
|
||||||
|
public static final Parser FLOAT_POINT_PARSER = new PointParser() {
|
||||||
|
@Override
|
||||||
|
public long parseValue(BytesRef point) {
|
||||||
|
return NumericUtils.sortableFloatBits(NumericUtils.sortableBytesToInt(point.bytes, point.offset));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return FieldCache.class.getName()+".FLOAT_POINT_PARSER";
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A parser instance for double values encoded by {@link org.apache.lucene.util.NumericUtils}, e.g. when indexed
|
||||||
|
* via {@link org.apache.lucene.document.DoublePoint}.
|
||||||
|
*/
|
||||||
|
public static final Parser DOUBLE_POINT_PARSER = new PointParser() {
|
||||||
|
@Override
|
||||||
|
public long parseValue(BytesRef point) {
|
||||||
|
return NumericUtils.sortableDoubleBits(NumericUtils.sortableBytesToLong(point.bytes, point.offset));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return FieldCache.class.getName()+".DOUBLE_POINT_PARSER";
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A parser instance for int values encoded by {@link org.apache.lucene.util.LegacyNumericUtils}, e.g. when indexed
|
||||||
|
* via {@link org.apache.lucene.document.LegacyIntField}/{@link org.apache.lucene.analysis.LegacyNumericTokenStream}.
|
||||||
|
* @deprecated Index with points and use {@link #INT_POINT_PARSER} instead.
|
||||||
|
*/
|
||||||
|
@Deprecated
|
||||||
|
public static final Parser LEGACY_INT_PARSER = new Parser() {
|
||||||
|
@Override
|
||||||
|
public long parseValue(BytesRef term) {
|
||||||
|
return LegacyNumericUtils.prefixCodedToInt(term);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public TermsEnum termsEnum(Terms terms) throws IOException {
|
||||||
|
return LegacyNumericUtils.filterPrefixCodedInts(terms.iterator());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return FieldCache.class.getName()+".LEGACY_INT_PARSER";
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A parser instance for float values encoded with {@link org.apache.lucene.util.LegacyNumericUtils}, e.g. when indexed
|
||||||
|
* via {@link org.apache.lucene.document.LegacyFloatField}/{@link org.apache.lucene.analysis.LegacyNumericTokenStream}.
|
||||||
|
* @deprecated Index with points and use {@link #FLOAT_POINT_PARSER} instead.
|
||||||
|
*/
|
||||||
|
@Deprecated
|
||||||
|
public static final Parser LEGACY_FLOAT_PARSER = new Parser() {
|
||||||
|
@Override
|
||||||
|
public long parseValue(BytesRef term) {
|
||||||
|
int val = LegacyNumericUtils.prefixCodedToInt(term);
|
||||||
|
if (val<0) val ^= 0x7fffffff;
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return FieldCache.class.getName()+".LEGACY_FLOAT_PARSER";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public TermsEnum termsEnum(Terms terms) throws IOException {
|
||||||
|
return LegacyNumericUtils.filterPrefixCodedInts(terms.iterator());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A parser instance for long values encoded by {@link org.apache.lucene.util.LegacyNumericUtils}, e.g. when indexed
|
||||||
|
* via {@link org.apache.lucene.document.LegacyLongField}/{@link org.apache.lucene.analysis.LegacyNumericTokenStream}.
|
||||||
|
* @deprecated Index with points and use {@link #LONG_POINT_PARSER} instead.
|
||||||
|
*/
|
||||||
|
@Deprecated
|
||||||
|
public static final Parser LEGACY_LONG_PARSER = new Parser() {
|
||||||
|
@Override
|
||||||
|
public long parseValue(BytesRef term) {
|
||||||
|
return LegacyNumericUtils.prefixCodedToLong(term);
|
||||||
|
}
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return FieldCache.class.getName()+".LEGACY_LONG_PARSER";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public TermsEnum termsEnum(Terms terms) throws IOException {
|
||||||
|
return LegacyNumericUtils.filterPrefixCodedLongs(terms.iterator());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A parser instance for double values encoded with {@link org.apache.lucene.util.LegacyNumericUtils}, e.g. when indexed
|
||||||
|
* via {@link org.apache.lucene.document.LegacyDoubleField}/{@link org.apache.lucene.analysis.LegacyNumericTokenStream}.
|
||||||
|
* @deprecated Index with points and use {@link #DOUBLE_POINT_PARSER} instead.
|
||||||
|
*/
|
||||||
|
@Deprecated
|
||||||
|
public static final Parser LEGACY_DOUBLE_PARSER = new Parser() {
|
||||||
|
@Override
|
||||||
|
public long parseValue(BytesRef term) {
|
||||||
|
long val = LegacyNumericUtils.prefixCodedToLong(term);
|
||||||
|
if (val<0) val ^= 0x7fffffffffffffffL;
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return FieldCache.class.getName()+".LEGACY_DOUBLE_PARSER";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public TermsEnum termsEnum(Terms terms) throws IOException {
|
||||||
|
return LegacyNumericUtils.filterPrefixCodedLongs(terms.iterator());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/** Checks the internal cache for an appropriate entry, and if none is found,
|
||||||
|
* reads the terms/points in <code>field</code> and returns a bit set at the size of
|
||||||
|
* <code>reader.maxDoc()</code>, with turned on bits for each docid that
|
||||||
|
* does have a value for this field.
|
||||||
|
* @param parser May be {@code null} if coming from the inverted index, otherwise
|
||||||
|
* can be a {@link PointParser} to compute from point values.
|
||||||
|
*/
|
||||||
|
public Bits getDocsWithField(LeafReader reader, String field, Parser parser) throws IOException;
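A minimal usage sketch (not part of the patch; the LeafReader `reader`, the `docID`, and the IntPoint field name "price" are assumptions for illustration):

  // Sketch: which documents have a value for the point-indexed field "price"?
  Bits docsWithPrice = FieldCache.DEFAULT.getDocsWithField(reader, "price", FieldCache.INT_POINT_PARSER);
  boolean hasPrice = docsWithPrice.get(docID);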
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a {@link NumericDocValues} over the values found in documents in the given
|
||||||
|
* field. If the field was indexed as {@link NumericDocValuesField}, it simply
|
||||||
|
* uses {@link org.apache.lucene.index.LeafReader#getNumericDocValues(String)} to read the values.
|
||||||
|
* Otherwise, it checks the internal cache for an appropriate entry, and if
|
||||||
|
* none is found, reads the terms/points in <code>field</code> as longs and returns
|
||||||
|
* an array of size <code>reader.maxDoc()</code> of the value each document
|
||||||
|
* has in the given field.
|
||||||
|
*
|
||||||
|
* @param reader
|
||||||
|
* Used to get field values.
|
||||||
|
* @param field
|
||||||
|
* Which field contains the longs.
|
||||||
|
* @param parser
|
||||||
|
* Computes long for string values. May be {@code null} if the
|
||||||
|
* requested field was indexed as {@link NumericDocValuesField} or
|
||||||
|
* {@link org.apache.lucene.document.LegacyLongField}.
|
||||||
|
* @param setDocsWithField
|
||||||
|
* If true then {@link #getDocsWithField} will also be computed and
|
||||||
|
* stored in the FieldCache.
|
||||||
|
* @return The values in the given field for each document.
|
||||||
|
* @throws IOException
|
||||||
|
* If any error occurs.
|
||||||
|
*/
|
||||||
|
public NumericDocValues getNumerics(LeafReader reader, String field, Parser parser, boolean setDocsWithField) throws IOException;
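For illustration, a minimal sketch of uninverting a point field through this method (assumptions: a LeafReader `reader` and an IntPoint field "price"; not part of the patch):

  // Sketch: uninvert an IntPoint field into per-document numeric values.
  NumericDocValues prices = FieldCache.DEFAULT.getNumerics(reader, "price", FieldCache.INT_POINT_PARSER, true);
  for (int docID = 0; docID < reader.maxDoc(); docID++) {
    long value = prices.get(docID); // 0 for documents without a value; use getDocsWithField to tell them apart
  }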
|
||||||
|
|
||||||
|
/** Checks the internal cache for an appropriate entry, and if none
|
||||||
|
* is found, reads the term values in <code>field</code>
|
||||||
|
* and returns a {@link BinaryDocValues} instance, providing a
|
||||||
|
* method to retrieve the term (as a BytesRef) per document.
|
||||||
|
* @param reader Used to get field values.
|
||||||
|
* @param field Which field contains the strings.
|
||||||
|
* @param setDocsWithField If true then {@link #getDocsWithField} will
|
||||||
|
* also be computed and stored in the FieldCache.
|
||||||
|
* @return The values in the given field for each document.
|
||||||
|
* @throws IOException If any error occurs.
|
||||||
|
*/
|
||||||
|
public BinaryDocValues getTerms(LeafReader reader, String field, boolean setDocsWithField) throws IOException;
|
||||||
|
|
||||||
|
/** Expert: just like {@link #getTerms(org.apache.lucene.index.LeafReader,String,boolean)},
|
||||||
|
* but you can specify whether more RAM should be consumed in exchange for
|
||||||
|
* faster lookups (default is "true"). Note that the
|
||||||
|
* first call for a given reader and field "wins",
|
||||||
|
* subsequent calls will share the same cache entry. */
|
||||||
|
public BinaryDocValues getTerms(LeafReader reader, String field, boolean setDocsWithField, float acceptableOverheadRatio) throws IOException;
|
||||||
|
|
||||||
|
/** Checks the internal cache for an appropriate entry, and if none
|
||||||
|
* is found, reads the term values in <code>field</code>
|
||||||
|
* and returns a {@link SortedDocValues} instance,
|
||||||
|
* providing methods to retrieve sort ordinals and terms
|
||||||
|
   * (as a BytesRef) per document.
|
||||||
|
* @param reader Used to get field values.
|
||||||
|
* @param field Which field contains the strings.
|
||||||
|
* @return The values in the given field for each document.
|
||||||
|
* @throws IOException If any error occurs.
|
||||||
|
*/
|
||||||
|
public SortedDocValues getTermsIndex(LeafReader reader, String field) throws IOException;
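A minimal sketch of reading the returned SortedDocValues (assumptions: `reader`, `docID`, and a single-valued string field "state"; not part of the patch):

  // Sketch: per-document ordinal and term for a single-valued string field.
  SortedDocValues states = FieldCache.DEFAULT.getTermsIndex(reader, "state");
  int ord = states.getOrd(docID);                       // -1 when the document has no value
  BytesRef term = ord < 0 ? null : states.lookupOrd(ord);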
|
||||||
|
|
||||||
|
/** Expert: just like {@link
|
||||||
|
* #getTermsIndex(org.apache.lucene.index.LeafReader,String)}, but you can specify
|
||||||
|
* whether more RAM should be consumed in exchange for
|
||||||
|
* faster lookups (default is "true"). Note that the
|
||||||
|
* first call for a given reader and field "wins",
|
||||||
|
* subsequent calls will share the same cache entry. */
|
||||||
|
public SortedDocValues getTermsIndex(LeafReader reader, String field, float acceptableOverheadRatio) throws IOException;
|
||||||
|
|
||||||
|
/** Can be passed to {@link #getDocTermOrds} to filter for 32-bit numeric terms */
|
||||||
|
public static final BytesRef INT32_TERM_PREFIX = new BytesRef(new byte[] { LegacyNumericUtils.SHIFT_START_INT });
|
||||||
|
/** Can be passed to {@link #getDocTermOrds} to filter for 64-bit numeric terms */
|
||||||
|
public static final BytesRef INT64_TERM_PREFIX = new BytesRef(new byte[] { LegacyNumericUtils.SHIFT_START_LONG });
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks the internal cache for an appropriate entry, and if none is found, reads the term values
|
||||||
|
* in <code>field</code> and returns a {@link DocTermOrds} instance, providing a method to retrieve
|
||||||
|
* the terms (as ords) per document.
|
||||||
|
*
|
||||||
|
* @param reader Used to build a {@link DocTermOrds} instance
|
||||||
|
* @param field Which field contains the strings.
|
||||||
|
* @param prefix prefix for a subset of the terms which should be uninverted. Can be null or
|
||||||
|
* {@link #INT32_TERM_PREFIX} or {@link #INT64_TERM_PREFIX}
|
||||||
|
*
|
||||||
|
* @return a {@link DocTermOrds} instance
|
||||||
|
* @throws IOException If any error occurs.
|
||||||
|
*/
|
||||||
|
public SortedSetDocValues getDocTermOrds(LeafReader reader, String field, BytesRef prefix) throws IOException;
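A minimal sketch of consuming the returned SortedSetDocValues (assumptions: `reader`, `docID`, and a multi-valued string field "tags"; not part of the patch):

  // Sketch: iterate the term ordinals of a multi-valued field, one document at a time.
  SortedSetDocValues tags = FieldCache.DEFAULT.getDocTermOrds(reader, "tags", null);
  tags.setDocument(docID);
  for (long ord = tags.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = tags.nextOrd()) {
    BytesRef term = tags.lookupOrd(ord);
  }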
|
||||||
|
|
||||||
|
/**
|
||||||
|
* EXPERT: A unique Identifier/Description for each item in the FieldCache.
|
||||||
|
* Can be useful for logging/debugging.
|
||||||
|
* @lucene.experimental
|
||||||
|
*/
|
||||||
|
public final class CacheEntry {
|
||||||
|
|
||||||
|
private final Object readerKey;
|
||||||
|
private final String fieldName;
|
||||||
|
private final Class<?> cacheType;
|
||||||
|
private final Object custom;
|
||||||
|
private final Accountable value;
|
||||||
|
|
||||||
|
public CacheEntry(Object readerKey, String fieldName,
|
||||||
|
Class<?> cacheType,
|
||||||
|
Object custom,
|
||||||
|
Accountable value) {
|
||||||
|
this.readerKey = readerKey;
|
||||||
|
this.fieldName = fieldName;
|
||||||
|
this.cacheType = cacheType;
|
||||||
|
this.custom = custom;
|
||||||
|
this.value = value;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Object getReaderKey() {
|
||||||
|
return readerKey;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getFieldName() {
|
||||||
|
return fieldName;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Class<?> getCacheType() {
|
||||||
|
return cacheType;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Object getCustom() {
|
||||||
|
return custom;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Object getValue() {
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
     * The estimated size of the cached value, in human-readable units.
|
||||||
|
*/
|
||||||
|
public String getEstimatedSize() {
|
||||||
|
long bytesUsed = value == null ? 0L : value.ramBytesUsed();
|
||||||
|
return RamUsageEstimator.humanReadableUnits(bytesUsed);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
StringBuilder b = new StringBuilder(250);
|
||||||
|
b.append("'").append(getReaderKey()).append("'=>");
|
||||||
|
b.append("'").append(getFieldName()).append("',");
|
||||||
|
b.append(getCacheType()).append(",").append(getCustom());
|
||||||
|
b.append("=>").append(getValue().getClass().getName()).append("#");
|
||||||
|
b.append(System.identityHashCode(getValue()));
|
||||||
|
|
||||||
|
String s = getEstimatedSize();
|
||||||
|
b.append(" (size =~ ").append(s).append(')');
|
||||||
|
|
||||||
|
return b.toString();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* EXPERT: Generates an array of CacheEntry objects representing all items
|
||||||
|
* currently in the FieldCache.
|
||||||
|
* <p>
|
||||||
|
* NOTE: These CacheEntry objects maintain a strong reference to the
|
||||||
|
   * Cached Values. Maintaining references to a CacheEntry after the AtomicIndexReader
   * associated with it has been garbage collected will prevent the Value itself
|
||||||
|
* from being garbage collected when the Cache drops the WeakReference.
|
||||||
|
* </p>
|
||||||
|
* @lucene.experimental
|
||||||
|
*/
|
||||||
|
public CacheEntry[] getCacheEntries();
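For example, the entries could be dumped for debugging like this (sketch only; assumes whatever is currently cached in FieldCache.DEFAULT is acceptable to print):

  // Sketch: log a description of everything currently held by the FieldCache.
  for (FieldCache.CacheEntry entry : FieldCache.DEFAULT.getCacheEntries()) {
    System.out.println(entry); // includes reader key, field, type and estimated size
  }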
|
||||||
|
|
||||||
|
/**
|
||||||
|
* <p>
|
||||||
|
* EXPERT: Instructs the FieldCache to forcibly expunge all entries
|
||||||
|
* from the underlying caches. This is intended only to be used for
|
||||||
|
* test methods as a way to ensure a known base state of the Cache
|
||||||
|
   * (without needing to rely on GC to free WeakReferences).
|
||||||
|
* It should not be relied on for "Cache maintenance" in general
|
||||||
|
* application code.
|
||||||
|
* </p>
|
||||||
|
* @lucene.experimental
|
||||||
|
*/
|
||||||
|
public void purgeAllCaches();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Expert: drops all cache entries associated with this
|
||||||
|
* reader {@link IndexReader#getCoreCacheKey}. NOTE: this cache key must
|
||||||
|
* precisely match the reader that the cache entry is
|
||||||
|
* keyed on. If you pass a top-level reader, it usually
|
||||||
|
* will have no effect as Lucene now caches at the segment
|
||||||
|
* reader level.
|
||||||
|
*/
|
||||||
|
public void purgeByCacheKey(Object coreCacheKey);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* If non-null, FieldCacheImpl will warn whenever
|
||||||
|
* entries are created that are not sane according to
|
||||||
|
* {@link FieldCacheSanityChecker}.
|
||||||
|
*/
|
||||||
|
public void setInfoStream(PrintStream stream);
|
||||||
|
|
||||||
|
/** counterpart of {@link #setInfoStream(PrintStream)} */
|
||||||
|
public PrintStream getInfoStream();
|
||||||
|
}
|
File diff suppressed because it is too large
|
@@ -0,0 +1,425 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.solr.uninverting;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.IndexReaderContext;
|
||||||
|
import org.apache.lucene.store.AlreadyClosedException;
|
||||||
|
import org.apache.lucene.util.MapOfSets;
|
||||||
|
import org.apache.solr.uninverting.FieldCache.CacheEntry;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Provides methods for sanity checking that entries in the FieldCache
|
||||||
|
* are not wasteful or inconsistent.
|
||||||
|
|
||||||
|
* <p>
|
||||||
|
 * Lucene 2.9 introduced numerous enhancements into how the FieldCache
|
||||||
|
* is used by the low levels of Lucene searching (for Sorting and
|
||||||
|
* ValueSourceQueries) to improve both the speed for Sorting, as well
|
||||||
|
* as reopening of IndexReaders. But these changes have shifted the
|
||||||
|
* usage of FieldCache from "top level" IndexReaders (frequently a
|
||||||
|
* MultiReader or DirectoryReader) down to the leaf level SegmentReaders.
|
||||||
|
* As a result, existing applications that directly access the FieldCache
|
||||||
|
* may find RAM usage increase significantly when upgrading to 2.9 or
|
||||||
|
 * later. This class provides an API for these applications (or their
|
||||||
|
* Unit tests) to check at run time if the FieldCache contains "insane"
|
||||||
|
* usages of the FieldCache.
|
||||||
|
* </p>
|
||||||
|
* @lucene.experimental
|
||||||
|
* @see FieldCache
|
||||||
|
* @see FieldCacheSanityChecker.Insanity
|
||||||
|
* @see FieldCacheSanityChecker.InsanityType
|
||||||
|
*/
|
||||||
|
final class FieldCacheSanityChecker {
|
||||||
|
|
||||||
|
public FieldCacheSanityChecker() {
|
||||||
|
/* NOOP */
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Quick and dirty convenience method
|
||||||
|
* @see #check
|
||||||
|
*/
|
||||||
|
public static Insanity[] checkSanity(FieldCache cache) {
|
||||||
|
return checkSanity(cache.getCacheEntries());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Quick and dirty convenience method that instantiates an instance with
|
||||||
|
* "good defaults" and uses it to test the CacheEntrys
|
||||||
|
* @see #check
|
||||||
|
*/
|
||||||
|
public static Insanity[] checkSanity(CacheEntry... cacheEntries) {
|
||||||
|
FieldCacheSanityChecker sanityChecker = new FieldCacheSanityChecker();
|
||||||
|
return sanityChecker.check(cacheEntries);
|
||||||
|
}
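A sketch of how a test might use these convenience methods (the class is package-private, so this is assumed to live in the same package; not part of the patch):

  // Sketch: fail fast if the FieldCache holds overlapping or duplicated entries.
  FieldCacheSanityChecker.Insanity[] insanity = FieldCacheSanityChecker.checkSanity(FieldCache.DEFAULT);
  for (FieldCacheSanityChecker.Insanity i : insanity) {
    System.err.println(i); // type, message, and the offending CacheEntry objects
  }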
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests a CacheEntry[] for indication of "insane" cache usage.
|
||||||
|
* <p>
|
||||||
|
   * <b>NOTE:</b> FieldCache CreationPlaceholder objects are ignored.
|
||||||
|
* (:TODO: is this a bad idea? are we masking a real problem?)
|
||||||
|
* </p>
|
||||||
|
*/
|
||||||
|
public Insanity[] check(CacheEntry... cacheEntries) {
|
||||||
|
if (null == cacheEntries || 0 == cacheEntries.length)
|
||||||
|
return new Insanity[0];
|
||||||
|
|
||||||
|
// the indirect mapping lets MapOfSet dedup identical valIds for us
|
||||||
|
//
|
||||||
|
// maps the (valId) identityhashCode of cache values to
|
||||||
|
// sets of CacheEntry instances
|
||||||
|
final MapOfSets<Integer, CacheEntry> valIdToItems = new MapOfSets<>(new HashMap<Integer, Set<CacheEntry>>(17));
|
||||||
|
// maps ReaderField keys to Sets of ValueIds
|
||||||
|
final MapOfSets<ReaderField, Integer> readerFieldToValIds = new MapOfSets<>(new HashMap<ReaderField, Set<Integer>>(17));
|
||||||
|
//
|
||||||
|
|
||||||
|
    // any keys that we know result in more than one valId
|
||||||
|
final Set<ReaderField> valMismatchKeys = new HashSet<>();
|
||||||
|
|
||||||
|
// iterate over all the cacheEntries to get the mappings we'll need
|
||||||
|
for (int i = 0; i < cacheEntries.length; i++) {
|
||||||
|
final CacheEntry item = cacheEntries[i];
|
||||||
|
final Object val = item.getValue();
|
||||||
|
|
||||||
|
// It's OK to have dup entries, where one is eg
|
||||||
|
// float[] and the other is the Bits (from
|
||||||
|
      // getDocsWithField())
|
||||||
|
if (val instanceof FieldCacheImpl.BitsEntry) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (val instanceof FieldCache.CreationPlaceholder)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
final ReaderField rf = new ReaderField(item.getReaderKey(),
|
||||||
|
item.getFieldName());
|
||||||
|
|
||||||
|
final Integer valId = Integer.valueOf(System.identityHashCode(val));
|
||||||
|
|
||||||
|
// indirect mapping, so the MapOfSet will dedup identical valIds for us
|
||||||
|
valIdToItems.put(valId, item);
|
||||||
|
if (1 < readerFieldToValIds.put(rf, valId)) {
|
||||||
|
valMismatchKeys.add(rf);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
final List<Insanity> insanity = new ArrayList<>(valMismatchKeys.size() * 3);
|
||||||
|
|
||||||
|
insanity.addAll(checkValueMismatch(valIdToItems,
|
||||||
|
readerFieldToValIds,
|
||||||
|
valMismatchKeys));
|
||||||
|
insanity.addAll(checkSubreaders(valIdToItems,
|
||||||
|
readerFieldToValIds));
|
||||||
|
|
||||||
|
return insanity.toArray(new Insanity[insanity.size()]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Internal helper method used by check that iterates over
|
||||||
|
* valMismatchKeys and generates a Collection of Insanity
|
||||||
|
* instances accordingly. The MapOfSets are used to populate
|
||||||
|
* the Insanity objects.
|
||||||
|
* @see InsanityType#VALUEMISMATCH
|
||||||
|
*/
|
||||||
|
private Collection<Insanity> checkValueMismatch(MapOfSets<Integer, CacheEntry> valIdToItems,
|
||||||
|
MapOfSets<ReaderField, Integer> readerFieldToValIds,
|
||||||
|
Set<ReaderField> valMismatchKeys) {
|
||||||
|
|
||||||
|
final List<Insanity> insanity = new ArrayList<>(valMismatchKeys.size() * 3);
|
||||||
|
|
||||||
|
if (! valMismatchKeys.isEmpty() ) {
|
||||||
|
// we have multiple values for some ReaderFields
|
||||||
|
|
||||||
|
final Map<ReaderField, Set<Integer>> rfMap = readerFieldToValIds.getMap();
|
||||||
|
final Map<Integer, Set<CacheEntry>> valMap = valIdToItems.getMap();
|
||||||
|
for (final ReaderField rf : valMismatchKeys) {
|
||||||
|
final List<CacheEntry> badEntries = new ArrayList<>(valMismatchKeys.size() * 2);
|
||||||
|
for(final Integer value: rfMap.get(rf)) {
|
||||||
|
for (final CacheEntry cacheEntry : valMap.get(value)) {
|
||||||
|
badEntries.add(cacheEntry);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
CacheEntry[] badness = new CacheEntry[badEntries.size()];
|
||||||
|
badness = badEntries.toArray(badness);
|
||||||
|
|
||||||
|
insanity.add(new Insanity(InsanityType.VALUEMISMATCH,
|
||||||
|
"Multiple distinct value objects for " +
|
||||||
|
rf.toString(), badness));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return insanity;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Internal helper method used by check that iterates over
|
||||||
|
* the keys of readerFieldToValIds and generates a Collection
|
||||||
|
* of Insanity instances whenever two (or more) ReaderField instances are
|
||||||
|
   * found that have an ancestry relationship.
|
||||||
|
*
|
||||||
|
* @see InsanityType#SUBREADER
|
||||||
|
*/
|
||||||
|
private Collection<Insanity> checkSubreaders( MapOfSets<Integer, CacheEntry> valIdToItems,
|
||||||
|
MapOfSets<ReaderField, Integer> readerFieldToValIds) {
|
||||||
|
|
||||||
|
final List<Insanity> insanity = new ArrayList<>(23);
|
||||||
|
|
||||||
|
Map<ReaderField, Set<ReaderField>> badChildren = new HashMap<>(17);
|
||||||
|
MapOfSets<ReaderField, ReaderField> badKids = new MapOfSets<>(badChildren); // wrapper
|
||||||
|
|
||||||
|
Map<Integer, Set<CacheEntry>> viToItemSets = valIdToItems.getMap();
|
||||||
|
Map<ReaderField, Set<Integer>> rfToValIdSets = readerFieldToValIds.getMap();
|
||||||
|
|
||||||
|
Set<ReaderField> seen = new HashSet<>(17);
|
||||||
|
|
||||||
|
Set<ReaderField> readerFields = rfToValIdSets.keySet();
|
||||||
|
for (final ReaderField rf : readerFields) {
|
||||||
|
|
||||||
|
if (seen.contains(rf)) continue;
|
||||||
|
|
||||||
|
List<Object> kids = getAllDescendantReaderKeys(rf.readerKey);
|
||||||
|
for (Object kidKey : kids) {
|
||||||
|
ReaderField kid = new ReaderField(kidKey, rf.fieldName);
|
||||||
|
|
||||||
|
if (badChildren.containsKey(kid)) {
|
||||||
|
          // we've already processed this kid as RF and found other problems
|
||||||
|
// track those problems as our own
|
||||||
|
badKids.put(rf, kid);
|
||||||
|
badKids.putAll(rf, badChildren.get(kid));
|
||||||
|
badChildren.remove(kid);
|
||||||
|
|
||||||
|
} else if (rfToValIdSets.containsKey(kid)) {
|
||||||
|
// we have cache entries for the kid
|
||||||
|
badKids.put(rf, kid);
|
||||||
|
}
|
||||||
|
seen.add(kid);
|
||||||
|
}
|
||||||
|
seen.add(rf);
|
||||||
|
}
|
||||||
|
|
||||||
|
// every mapping in badKids represents an Insanity
|
||||||
|
for (final ReaderField parent : badChildren.keySet()) {
|
||||||
|
Set<ReaderField> kids = badChildren.get(parent);
|
||||||
|
|
||||||
|
List<CacheEntry> badEntries = new ArrayList<>(kids.size() * 2);
|
||||||
|
|
||||||
|
// put parent entr(ies) in first
|
||||||
|
{
|
||||||
|
for (final Integer value : rfToValIdSets.get(parent)) {
|
||||||
|
badEntries.addAll(viToItemSets.get(value));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// now the entries for the descendants
|
||||||
|
for (final ReaderField kid : kids) {
|
||||||
|
for (final Integer value : rfToValIdSets.get(kid)) {
|
||||||
|
badEntries.addAll(viToItemSets.get(value));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
CacheEntry[] badness = new CacheEntry[badEntries.size()];
|
||||||
|
badness = badEntries.toArray(badness);
|
||||||
|
|
||||||
|
insanity.add(new Insanity(InsanityType.SUBREADER,
|
||||||
|
"Found caches for descendants of " +
|
||||||
|
parent.toString(),
|
||||||
|
badness));
|
||||||
|
}
|
||||||
|
|
||||||
|
return insanity;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks if the seed is an IndexReader, and if so will walk
|
||||||
|
* the hierarchy of subReaders building up a list of the objects
|
||||||
|
* returned by {@code seed.getCoreCacheKey()}
|
||||||
|
*/
|
||||||
|
private List<Object> getAllDescendantReaderKeys(Object seed) {
|
||||||
|
List<Object> all = new ArrayList<>(17); // will grow as we iter
|
||||||
|
all.add(seed);
|
||||||
|
for (int i = 0; i < all.size(); i++) {
|
||||||
|
final Object obj = all.get(i);
|
||||||
|
// TODO: We don't check closed readers here (as getTopReaderContext
|
||||||
|
// throws AlreadyClosedException), what should we do? Reflection?
|
||||||
|
if (obj instanceof IndexReader) {
|
||||||
|
try {
|
||||||
|
final List<IndexReaderContext> childs =
|
||||||
|
((IndexReader) obj).getContext().children();
|
||||||
|
if (childs != null) { // it is composite reader
|
||||||
|
for (final IndexReaderContext ctx : childs) {
|
||||||
|
all.add(ctx.reader().getCoreCacheKey());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (AlreadyClosedException ace) {
|
||||||
|
// ignore this reader
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// need to skip the first, because it was the seed
|
||||||
|
return all.subList(1, all.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Simple pair object for using "readerKey + fieldName" a Map key
|
||||||
|
*/
|
||||||
|
private final static class ReaderField {
|
||||||
|
public final Object readerKey;
|
||||||
|
public final String fieldName;
|
||||||
|
public ReaderField(Object readerKey, String fieldName) {
|
||||||
|
this.readerKey = readerKey;
|
||||||
|
this.fieldName = fieldName;
|
||||||
|
}
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
return System.identityHashCode(readerKey) * fieldName.hashCode();
|
||||||
|
}
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object that) {
|
||||||
|
if (! (that instanceof ReaderField)) return false;
|
||||||
|
|
||||||
|
ReaderField other = (ReaderField) that;
|
||||||
|
return (this.readerKey == other.readerKey &&
|
||||||
|
this.fieldName.equals(other.fieldName));
|
||||||
|
}
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return readerKey.toString() + "+" + fieldName;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Simple container for a collection of related CacheEntry objects that
|
||||||
|
* in conjunction with each other represent some "insane" usage of the
|
||||||
|
* FieldCache.
|
||||||
|
*/
|
||||||
|
public final static class Insanity {
|
||||||
|
private final InsanityType type;
|
||||||
|
private final String msg;
|
||||||
|
private final CacheEntry[] entries;
|
||||||
|
public Insanity(InsanityType type, String msg, CacheEntry... entries) {
|
||||||
|
if (null == type) {
|
||||||
|
throw new IllegalArgumentException
|
||||||
|
("Insanity requires non-null InsanityType");
|
||||||
|
}
|
||||||
|
if (null == entries || 0 == entries.length) {
|
||||||
|
throw new IllegalArgumentException
|
||||||
|
("Insanity requires non-null/non-empty CacheEntry[]");
|
||||||
|
}
|
||||||
|
this.type = type;
|
||||||
|
this.msg = msg;
|
||||||
|
this.entries = entries;
|
||||||
|
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Type of insane behavior this object represents
|
||||||
|
*/
|
||||||
|
public InsanityType getType() { return type; }
|
||||||
|
/**
|
||||||
|
     * Description of the insane behavior
|
||||||
|
*/
|
||||||
|
public String getMsg() { return msg; }
|
||||||
|
/**
|
||||||
|
* CacheEntry objects which suggest a problem
|
||||||
|
*/
|
||||||
|
public CacheEntry[] getCacheEntries() { return entries; }
|
||||||
|
/**
|
||||||
|
* Multi-Line representation of this Insanity object, starting with
|
||||||
|
* the Type and Msg, followed by each CacheEntry.toString() on its
|
||||||
|
* own line prefaced by a tab character
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
StringBuilder buf = new StringBuilder();
|
||||||
|
buf.append(getType()).append(": ");
|
||||||
|
|
||||||
|
String m = getMsg();
|
||||||
|
if (null != m) buf.append(m);
|
||||||
|
|
||||||
|
buf.append('\n');
|
||||||
|
|
||||||
|
CacheEntry[] ce = getCacheEntries();
|
||||||
|
for (int i = 0; i < ce.length; i++) {
|
||||||
|
buf.append('\t').append(ce[i].toString()).append('\n');
|
||||||
|
}
|
||||||
|
|
||||||
|
return buf.toString();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An Enumeration of the different types of "insane" behavior that
|
||||||
|
* may be detected in a FieldCache.
|
||||||
|
*
|
||||||
|
* @see InsanityType#SUBREADER
|
||||||
|
* @see InsanityType#VALUEMISMATCH
|
||||||
|
* @see InsanityType#EXPECTED
|
||||||
|
*/
|
||||||
|
public final static class InsanityType {
|
||||||
|
private final String label;
|
||||||
|
private InsanityType(final String label) {
|
||||||
|
this.label = label;
|
||||||
|
}
|
||||||
|
@Override
|
||||||
|
public String toString() { return label; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Indicates an overlap in cache usage on a given field
|
||||||
|
* in sub/super readers.
|
||||||
|
*/
|
||||||
|
public final static InsanityType SUBREADER
|
||||||
|
= new InsanityType("SUBREADER");
|
||||||
|
|
||||||
|
/**
|
||||||
|
* <p>
|
||||||
|
* Indicates entries have the same reader+fieldname but
|
||||||
|
* different cached values. This can happen if different datatypes,
|
||||||
|
* or parsers are used -- and while it's not necessarily a bug
|
||||||
|
* it's typically an indication of a possible problem.
|
||||||
|
* </p>
|
||||||
|
* <p>
|
||||||
|
* <b>NOTE:</b> Only the reader, fieldname, and cached value are actually
|
||||||
|
* tested -- if two cache entries have different parsers or datatypes but
|
||||||
|
* the cached values are the same Object (== not just equal()) this method
|
||||||
|
* does not consider that a red flag. This allows for subtle variations
|
||||||
|
* in the way a Parser is specified (null vs DEFAULT_LONG_PARSER, etc...)
|
||||||
|
* </p>
|
||||||
|
*/
|
||||||
|
public final static InsanityType VALUEMISMATCH
|
||||||
|
= new InsanityType("VALUEMISMATCH");
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Indicates an expected bit of "insanity". This may be useful for
|
||||||
|
* clients that wish to preserve/log information about insane usage
|
||||||
|
* but indicate that it was expected.
|
||||||
|
*/
|
||||||
|
public final static InsanityType EXPECTED
|
||||||
|
= new InsanityType("EXPECTED");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
|
@@ -0,0 +1,391 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.solr.uninverting;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.apache.lucene.document.BinaryDocValuesField; // javadocs
|
||||||
|
import org.apache.lucene.document.NumericDocValuesField; // javadocs
|
||||||
|
import org.apache.lucene.document.SortedDocValuesField; // javadocs
|
||||||
|
import org.apache.lucene.document.SortedSetDocValuesField; // javadocs
|
||||||
|
import org.apache.lucene.document.StringField; // javadocs
|
||||||
|
import org.apache.lucene.index.BinaryDocValues;
|
||||||
|
import org.apache.lucene.index.DirectoryReader;
|
||||||
|
import org.apache.lucene.index.DocValuesType;
|
||||||
|
import org.apache.lucene.index.FieldInfo;
|
||||||
|
import org.apache.lucene.index.FieldInfos;
|
||||||
|
import org.apache.lucene.index.FilterDirectoryReader;
|
||||||
|
import org.apache.lucene.index.FilterLeafReader;
|
||||||
|
import org.apache.lucene.index.IndexOptions;
|
||||||
|
import org.apache.lucene.index.LeafReader;
|
||||||
|
import org.apache.lucene.index.NumericDocValues;
|
||||||
|
import org.apache.lucene.index.SortedDocValues;
|
||||||
|
import org.apache.lucene.index.SortedSetDocValues;
|
||||||
|
import org.apache.lucene.util.Bits;
|
||||||
|
import org.apache.solr.uninverting.FieldCache.CacheEntry;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A FilterReader that exposes <i>indexed</i> values as if they also had
|
||||||
|
* docvalues.
|
||||||
|
* <p>
|
||||||
|
* This is accomplished by "inverting the inverted index" or "uninversion".
|
||||||
|
* <p>
|
||||||
|
* The uninversion process happens lazily: upon the first request for the
|
||||||
|
* field's docvalues (e.g. via {@link org.apache.lucene.index.LeafReader#getNumericDocValues(String)}
|
||||||
|
* or similar), it will create the docvalues on-the-fly if needed and cache it,
|
||||||
|
* based on the core cache key of the wrapped LeafReader.
|
||||||
|
*/
|
||||||
|
public class UninvertingReader extends FilterLeafReader {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Specifies the type of uninversion to apply for the field.
|
||||||
|
*/
|
||||||
|
public static enum Type {
|
||||||
|
/**
|
||||||
|
* Single-valued Integer, (e.g. indexed with {@link org.apache.lucene.document.IntPoint})
|
||||||
|
* <p>
|
||||||
|
* Fields with this type act as if they were indexed with
|
||||||
|
* {@link NumericDocValuesField}.
|
||||||
|
*/
|
||||||
|
INTEGER_POINT,
|
||||||
|
/**
|
||||||
|
     * Single-valued Long, (e.g. indexed with {@link org.apache.lucene.document.LongPoint})
|
||||||
|
* <p>
|
||||||
|
* Fields with this type act as if they were indexed with
|
||||||
|
* {@link NumericDocValuesField}.
|
||||||
|
*/
|
||||||
|
LONG_POINT,
|
||||||
|
/**
|
||||||
|
     * Single-valued Float, (e.g. indexed with {@link org.apache.lucene.document.FloatPoint})
|
||||||
|
* <p>
|
||||||
|
* Fields with this type act as if they were indexed with
|
||||||
|
* {@link NumericDocValuesField}.
|
||||||
|
*/
|
||||||
|
FLOAT_POINT,
|
||||||
|
/**
|
||||||
|
     * Single-valued Double, (e.g. indexed with {@link org.apache.lucene.document.DoublePoint})
|
||||||
|
* <p>
|
||||||
|
* Fields with this type act as if they were indexed with
|
||||||
|
* {@link NumericDocValuesField}.
|
||||||
|
*/
|
||||||
|
DOUBLE_POINT,
|
||||||
|
/**
|
||||||
|
* Single-valued Integer, (e.g. indexed with {@link org.apache.lucene.document.LegacyIntField})
|
||||||
|
* <p>
|
||||||
|
* Fields with this type act as if they were indexed with
|
||||||
|
* {@link NumericDocValuesField}.
|
||||||
|
* @deprecated Index with points and use {@link #INTEGER_POINT} instead.
|
||||||
|
*/
|
||||||
|
@Deprecated
|
||||||
|
LEGACY_INTEGER,
|
||||||
|
/**
|
||||||
|
* Single-valued Long, (e.g. indexed with {@link org.apache.lucene.document.LegacyLongField})
|
||||||
|
* <p>
|
||||||
|
* Fields with this type act as if they were indexed with
|
||||||
|
* {@link NumericDocValuesField}.
|
||||||
|
* @deprecated Index with points and use {@link #LONG_POINT} instead.
|
||||||
|
*/
|
||||||
|
@Deprecated
|
||||||
|
LEGACY_LONG,
|
||||||
|
/**
|
||||||
|
* Single-valued Float, (e.g. indexed with {@link org.apache.lucene.document.LegacyFloatField})
|
||||||
|
* <p>
|
||||||
|
* Fields with this type act as if they were indexed with
|
||||||
|
* {@link NumericDocValuesField}.
|
||||||
|
* @deprecated Index with points and use {@link #FLOAT_POINT} instead.
|
||||||
|
*/
|
||||||
|
@Deprecated
|
||||||
|
LEGACY_FLOAT,
|
||||||
|
/**
|
||||||
|
* Single-valued Double, (e.g. indexed with {@link org.apache.lucene.document.LegacyDoubleField})
|
||||||
|
* <p>
|
||||||
|
* Fields with this type act as if they were indexed with
|
||||||
|
* {@link NumericDocValuesField}.
|
||||||
|
* @deprecated Index with points and use {@link #DOUBLE_POINT} instead.
|
||||||
|
*/
|
||||||
|
@Deprecated
|
||||||
|
LEGACY_DOUBLE,
|
||||||
|
/**
|
||||||
|
* Single-valued Binary, (e.g. indexed with {@link StringField})
|
||||||
|
* <p>
|
||||||
|
* Fields with this type act as if they were indexed with
|
||||||
|
* {@link BinaryDocValuesField}.
|
||||||
|
*/
|
||||||
|
BINARY,
|
||||||
|
/**
|
||||||
|
* Single-valued Binary, (e.g. indexed with {@link StringField})
|
||||||
|
* <p>
|
||||||
|
* Fields with this type act as if they were indexed with
|
||||||
|
* {@link SortedDocValuesField}.
|
||||||
|
*/
|
||||||
|
SORTED,
|
||||||
|
/**
|
||||||
|
* Multi-valued Binary, (e.g. indexed with {@link StringField})
|
||||||
|
* <p>
|
||||||
|
* Fields with this type act as if they were indexed with
|
||||||
|
* {@link SortedSetDocValuesField}.
|
||||||
|
*/
|
||||||
|
SORTED_SET_BINARY,
|
||||||
|
/**
|
||||||
|
* Multi-valued Integer, (e.g. indexed with {@link org.apache.lucene.document.LegacyIntField})
|
||||||
|
* <p>
|
||||||
|
* Fields with this type act as if they were indexed with
|
||||||
|
* {@link SortedSetDocValuesField}.
|
||||||
|
*/
|
||||||
|
SORTED_SET_INTEGER,
|
||||||
|
/**
|
||||||
|
* Multi-valued Float, (e.g. indexed with {@link org.apache.lucene.document.LegacyFloatField})
|
||||||
|
* <p>
|
||||||
|
* Fields with this type act as if they were indexed with
|
||||||
|
* {@link SortedSetDocValuesField}.
|
||||||
|
*/
|
||||||
|
SORTED_SET_FLOAT,
|
||||||
|
/**
|
||||||
|
* Multi-valued Long, (e.g. indexed with {@link org.apache.lucene.document.LegacyLongField})
|
||||||
|
* <p>
|
||||||
|
* Fields with this type act as if they were indexed with
|
||||||
|
* {@link SortedSetDocValuesField}.
|
||||||
|
*/
|
||||||
|
SORTED_SET_LONG,
|
||||||
|
/**
|
||||||
|
* Multi-valued Double, (e.g. indexed with {@link org.apache.lucene.document.LegacyDoubleField})
|
||||||
|
* <p>
|
||||||
|
* Fields with this type act as if they were indexed with
|
||||||
|
* {@link SortedSetDocValuesField}.
|
||||||
|
*/
|
||||||
|
SORTED_SET_DOUBLE
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Wraps a provided DirectoryReader. Note that for convenience, the returned reader
|
||||||
|
* can be used normally (e.g. passed to {@link DirectoryReader#openIfChanged(DirectoryReader)})
|
||||||
|
* and so on.
|
||||||
|
*/
|
||||||
|
public static DirectoryReader wrap(DirectoryReader in, final Map<String,Type> mapping) throws IOException {
|
||||||
|
return new UninvertingDirectoryReader(in, mapping);
|
||||||
|
}
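A usage sketch (the DirectoryReader `existingReader` and the field names/types are assumptions for illustration, not part of the patch; java.util.HashMap is assumed to be imported):

  // Sketch: expose indexed-only fields as docvalues at search time.
  // Assumes "price" was indexed with IntPoint and "category" with StringField.
  Map<String, UninvertingReader.Type> mapping = new HashMap<>();
  mapping.put("price", UninvertingReader.Type.INTEGER_POINT);
  mapping.put("category", UninvertingReader.Type.SORTED);
  DirectoryReader uninverting = UninvertingReader.wrap(existingReader, mapping);
  // The returned reader can be searched and sorted as if both fields had docvalues.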
|
||||||
|
|
||||||
|
static class UninvertingDirectoryReader extends FilterDirectoryReader {
|
||||||
|
final Map<String,Type> mapping;
|
||||||
|
|
||||||
|
public UninvertingDirectoryReader(DirectoryReader in, final Map<String,Type> mapping) throws IOException {
|
||||||
|
super(in, new FilterDirectoryReader.SubReaderWrapper() {
|
||||||
|
@Override
|
||||||
|
public LeafReader wrap(LeafReader reader) {
|
||||||
|
return new UninvertingReader(reader, mapping);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
this.mapping = mapping;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) throws IOException {
|
||||||
|
return new UninvertingDirectoryReader(in, mapping);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
final Map<String,Type> mapping;
|
||||||
|
final FieldInfos fieldInfos;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new UninvertingReader with the specified mapping
|
||||||
|
* <p>
|
||||||
|
* Expert: This should almost never be used. Use {@link #wrap(DirectoryReader, Map)}
|
||||||
|
* instead.
|
||||||
|
*
|
||||||
|
* @lucene.internal
|
||||||
|
*/
|
||||||
|
public UninvertingReader(LeafReader in, Map<String,Type> mapping) {
|
||||||
|
super(in);
|
||||||
|
this.mapping = mapping;
|
||||||
|
ArrayList<FieldInfo> filteredInfos = new ArrayList<>();
|
||||||
|
for (FieldInfo fi : in.getFieldInfos()) {
|
||||||
|
DocValuesType type = fi.getDocValuesType();
|
||||||
|
if (type == DocValuesType.NONE) {
|
||||||
|
Type t = mapping.get(fi.name);
|
||||||
|
if (t != null) {
|
||||||
|
if (t == Type.INTEGER_POINT || t == Type.LONG_POINT || t == Type.FLOAT_POINT || t == Type.DOUBLE_POINT) {
|
||||||
|
// type uses points
|
||||||
|
if (fi.getPointDimensionCount() == 0) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// type uses inverted index
|
||||||
|
if (fi.getIndexOptions() == IndexOptions.NONE) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
switch(t) {
|
||||||
|
case INTEGER_POINT:
|
||||||
|
case LONG_POINT:
|
||||||
|
case FLOAT_POINT:
|
||||||
|
case DOUBLE_POINT:
|
||||||
|
case LEGACY_INTEGER:
|
||||||
|
case LEGACY_LONG:
|
||||||
|
case LEGACY_FLOAT:
|
||||||
|
case LEGACY_DOUBLE:
|
||||||
|
type = DocValuesType.NUMERIC;
|
||||||
|
break;
|
||||||
|
case BINARY:
|
||||||
|
type = DocValuesType.BINARY;
|
||||||
|
break;
|
||||||
|
case SORTED:
|
||||||
|
type = DocValuesType.SORTED;
|
||||||
|
break;
|
||||||
|
case SORTED_SET_BINARY:
|
||||||
|
case SORTED_SET_INTEGER:
|
||||||
|
case SORTED_SET_FLOAT:
|
||||||
|
case SORTED_SET_LONG:
|
||||||
|
case SORTED_SET_DOUBLE:
|
||||||
|
type = DocValuesType.SORTED_SET;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw new AssertionError();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
filteredInfos.add(new FieldInfo(fi.name, fi.number, fi.hasVectors(), fi.omitsNorms(),
|
||||||
|
fi.hasPayloads(), fi.getIndexOptions(), type, fi.getDocValuesGen(), fi.attributes(),
|
||||||
|
fi.getPointDimensionCount(), fi.getPointNumBytes()));
|
||||||
|
}
|
||||||
|
fieldInfos = new FieldInfos(filteredInfos.toArray(new FieldInfo[filteredInfos.size()]));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public FieldInfos getFieldInfos() {
|
||||||
|
return fieldInfos;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public NumericDocValues getNumericDocValues(String field) throws IOException {
|
||||||
|
Type v = getType(field);
|
||||||
|
if (v != null) {
|
||||||
|
switch (v) {
|
||||||
|
case INTEGER_POINT: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.INT_POINT_PARSER, true);
|
||||||
|
case FLOAT_POINT: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.FLOAT_POINT_PARSER, true);
|
||||||
|
case LONG_POINT: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.LONG_POINT_PARSER, true);
|
||||||
|
case DOUBLE_POINT: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.DOUBLE_POINT_PARSER, true);
|
||||||
|
case LEGACY_INTEGER: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.LEGACY_INT_PARSER, true);
|
||||||
|
case LEGACY_FLOAT: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.LEGACY_FLOAT_PARSER, true);
|
||||||
|
case LEGACY_LONG: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.LEGACY_LONG_PARSER, true);
|
||||||
|
case LEGACY_DOUBLE: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.LEGACY_DOUBLE_PARSER, true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return super.getNumericDocValues(field);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public BinaryDocValues getBinaryDocValues(String field) throws IOException {
|
||||||
|
Type v = getType(field);
|
||||||
|
if (v == Type.BINARY) {
|
||||||
|
return FieldCache.DEFAULT.getTerms(in, field, true);
|
||||||
|
} else {
|
||||||
|
return in.getBinaryDocValues(field);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SortedDocValues getSortedDocValues(String field) throws IOException {
|
||||||
|
Type v = getType(field);
|
||||||
|
if (v == Type.SORTED) {
|
||||||
|
return FieldCache.DEFAULT.getTermsIndex(in, field);
|
||||||
|
} else {
|
||||||
|
return in.getSortedDocValues(field);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
|
||||||
|
Type v = getType(field);
|
||||||
|
if (v != null) {
|
||||||
|
switch (v) {
|
||||||
|
case SORTED_SET_INTEGER:
|
||||||
|
case SORTED_SET_FLOAT:
|
||||||
|
return FieldCache.DEFAULT.getDocTermOrds(in, field, FieldCache.INT32_TERM_PREFIX);
|
||||||
|
case SORTED_SET_LONG:
|
||||||
|
case SORTED_SET_DOUBLE:
|
||||||
|
return FieldCache.DEFAULT.getDocTermOrds(in, field, FieldCache.INT64_TERM_PREFIX);
|
||||||
|
case SORTED_SET_BINARY:
|
||||||
|
return FieldCache.DEFAULT.getDocTermOrds(in, field, null);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return in.getSortedSetDocValues(field);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Bits getDocsWithField(String field) throws IOException {
|
||||||
|
Type v = getType(field);
|
||||||
|
if (v != null) {
|
||||||
|
switch (v) {
|
||||||
|
case INTEGER_POINT: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.INT_POINT_PARSER);
|
||||||
|
case FLOAT_POINT: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.FLOAT_POINT_PARSER);
|
||||||
|
case LONG_POINT: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.LONG_POINT_PARSER);
|
||||||
|
case DOUBLE_POINT: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.DOUBLE_POINT_PARSER);
|
||||||
|
case LEGACY_INTEGER: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.LEGACY_INT_PARSER);
|
||||||
|
case LEGACY_FLOAT: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.LEGACY_FLOAT_PARSER);
|
||||||
|
case LEGACY_LONG: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.LEGACY_LONG_PARSER);
|
||||||
|
case LEGACY_DOUBLE: return FieldCache.DEFAULT.getDocsWithField(in, field, FieldCache.LEGACY_DOUBLE_PARSER);
|
||||||
|
default:
|
||||||
|
return FieldCache.DEFAULT.getDocsWithField(in, field, null);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return in.getDocsWithField(field);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the field's uninversion type, or null
|
||||||
|
* if the field doesn't exist or doesn't have a mapping.
|
||||||
|
*/
|
||||||
|
private Type getType(String field) {
|
||||||
|
FieldInfo info = fieldInfos.fieldInfo(field);
|
||||||
|
if (info == null || info.getDocValuesType() == DocValuesType.NONE) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return mapping.get(field);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Object getCoreCacheKey() {
|
||||||
|
return in.getCoreCacheKey();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Object getCombinedCoreAndDeletesKey() {
|
||||||
|
return in.getCombinedCoreAndDeletesKey();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "Uninverting(" + in.toString() + ")";
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return information about the backing cache
|
||||||
|
* @lucene.internal
|
||||||
|
*/
|
||||||
|
public static String[] getUninvertedStats() {
|
||||||
|
CacheEntry[] entries = FieldCache.DEFAULT.getCacheEntries();
|
||||||
|
String[] info = new String[entries.length];
|
||||||
|
for (int i = 0; i < entries.length; i++) {
|
||||||
|
info[i] = entries[i].toString();
|
||||||
|
}
|
||||||
|
return info;
|
||||||
|
}
|
||||||
|
}
|
|
@@ -0,0 +1,21 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Support for creating docvalues on-the-fly from the inverted index at runtime.
|
||||||
|
*/
|
||||||
|
package org.apache.solr.uninverting;
|
|
@@ -29,8 +29,9 @@ import org.apache.lucene.search.IndexSearcher;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.search.Scorer;
|
import org.apache.lucene.search.Scorer;
|
||||||
import org.apache.lucene.search.Weight;
|
import org.apache.lucene.search.Weight;
|
||||||
import org.apache.lucene.uninverting.UninvertingReader;
|
|
||||||
import org.apache.solr.schema.IndexSchema;
|
import org.apache.solr.schema.IndexSchema;
|
||||||
|
import org.apache.solr.uninverting.UninvertingReader;
|
||||||
|
import org.apache.solr.uninverting.UninvertingReader;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Allows access to uninverted docvalues by delete-by-queries.
|
* Allows access to uninverted docvalues by delete-by-queries.
|
||||||
|
|
|
@@ -24,7 +24,6 @@ import java.util.concurrent.locks.ReentrantReadWriteLock;
|
||||||
|
|
||||||
import org.apache.lucene.index.LeafReader;
|
import org.apache.lucene.index.LeafReader;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
import org.apache.lucene.index.SlowCompositeReaderWrapper;
|
|
||||||
import org.apache.lucene.index.Terms;
|
import org.apache.lucene.index.Terms;
|
||||||
import org.apache.lucene.queries.function.FunctionValues;
|
import org.apache.lucene.queries.function.FunctionValues;
|
||||||
import org.apache.lucene.queries.function.ValueSource;
|
import org.apache.lucene.queries.function.ValueSource;
|
||||||
|
@@ -34,6 +33,7 @@ import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.LegacyNumericUtils;
|
import org.apache.lucene.util.LegacyNumericUtils;
|
||||||
import org.apache.solr.common.SolrException;
|
import org.apache.solr.common.SolrException;
|
||||||
import org.apache.solr.common.util.SuppressForbidden;
|
import org.apache.solr.common.util.SuppressForbidden;
|
||||||
|
import org.apache.solr.index.SlowCompositeReaderWrapper;
|
||||||
import org.apache.solr.schema.IndexSchema;
|
import org.apache.solr.schema.IndexSchema;
|
||||||
import org.apache.solr.schema.SchemaField;
|
import org.apache.solr.schema.SchemaField;
|
||||||
import org.apache.solr.search.SolrIndexSearcher;
|
import org.apache.solr.search.SolrIndexSearcher;
|
||||||
|
|
|
@@ -0,0 +1,95 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.solr.index;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
|
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.index.DirectoryReader;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.LeafReader;
|
||||||
|
import org.apache.lucene.index.RandomIndexWriter;
|
||||||
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
import org.apache.lucene.util.TestUtil;
|
||||||
|
|
||||||
|
public class TestSlowCompositeReaderWrapper extends LuceneTestCase {
|
||||||
|
|
||||||
|
public void testCoreListenerOnSlowCompositeReaderWrapper() throws IOException {
|
||||||
|
RandomIndexWriter w = new RandomIndexWriter(random(), newDirectory());
|
||||||
|
final int numDocs = TestUtil.nextInt(random(), 1, 5);
|
||||||
|
for (int i = 0; i < numDocs; ++i) {
|
||||||
|
w.addDocument(new Document());
|
||||||
|
if (random().nextBoolean()) {
|
||||||
|
w.commit();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
w.commit();
|
||||||
|
w.close();
|
||||||
|
|
||||||
|
final IndexReader reader = DirectoryReader.open(w.w.getDirectory());
|
||||||
|
final LeafReader leafReader = SlowCompositeReaderWrapper.wrap(reader);
|
||||||
|
|
||||||
|
final int numListeners = TestUtil.nextInt(random(), 1, 10);
|
||||||
|
final List<LeafReader.CoreClosedListener> listeners = new ArrayList<>();
|
||||||
|
AtomicInteger counter = new AtomicInteger(numListeners);
|
||||||
|
|
||||||
|
for (int i = 0; i < numListeners; ++i) {
|
||||||
|
CountCoreListener listener = new CountCoreListener(counter, leafReader.getCoreCacheKey());
|
||||||
|
listeners.add(listener);
|
||||||
|
leafReader.addCoreClosedListener(listener);
|
||||||
|
}
|
||||||
|
for (int i = 0; i < 100; ++i) {
|
||||||
|
leafReader.addCoreClosedListener(listeners.get(random().nextInt(listeners.size())));
|
||||||
|
}
|
||||||
|
final int removed = random().nextInt(numListeners);
|
||||||
|
Collections.shuffle(listeners, random());
|
||||||
|
for (int i = 0; i < removed; ++i) {
|
||||||
|
leafReader.removeCoreClosedListener(listeners.get(i));
|
||||||
|
}
|
||||||
|
assertEquals(numListeners, counter.get());
|
||||||
|
// make sure listeners are registered on the wrapped reader and that closing any of them has the same effect
|
||||||
|
if (random().nextBoolean()) {
|
||||||
|
reader.close();
|
||||||
|
} else {
|
||||||
|
leafReader.close();
|
||||||
|
}
|
||||||
|
assertEquals(removed, counter.get());
|
||||||
|
w.w.getDirectory().close();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static final class CountCoreListener implements LeafReader.CoreClosedListener {
|
||||||
|
|
||||||
|
private final AtomicInteger count;
|
||||||
|
private final Object coreCacheKey;
|
||||||
|
|
||||||
|
public CountCoreListener(AtomicInteger count, Object coreCacheKey) {
|
||||||
|
this.count = count;
|
||||||
|
this.coreCacheKey = coreCacheKey;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void onClose(Object coreCacheKey) {
|
||||||
|
assertSame(this.coreCacheKey, coreCacheKey);
|
||||||
|
count.decrementAndGet();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
|
@@ -25,12 +25,12 @@ import org.apache.lucene.index.DocValues;
|
||||||
import org.apache.lucene.index.SortedSetDocValues;
|
import org.apache.lucene.index.SortedSetDocValues;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.index.TermsEnum;
|
import org.apache.lucene.index.TermsEnum;
|
||||||
import org.apache.lucene.uninverting.DocTermOrds;
|
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.solr.SolrTestCaseJ4;
|
import org.apache.solr.SolrTestCaseJ4;
|
||||||
import org.apache.solr.common.SolrException.ErrorCode;
|
import org.apache.solr.common.SolrException.ErrorCode;
|
||||||
import org.apache.solr.common.params.FacetParams;
|
import org.apache.solr.common.params.FacetParams;
|
||||||
import org.apache.solr.search.SolrIndexSearcher;
|
import org.apache.solr.search.SolrIndexSearcher;
|
||||||
|
import org.apache.solr.uninverting.DocTermOrds;
|
||||||
import org.apache.solr.util.RefCounted;
|
import org.apache.solr.util.RefCounted;
|
||||||
import org.junit.After;
|
import org.junit.After;
|
||||||
import org.junit.BeforeClass;
|
import org.junit.BeforeClass;
|
||||||
|
|
|
@@ -42,13 +42,12 @@ import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortField.Type;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.uninverting.UninvertingReader;
import org.apache.lucene.util.BitDocIdSet;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;

@@ -56,6 +55,7 @@ import org.apache.lucene.util.TestUtil;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.uninverting.UninvertingReader;
import org.junit.BeforeClass;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@@ -0,0 +1,681 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.uninverting;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LegacyIntField;
import org.apache.lucene.document.LegacyLongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LegacyNumericUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.index.SlowCompositeReaderWrapper;

// TODO:
//   - test w/ del docs
//   - test prefix
//   - test w/ cutoff
//   - crank docs way up so we get some merging sometimes

public class TestDocTermOrds extends LuceneTestCase {

  public void testEmptyIndex() throws IOException {
    final Directory dir = newDirectory();
    final IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
    iw.close();

    final DirectoryReader ir = DirectoryReader.open(dir);
    TestUtil.checkReader(ir);

    final LeafReader composite = SlowCompositeReaderWrapper.wrap(ir);
    TestUtil.checkReader(composite);

    // check the leaves
    // (normally there are none for an empty index, so this is really just future
    // proofing in case that changes for some reason)
    for (LeafReaderContext rc : ir.leaves()) {
      final LeafReader r = rc.reader();
      final DocTermOrds dto = new DocTermOrds(r, r.getLiveDocs(), "any_field");
      assertNull("OrdTermsEnum should be null (leaf)", dto.getOrdTermsEnum(r));
      assertEquals("iterator should be empty (leaf)", 0, dto.iterator(r).getValueCount());
    }

    // check the composite
    final DocTermOrds dto = new DocTermOrds(composite, composite.getLiveDocs(), "any_field");
    assertNull("OrdTermsEnum should be null (composite)", dto.getOrdTermsEnum(composite));
    assertEquals("iterator should be empty (composite)", 0, dto.iterator(composite).getValueCount());

    ir.close();
    dir.close();
  }

  public void testSimple() throws Exception {
    Directory dir = newDirectory();
    final RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
    Document doc = new Document();
    Field field = newTextField("field", "", Field.Store.NO);
    doc.add(field);
    field.setStringValue("a b c");
    w.addDocument(doc);

    field.setStringValue("d e f");
    w.addDocument(doc);

    field.setStringValue("a f");
    w.addDocument(doc);

    final IndexReader r = w.getReader();
    w.close();

    final LeafReader ar = SlowCompositeReaderWrapper.wrap(r);
    TestUtil.checkReader(ar);
    final DocTermOrds dto = new DocTermOrds(ar, ar.getLiveDocs(), "field");
    SortedSetDocValues iter = dto.iterator(ar);

    iter.setDocument(0);
    assertEquals(0, iter.nextOrd());
    assertEquals(1, iter.nextOrd());
    assertEquals(2, iter.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());

    iter.setDocument(1);
    assertEquals(3, iter.nextOrd());
    assertEquals(4, iter.nextOrd());
    assertEquals(5, iter.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());

    iter.setDocument(2);
    assertEquals(0, iter.nextOrd());
    assertEquals(5, iter.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());

    r.close();
    dir.close();
  }

  public void testRandom() throws Exception {
    Directory dir = newDirectory();

    final int NUM_TERMS = atLeast(20);
    final Set<BytesRef> terms = new HashSet<>();
    while(terms.size() < NUM_TERMS) {
      final String s = TestUtil.randomRealisticUnicodeString(random());
      //final String s = _TestUtil.randomSimpleString(random);
      if (s.length() > 0) {
        terms.add(new BytesRef(s));
      }
    }
    final BytesRef[] termsArray = terms.toArray(new BytesRef[terms.size()]);
    Arrays.sort(termsArray);

    final int NUM_DOCS = atLeast(100);

    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));

    // Sometimes swap in codec that impls ord():
    if (random().nextInt(10) == 7) {
      // Make sure terms index has ords:
      Codec codec = TestUtil.alwaysPostingsFormat(TestUtil.getPostingsFormatWithOrds(random()));
      conf.setCodec(codec);
    }

    final RandomIndexWriter w = new RandomIndexWriter(random(), dir, conf);

    final int[][] idToOrds = new int[NUM_DOCS][];
    final Set<Integer> ordsForDocSet = new HashSet<>();

    for(int id=0;id<NUM_DOCS;id++) {
      Document doc = new Document();

      doc.add(new LegacyIntField("id", id, Field.Store.YES));

      final int termCount = TestUtil.nextInt(random(), 0, 20 * RANDOM_MULTIPLIER);
      while(ordsForDocSet.size() < termCount) {
        ordsForDocSet.add(random().nextInt(termsArray.length));
      }
      final int[] ordsForDoc = new int[termCount];
      int upto = 0;
      if (VERBOSE) {
        System.out.println("TEST: doc id=" + id);
      }
      for(int ord : ordsForDocSet) {
        ordsForDoc[upto++] = ord;
        Field field = newStringField("field", termsArray[ord].utf8ToString(), Field.Store.NO);
        if (VERBOSE) {
          System.out.println(" f=" + termsArray[ord].utf8ToString());
        }
        doc.add(field);
      }
      ordsForDocSet.clear();
      Arrays.sort(ordsForDoc);
      idToOrds[id] = ordsForDoc;
      w.addDocument(doc);
    }

    final DirectoryReader r = w.getReader();
    w.close();

    if (VERBOSE) {
      System.out.println("TEST: reader=" + r);
    }

    for(LeafReaderContext ctx : r.leaves()) {
      if (VERBOSE) {
        System.out.println("\nTEST: sub=" + ctx.reader());
      }
      verify(ctx.reader(), idToOrds, termsArray, null);
    }

    // Also test top-level reader: its enum does not support
    // ord, so this forces the OrdWrapper to run:
    if (VERBOSE) {
      System.out.println("TEST: top reader");
    }
    LeafReader slowR = SlowCompositeReaderWrapper.wrap(r);
    TestUtil.checkReader(slowR);
    verify(slowR, idToOrds, termsArray, null);

    FieldCache.DEFAULT.purgeByCacheKey(slowR.getCoreCacheKey());

    r.close();
    dir.close();
  }

  public void testRandomWithPrefix() throws Exception {
    Directory dir = newDirectory();

    final Set<String> prefixes = new HashSet<>();
    final int numPrefix = TestUtil.nextInt(random(), 2, 7);
    if (VERBOSE) {
      System.out.println("TEST: use " + numPrefix + " prefixes");
    }
    while(prefixes.size() < numPrefix) {
      prefixes.add(TestUtil.randomRealisticUnicodeString(random()));
      //prefixes.add(_TestUtil.randomSimpleString(random));
    }
    final String[] prefixesArray = prefixes.toArray(new String[prefixes.size()]);

    final int NUM_TERMS = atLeast(20);
    final Set<BytesRef> terms = new HashSet<>();
    while(terms.size() < NUM_TERMS) {
      final String s = prefixesArray[random().nextInt(prefixesArray.length)] + TestUtil.randomRealisticUnicodeString(random());
      //final String s = prefixesArray[random.nextInt(prefixesArray.length)] + _TestUtil.randomSimpleString(random);
      if (s.length() > 0) {
        terms.add(new BytesRef(s));
      }
    }
    final BytesRef[] termsArray = terms.toArray(new BytesRef[terms.size()]);
    Arrays.sort(termsArray);

    final int NUM_DOCS = atLeast(100);

    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));

    // Sometimes swap in codec that impls ord():
    if (random().nextInt(10) == 7) {
      Codec codec = TestUtil.alwaysPostingsFormat(TestUtil.getPostingsFormatWithOrds(random()));
      conf.setCodec(codec);
    }

    final RandomIndexWriter w = new RandomIndexWriter(random(), dir, conf);

    final int[][] idToOrds = new int[NUM_DOCS][];
    final Set<Integer> ordsForDocSet = new HashSet<>();

    for(int id=0;id<NUM_DOCS;id++) {
      Document doc = new Document();

      doc.add(new LegacyIntField("id", id, Field.Store.YES));

      final int termCount = TestUtil.nextInt(random(), 0, 20 * RANDOM_MULTIPLIER);
      while(ordsForDocSet.size() < termCount) {
        ordsForDocSet.add(random().nextInt(termsArray.length));
      }
      final int[] ordsForDoc = new int[termCount];
      int upto = 0;
      if (VERBOSE) {
        System.out.println("TEST: doc id=" + id);
      }
      for(int ord : ordsForDocSet) {
        ordsForDoc[upto++] = ord;
        Field field = newStringField("field", termsArray[ord].utf8ToString(), Field.Store.NO);
        if (VERBOSE) {
          System.out.println(" f=" + termsArray[ord].utf8ToString());
        }
        doc.add(field);
      }
      ordsForDocSet.clear();
      Arrays.sort(ordsForDoc);
      idToOrds[id] = ordsForDoc;
      w.addDocument(doc);
    }

    final DirectoryReader r = w.getReader();
    w.close();

    if (VERBOSE) {
      System.out.println("TEST: reader=" + r);
    }

    LeafReader slowR = SlowCompositeReaderWrapper.wrap(r);
    TestUtil.checkReader(slowR);
    for(String prefix : prefixesArray) {

      final BytesRef prefixRef = prefix == null ? null : new BytesRef(prefix);

      final int[][] idToOrdsPrefix = new int[NUM_DOCS][];
      for(int id=0;id<NUM_DOCS;id++) {
        final int[] docOrds = idToOrds[id];
        final List<Integer> newOrds = new ArrayList<>();
        for(int ord : idToOrds[id]) {
          if (StringHelper.startsWith(termsArray[ord], prefixRef)) {
            newOrds.add(ord);
          }
        }
        final int[] newOrdsArray = new int[newOrds.size()];
        int upto = 0;
        for(int ord : newOrds) {
          newOrdsArray[upto++] = ord;
        }
        idToOrdsPrefix[id] = newOrdsArray;
      }

      for(LeafReaderContext ctx : r.leaves()) {
        if (VERBOSE) {
          System.out.println("\nTEST: sub=" + ctx.reader());
        }
        verify(ctx.reader(), idToOrdsPrefix, termsArray, prefixRef);
      }

      // Also test top-level reader: its enum does not support
      // ord, so this forces the OrdWrapper to run:
      if (VERBOSE) {
        System.out.println("TEST: top reader");
      }
      verify(slowR, idToOrdsPrefix, termsArray, prefixRef);
    }

    FieldCache.DEFAULT.purgeByCacheKey(slowR.getCoreCacheKey());

    r.close();
    dir.close();
  }

  private void verify(LeafReader r, int[][] idToOrds, BytesRef[] termsArray, BytesRef prefixRef) throws Exception {

    final DocTermOrds dto = new DocTermOrds(r, r.getLiveDocs(),
                                            "field",
                                            prefixRef,
                                            Integer.MAX_VALUE,
                                            TestUtil.nextInt(random(), 2, 10));

    final NumericDocValues docIDToID = FieldCache.DEFAULT.getNumerics(r, "id", FieldCache.LEGACY_INT_PARSER, false);
    /*
      for(int docID=0;docID<subR.maxDoc();docID++) {
        System.out.println(" docID=" + docID + " id=" + docIDToID[docID]);
      }
    */

    if (VERBOSE) {
      System.out.println("TEST: verify prefix=" + (prefixRef==null ? "null" : prefixRef.utf8ToString()));
      System.out.println("TEST: all TERMS:");
      TermsEnum allTE = MultiFields.getTerms(r, "field").iterator();
      int ord = 0;
      while(allTE.next() != null) {
        System.out.println(" ord=" + (ord++) + " term=" + allTE.term().utf8ToString());
      }
    }

    //final TermsEnum te = subR.fields().terms("field").iterator();
    final TermsEnum te = dto.getOrdTermsEnum(r);
    if (dto.numTerms() == 0) {
      if (prefixRef == null) {
        assertNull(MultiFields.getTerms(r, "field"));
      } else {
        Terms terms = MultiFields.getTerms(r, "field");
        if (terms != null) {
          TermsEnum termsEnum = terms.iterator();
          TermsEnum.SeekStatus result = termsEnum.seekCeil(prefixRef);
          if (result != TermsEnum.SeekStatus.END) {
            assertFalse("term=" + termsEnum.term().utf8ToString() + " matches prefix=" + prefixRef.utf8ToString(), StringHelper.startsWith(termsEnum.term(), prefixRef));
          } else {
            // ok
          }
        } else {
          // ok
        }
      }
      return;
    }

    if (VERBOSE) {
      System.out.println("TEST: TERMS:");
      te.seekExact(0);
      while(true) {
        System.out.println(" ord=" + te.ord() + " term=" + te.term().utf8ToString());
        if (te.next() == null) {
          break;
        }
      }
    }

    SortedSetDocValues iter = dto.iterator(r);
    for(int docID=0;docID<r.maxDoc();docID++) {
      if (VERBOSE) {
        System.out.println("TEST: docID=" + docID + " of " + r.maxDoc() + " (id=" + docIDToID.get(docID) + ")");
      }
      iter.setDocument(docID);
      final int[] answers = idToOrds[(int) docIDToID.get(docID)];
      int upto = 0;
      long ord;
      while ((ord = iter.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
        te.seekExact(ord);
        final BytesRef expected = termsArray[answers[upto++]];
        if (VERBOSE) {
          System.out.println(" exp=" + expected.utf8ToString() + " actual=" + te.term().utf8ToString());
        }
        assertEquals("expected=" + expected.utf8ToString() + " actual=" + te.term().utf8ToString() + " ord=" + ord, expected, te.term());
      }
      assertEquals(answers.length, upto);
    }
  }

  public void testBackToTheFuture() throws Exception {
    Directory dir = newDirectory();
    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));

    Document doc = new Document();
    doc.add(newStringField("foo", "bar", Field.Store.NO));
    iw.addDocument(doc);

    doc = new Document();
    doc.add(newStringField("foo", "baz", Field.Store.NO));
    // we need a second value for a doc, or we don't actually test DocTermOrds!
    doc.add(newStringField("foo", "car", Field.Store.NO));
    iw.addDocument(doc);

    DirectoryReader r1 = DirectoryReader.open(iw);

    iw.deleteDocuments(new Term("foo", "baz"));
    DirectoryReader r2 = DirectoryReader.open(iw);

    FieldCache.DEFAULT.getDocTermOrds(getOnlyLeafReader(r2), "foo", null);

    SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(getOnlyLeafReader(r1), "foo", null);
    assertEquals(3, v.getValueCount());
    v.setDocument(1);
    assertEquals(1, v.nextOrd());

    iw.close();
    r1.close();
    r2.close();
    dir.close();
  }

  public void testNumericEncoded32() throws IOException {
    Directory dir = newDirectory();
    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));

    Document doc = new Document();
    doc.add(new LegacyIntField("foo", 5, Field.Store.NO));
    iw.addDocument(doc);

    doc = new Document();
    doc.add(new LegacyIntField("foo", 5, Field.Store.NO));
    doc.add(new LegacyIntField("foo", -3, Field.Store.NO));
    iw.addDocument(doc);

    iw.forceMerge(1);
    iw.close();

    DirectoryReader ir = DirectoryReader.open(dir);
    LeafReader ar = getOnlyLeafReader(ir);

    SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(ar, "foo", FieldCache.INT32_TERM_PREFIX);
    assertEquals(2, v.getValueCount());

    v.setDocument(0);
    assertEquals(1, v.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

    v.setDocument(1);
    assertEquals(0, v.nextOrd());
    assertEquals(1, v.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

    BytesRef value = v.lookupOrd(0);
    assertEquals(-3, LegacyNumericUtils.prefixCodedToInt(value));

    value = v.lookupOrd(1);
    assertEquals(5, LegacyNumericUtils.prefixCodedToInt(value));

    ir.close();
    dir.close();
  }

  public void testNumericEncoded64() throws IOException {
    Directory dir = newDirectory();
    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));

    Document doc = new Document();
    doc.add(new LegacyLongField("foo", 5, Field.Store.NO));
    iw.addDocument(doc);

    doc = new Document();
    doc.add(new LegacyLongField("foo", 5, Field.Store.NO));
    doc.add(new LegacyLongField("foo", -3, Field.Store.NO));
    iw.addDocument(doc);

    iw.forceMerge(1);
    iw.close();

    DirectoryReader ir = DirectoryReader.open(dir);
    LeafReader ar = getOnlyLeafReader(ir);

    SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(ar, "foo", FieldCache.INT64_TERM_PREFIX);
    assertEquals(2, v.getValueCount());

    v.setDocument(0);
    assertEquals(1, v.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

    v.setDocument(1);
    assertEquals(0, v.nextOrd());
    assertEquals(1, v.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

    BytesRef value = v.lookupOrd(0);
    assertEquals(-3, LegacyNumericUtils.prefixCodedToLong(value));

    value = v.lookupOrd(1);
    assertEquals(5, LegacyNumericUtils.prefixCodedToLong(value));

    ir.close();
    dir.close();
  }

  public void testSortedTermsEnum() throws IOException {
    Directory directory = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
    iwconfig.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);

    Document doc = new Document();
    doc.add(new StringField("field", "hello", Field.Store.NO));
    iwriter.addDocument(doc);

    doc = new Document();
    doc.add(new StringField("field", "world", Field.Store.NO));
    // we need a second value for a doc, or we don't actually test DocTermOrds!
    doc.add(new StringField("field", "hello", Field.Store.NO));
    iwriter.addDocument(doc);

    doc = new Document();
    doc.add(new StringField("field", "beer", Field.Store.NO));
    iwriter.addDocument(doc);
    iwriter.forceMerge(1);

    DirectoryReader ireader = iwriter.getReader();
    iwriter.close();

    LeafReader ar = getOnlyLeafReader(ireader);
    SortedSetDocValues dv = FieldCache.DEFAULT.getDocTermOrds(ar, "field", null);
    assertEquals(3, dv.getValueCount());

    TermsEnum termsEnum = dv.termsEnum();

    // next()
    assertEquals("beer", termsEnum.next().utf8ToString());
    assertEquals(0, termsEnum.ord());
    assertEquals("hello", termsEnum.next().utf8ToString());
    assertEquals(1, termsEnum.ord());
    assertEquals("world", termsEnum.next().utf8ToString());
    assertEquals(2, termsEnum.ord());

    // seekCeil()
    assertEquals(SeekStatus.NOT_FOUND, termsEnum.seekCeil(new BytesRef("ha!")));
    assertEquals("hello", termsEnum.term().utf8ToString());
    assertEquals(1, termsEnum.ord());
    assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef("beer")));
    assertEquals("beer", termsEnum.term().utf8ToString());
    assertEquals(0, termsEnum.ord());
    assertEquals(SeekStatus.END, termsEnum.seekCeil(new BytesRef("zzz")));

    // seekExact()
    assertTrue(termsEnum.seekExact(new BytesRef("beer")));
    assertEquals("beer", termsEnum.term().utf8ToString());
    assertEquals(0, termsEnum.ord());
    assertTrue(termsEnum.seekExact(new BytesRef("hello")));
    assertEquals("hello", termsEnum.term().utf8ToString());
    assertEquals(1, termsEnum.ord());
    assertTrue(termsEnum.seekExact(new BytesRef("world")));
    assertEquals("world", termsEnum.term().utf8ToString());
    assertEquals(2, termsEnum.ord());
    assertFalse(termsEnum.seekExact(new BytesRef("bogus")));

    // seek(ord)
    termsEnum.seekExact(0);
    assertEquals("beer", termsEnum.term().utf8ToString());
    assertEquals(0, termsEnum.ord());
    termsEnum.seekExact(1);
    assertEquals("hello", termsEnum.term().utf8ToString());
    assertEquals(1, termsEnum.ord());
    termsEnum.seekExact(2);
    assertEquals("world", termsEnum.term().utf8ToString());
    assertEquals(2, termsEnum.ord());

    // lookupTerm(BytesRef)
    assertEquals(-1, dv.lookupTerm(new BytesRef("apple")));
    assertEquals(0, dv.lookupTerm(new BytesRef("beer")));
    assertEquals(-2, dv.lookupTerm(new BytesRef("car")));
    assertEquals(1, dv.lookupTerm(new BytesRef("hello")));
    assertEquals(-3, dv.lookupTerm(new BytesRef("matter")));
    assertEquals(2, dv.lookupTerm(new BytesRef("world")));
    assertEquals(-4, dv.lookupTerm(new BytesRef("zany")));

    ireader.close();
    directory.close();
  }

  public void testActuallySingleValued() throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig iwconfig = newIndexWriterConfig(null);
    iwconfig.setMergePolicy(newLogMergePolicy());
    IndexWriter iw = new IndexWriter(dir, iwconfig);

    Document doc = new Document();
    doc.add(new StringField("foo", "bar", Field.Store.NO));
    iw.addDocument(doc);

    doc = new Document();
    doc.add(new StringField("foo", "baz", Field.Store.NO));
    iw.addDocument(doc);

    doc = new Document();
    iw.addDocument(doc);

    doc = new Document();
    doc.add(new StringField("foo", "baz", Field.Store.NO));
    doc.add(new StringField("foo", "baz", Field.Store.NO));
    iw.addDocument(doc);

    iw.forceMerge(1);
    iw.close();

    DirectoryReader ir = DirectoryReader.open(dir);
    LeafReader ar = getOnlyLeafReader(ir);

    SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(ar, "foo", null);
    assertNotNull(DocValues.unwrapSingleton(v)); // actually a single-valued field
    assertEquals(2, v.getValueCount());

    v.setDocument(0);
    assertEquals(0, v.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

    v.setDocument(1);
    assertEquals(1, v.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

    v.setDocument(2);
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

    v.setDocument(3);
    assertEquals(1, v.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());

    BytesRef value = v.lookupOrd(0);
    assertEquals("bar", value.utf8ToString());

    value = v.lookupOrd(1);
    assertEquals("baz", value.utf8ToString());

    ir.close();
    dir.close();
  }
}

@@ -0,0 +1,731 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.uninverting;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.concurrent.CyclicBarrier;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FloatPoint;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.LegacyDoubleField;
import org.apache.lucene.document.LegacyFloatField;
import org.apache.lucene.document.LegacyIntField;
import org.apache.lucene.document.LegacyLongField;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LogDocMergePolicy;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LegacyNumericUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.index.SlowCompositeReaderWrapper;
import org.junit.AfterClass;
import org.junit.BeforeClass;

public class TestFieldCache extends LuceneTestCase {
  private static LeafReader reader;
  private static int NUM_DOCS;
  private static int NUM_ORDS;
  private static String[] unicodeStrings;
  private static BytesRef[][] multiValued;
  private static Directory directory;

  @BeforeClass
  public static void beforeClass() throws Exception {
    NUM_DOCS = atLeast(500);
    NUM_ORDS = atLeast(2);
    directory = newDirectory();
    IndexWriter writer= new IndexWriter(directory, new IndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(new LogDocMergePolicy()));
    long theLong = Long.MAX_VALUE;
    double theDouble = Double.MAX_VALUE;
    int theInt = Integer.MAX_VALUE;
    float theFloat = Float.MAX_VALUE;
    unicodeStrings = new String[NUM_DOCS];
    multiValued = new BytesRef[NUM_DOCS][NUM_ORDS];
    if (VERBOSE) {
      System.out.println("TEST: setUp");
    }
    for (int i = 0; i < NUM_DOCS; i++){
      Document doc = new Document();
      doc.add(new LongPoint("theLong", theLong--));
      doc.add(new DoublePoint("theDouble", theDouble--));
      doc.add(new IntPoint("theInt", theInt--));
      doc.add(new FloatPoint("theFloat", theFloat--));
      if (i%2 == 0) {
        doc.add(new IntPoint("sparse", i));
      }

      if (i%2 == 0) {
        doc.add(new IntPoint("numInt", i));
      }

      // sometimes skip the field:
      if (random().nextInt(40) != 17) {
        unicodeStrings[i] = generateString(i);
        doc.add(newStringField("theRandomUnicodeString", unicodeStrings[i], Field.Store.YES));
      }

      // sometimes skip the field:
      if (random().nextInt(10) != 8) {
        for (int j = 0; j < NUM_ORDS; j++) {
          String newValue = generateString(i);
          multiValued[i][j] = new BytesRef(newValue);
          doc.add(newStringField("theRandomUnicodeMultiValuedField", newValue, Field.Store.YES));
        }
        Arrays.sort(multiValued[i]);
      }
      writer.addDocument(doc);
    }
    writer.forceMerge(1); // this test relies on one segment and docid order
    IndexReader r = DirectoryReader.open(writer);
    assertEquals(1, r.leaves().size());
    reader = r.leaves().get(0).reader();
    TestUtil.checkReader(reader);
    writer.close();
  }

  @AfterClass
  public static void afterClass() throws Exception {
    reader.close();
    reader = null;
    directory.close();
    directory = null;
    unicodeStrings = null;
    multiValued = null;
  }

  public void test() throws IOException {
    FieldCache cache = FieldCache.DEFAULT;
    NumericDocValues doubles = cache.getNumerics(reader, "theDouble", FieldCache.DOUBLE_POINT_PARSER, random().nextBoolean());
    assertSame("Second request to cache return same array", doubles, cache.getNumerics(reader, "theDouble", FieldCache.DOUBLE_POINT_PARSER, random().nextBoolean()));
    for (int i = 0; i < NUM_DOCS; i++) {
      assertEquals(Double.doubleToLongBits(Double.MAX_VALUE - i), doubles.get(i));
    }

    NumericDocValues longs = cache.getNumerics(reader, "theLong", FieldCache.LONG_POINT_PARSER, random().nextBoolean());
    assertSame("Second request to cache return same array", longs, cache.getNumerics(reader, "theLong", FieldCache.LONG_POINT_PARSER, random().nextBoolean()));
    for (int i = 0; i < NUM_DOCS; i++) {
      assertEquals(Long.MAX_VALUE - i, longs.get(i));
    }

    NumericDocValues ints = cache.getNumerics(reader, "theInt", FieldCache.INT_POINT_PARSER, random().nextBoolean());
    assertSame("Second request to cache return same array", ints, cache.getNumerics(reader, "theInt", FieldCache.INT_POINT_PARSER, random().nextBoolean()));
    for (int i = 0; i < NUM_DOCS; i++) {
      assertEquals(Integer.MAX_VALUE - i, ints.get(i));
    }

    NumericDocValues floats = cache.getNumerics(reader, "theFloat", FieldCache.FLOAT_POINT_PARSER, random().nextBoolean());
    assertSame("Second request to cache return same array", floats, cache.getNumerics(reader, "theFloat", FieldCache.FLOAT_POINT_PARSER, random().nextBoolean()));
    for (int i = 0; i < NUM_DOCS; i++) {
      assertEquals(Float.floatToIntBits(Float.MAX_VALUE - i), floats.get(i));
    }

    Bits docsWithField = cache.getDocsWithField(reader, "theLong", FieldCache.LONG_POINT_PARSER);
    assertSame("Second request to cache return same array", docsWithField, cache.getDocsWithField(reader, "theLong", FieldCache.LONG_POINT_PARSER));
    assertTrue("docsWithField(theLong) must be class Bits.MatchAllBits", docsWithField instanceof Bits.MatchAllBits);
    assertTrue("docsWithField(theLong) Size: " + docsWithField.length() + " is not: " + NUM_DOCS, docsWithField.length() == NUM_DOCS);
    for (int i = 0; i < docsWithField.length(); i++) {
      assertTrue(docsWithField.get(i));
    }

    docsWithField = cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER);
    assertSame("Second request to cache return same array", docsWithField, cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER));
    assertFalse("docsWithField(sparse) must not be class Bits.MatchAllBits", docsWithField instanceof Bits.MatchAllBits);
    assertTrue("docsWithField(sparse) Size: " + docsWithField.length() + " is not: " + NUM_DOCS, docsWithField.length() == NUM_DOCS);
    for (int i = 0; i < docsWithField.length(); i++) {
      assertEquals(i%2 == 0, docsWithField.get(i));
    }

    // getTermsIndex
    SortedDocValues termsIndex = cache.getTermsIndex(reader, "theRandomUnicodeString");
    for (int i = 0; i < NUM_DOCS; i++) {
      final String s;
      final int ord = termsIndex.getOrd(i);
      if (ord == -1) {
        s = null;
      } else {
        s = termsIndex.lookupOrd(ord).utf8ToString();
      }
      assertTrue("for doc " + i + ": " + s + " does not equal: " + unicodeStrings[i], unicodeStrings[i] == null || unicodeStrings[i].equals(s));
    }

    int nTerms = termsIndex.getValueCount();

    TermsEnum tenum = termsIndex.termsEnum();
    for (int i=0; i<nTerms; i++) {
      BytesRef val1 = BytesRef.deepCopyOf(tenum.next());
      final BytesRef val = termsIndex.lookupOrd(i);
      // System.out.println("i="+i);
      assertEquals(val, val1);
    }

    // seek the enum around (note this isn't a great test here)
    int num = atLeast(100);
    for (int i = 0; i < num; i++) {
      int k = random().nextInt(nTerms);
      final BytesRef val = BytesRef.deepCopyOf(termsIndex.lookupOrd(k));
      assertEquals(TermsEnum.SeekStatus.FOUND, tenum.seekCeil(val));
      assertEquals(val, tenum.term());
    }

    for(int i=0;i<nTerms;i++) {
      final BytesRef val = BytesRef.deepCopyOf(termsIndex.lookupOrd(i));
      assertEquals(TermsEnum.SeekStatus.FOUND, tenum.seekCeil(val));
      assertEquals(val, tenum.term());
    }

    // test bad field
    termsIndex = cache.getTermsIndex(reader, "bogusfield");

    // getTerms
    BinaryDocValues terms = cache.getTerms(reader, "theRandomUnicodeString", true);
    Bits bits = cache.getDocsWithField(reader, "theRandomUnicodeString", null);
    for (int i = 0; i < NUM_DOCS; i++) {
      final String s;
      if (!bits.get(i)) {
        s = null;
      } else {
        s = terms.get(i).utf8ToString();
      }
      assertTrue("for doc " + i + ": " + s + " does not equal: " + unicodeStrings[i], unicodeStrings[i] == null || unicodeStrings[i].equals(s));
    }

    // test bad field
    terms = cache.getTerms(reader, "bogusfield", false);

    // getDocTermOrds
    SortedSetDocValues termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField", null);
    int numEntries = cache.getCacheEntries().length;
    // ask for it again, and check that we didnt create any additional entries:
    termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField", null);
    assertEquals(numEntries, cache.getCacheEntries().length);

    for (int i = 0; i < NUM_DOCS; i++) {
      termOrds.setDocument(i);
      // This will remove identical terms. A DocTermOrds doesn't return duplicate ords for a docId
      List<BytesRef> values = new ArrayList<>(new LinkedHashSet<>(Arrays.asList(multiValued[i])));
      for (BytesRef v : values) {
        if (v == null) {
          // why does this test use null values... instead of an empty list: confusing
          break;
        }
        long ord = termOrds.nextOrd();
        assert ord != SortedSetDocValues.NO_MORE_ORDS;
        BytesRef scratch = termOrds.lookupOrd(ord);
        assertEquals(v, scratch);
      }
      assertEquals(SortedSetDocValues.NO_MORE_ORDS, termOrds.nextOrd());
    }

    // test bad field
    termOrds = cache.getDocTermOrds(reader, "bogusfield", null);
    assertTrue(termOrds.getValueCount() == 0);

    FieldCache.DEFAULT.purgeByCacheKey(reader.getCoreCacheKey());
  }

  public void testEmptyIndex() throws Exception {
    Directory dir = newDirectory();
    IndexWriter writer= new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setMaxBufferedDocs(500));
    writer.close();
    IndexReader r = DirectoryReader.open(dir);
    LeafReader reader = SlowCompositeReaderWrapper.wrap(r);
    TestUtil.checkReader(reader);
    FieldCache.DEFAULT.getTerms(reader, "foobar", true);
    FieldCache.DEFAULT.getTermsIndex(reader, "foobar");
    FieldCache.DEFAULT.purgeByCacheKey(reader.getCoreCacheKey());
    r.close();
    dir.close();
  }

  private static String generateString(int i) {
    String s = null;
    if (i > 0 && random().nextInt(3) == 1) {
      // reuse past string -- try to find one that's not null
      for(int iter = 0; iter < 10 && s == null;iter++) {
        s = unicodeStrings[random().nextInt(i)];
      }
      if (s == null) {
        s = TestUtil.randomUnicodeString(random());
      }
    } else {
      s = TestUtil.randomUnicodeString(random());
    }
    return s;
  }

  public void testDocsWithField() throws Exception {
    FieldCache cache = FieldCache.DEFAULT;
    cache.purgeAllCaches();
    assertEquals(0, cache.getCacheEntries().length);
    cache.getNumerics(reader, "theDouble", FieldCache.DOUBLE_POINT_PARSER, true);

    // The double[] takes one slots, and docsWithField should also
    // have been populated:
    assertEquals(2, cache.getCacheEntries().length);
    Bits bits = cache.getDocsWithField(reader, "theDouble", FieldCache.DOUBLE_POINT_PARSER);

    // No new entries should appear:
    assertEquals(2, cache.getCacheEntries().length);
    assertTrue(bits instanceof Bits.MatchAllBits);

    NumericDocValues ints = cache.getNumerics(reader, "sparse", FieldCache.INT_POINT_PARSER, true);
    assertEquals(4, cache.getCacheEntries().length);
    Bits docsWithField = cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER);
    assertEquals(4, cache.getCacheEntries().length);
    for (int i = 0; i < docsWithField.length(); i++) {
      if (i%2 == 0) {
        assertTrue(docsWithField.get(i));
        assertEquals(i, ints.get(i));
      } else {
        assertFalse(docsWithField.get(i));
      }
    }

    NumericDocValues numInts = cache.getNumerics(reader, "numInt", FieldCache.INT_POINT_PARSER, random().nextBoolean());
    docsWithField = cache.getDocsWithField(reader, "numInt", FieldCache.INT_POINT_PARSER);
    for (int i = 0; i < docsWithField.length(); i++) {
      if (i%2 == 0) {
        assertTrue(docsWithField.get(i));
        assertEquals(i, numInts.get(i));
      } else {
        assertFalse(docsWithField.get(i));
      }
    }
  }

  public void testGetDocsWithFieldThreadSafety() throws Exception {
    final FieldCache cache = FieldCache.DEFAULT;
    cache.purgeAllCaches();

    int NUM_THREADS = 3;
    Thread[] threads = new Thread[NUM_THREADS];
    final AtomicBoolean failed = new AtomicBoolean();
    final AtomicInteger iters = new AtomicInteger();
    final int NUM_ITER = 200 * RANDOM_MULTIPLIER;
    final CyclicBarrier restart = new CyclicBarrier(NUM_THREADS,
                                                    new Runnable() {
                                                      @Override
                                                      public void run() {
                                                        cache.purgeAllCaches();
                                                        iters.incrementAndGet();
                                                      }
                                                    });
    for(int threadIDX=0;threadIDX<NUM_THREADS;threadIDX++) {
      threads[threadIDX] = new Thread() {
          @Override
          public void run() {

            try {
              while(!failed.get()) {
                final int op = random().nextInt(3);
                if (op == 0) {
                  // Purge all caches & resume, once all
                  // threads get here:
                  restart.await();
                  if (iters.get() >= NUM_ITER) {
                    break;
                  }
                } else if (op == 1) {
                  Bits docsWithField = cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER);
                  for (int i = 0; i < docsWithField.length(); i++) {
                    assertEquals(i%2 == 0, docsWithField.get(i));
                  }
                } else {
                  NumericDocValues ints = cache.getNumerics(reader, "sparse", FieldCache.INT_POINT_PARSER, true);
                  Bits docsWithField = cache.getDocsWithField(reader, "sparse", FieldCache.INT_POINT_PARSER);
                  for (int i = 0; i < docsWithField.length(); i++) {
                    if (i%2 == 0) {
                      assertTrue(docsWithField.get(i));
                      assertEquals(i, ints.get(i));
                    } else {
                      assertFalse(docsWithField.get(i));
                    }
                  }
                }
              }
            } catch (Throwable t) {
              failed.set(true);
              restart.reset();
              throw new RuntimeException(t);
            }
          }
        };
      threads[threadIDX].start();
    }

    for(int threadIDX=0;threadIDX<NUM_THREADS;threadIDX++) {
      threads[threadIDX].join();
    }
    assertFalse(failed.get());
  }

  public void testDocValuesIntegration() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig(null);
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
    Document doc = new Document();
    doc.add(new BinaryDocValuesField("binary", new BytesRef("binary value")));
    doc.add(new SortedDocValuesField("sorted", new BytesRef("sorted value")));
    doc.add(new NumericDocValuesField("numeric", 42));
    doc.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value1")));
    doc.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value2")));
    iw.addDocument(doc);
    DirectoryReader ir = iw.getReader();
    iw.close();
    LeafReader ar = getOnlyLeafReader(ir);

    // Binary type: can be retrieved via getTerms()
    expectThrows(IllegalStateException.class, () -> {
      FieldCache.DEFAULT.getNumerics(ar, "binary", FieldCache.INT_POINT_PARSER, false);
    });

    BinaryDocValues binary = FieldCache.DEFAULT.getTerms(ar, "binary", true);
    final BytesRef term = binary.get(0);
    assertEquals("binary value", term.utf8ToString());

    expectThrows(IllegalStateException.class, () -> {
      FieldCache.DEFAULT.getTermsIndex(ar, "binary");
    });

    expectThrows(IllegalStateException.class, () -> {
      FieldCache.DEFAULT.getDocTermOrds(ar, "binary", null);
    });

    expectThrows(IllegalStateException.class, () -> {
      new DocTermOrds(ar, null, "binary");
    });

    Bits bits = FieldCache.DEFAULT.getDocsWithField(ar, "binary", null);
    assertTrue(bits.get(0));

    // Sorted type: can be retrieved via getTerms(), getTermsIndex(), getDocTermOrds()
    expectThrows(IllegalStateException.class, () -> {
      FieldCache.DEFAULT.getNumerics(ar, "sorted", FieldCache.INT_POINT_PARSER, false);
    });

    expectThrows(IllegalStateException.class, () -> {
      new DocTermOrds(ar, null, "sorted");
    });

    binary = FieldCache.DEFAULT.getTerms(ar, "sorted", true);
    BytesRef scratch = binary.get(0);
    assertEquals("sorted value", scratch.utf8ToString());

    SortedDocValues sorted = FieldCache.DEFAULT.getTermsIndex(ar, "sorted");
    assertEquals(0, sorted.getOrd(0));
    assertEquals(1, sorted.getValueCount());
    scratch = sorted.get(0);
    assertEquals("sorted value", scratch.utf8ToString());

    SortedSetDocValues sortedSet = FieldCache.DEFAULT.getDocTermOrds(ar, "sorted", null);
    sortedSet.setDocument(0);
    assertEquals(0, sortedSet.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
    assertEquals(1, sortedSet.getValueCount());

    bits = FieldCache.DEFAULT.getDocsWithField(ar, "sorted", null);
    assertTrue(bits.get(0));

    // Numeric type: can be retrieved via getInts() and so on
    NumericDocValues numeric = FieldCache.DEFAULT.getNumerics(ar, "numeric", FieldCache.INT_POINT_PARSER, false);
    assertEquals(42, numeric.get(0));

    expectThrows(IllegalStateException.class, () -> {
      FieldCache.DEFAULT.getTerms(ar, "numeric", true);
    });

    expectThrows(IllegalStateException.class, () -> {
      FieldCache.DEFAULT.getTermsIndex(ar, "numeric");
    });

    expectThrows(IllegalStateException.class, () -> {
      FieldCache.DEFAULT.getDocTermOrds(ar, "numeric", null);
    });

    expectThrows(IllegalStateException.class, () -> {
      new DocTermOrds(ar, null, "numeric");
    });

    bits = FieldCache.DEFAULT.getDocsWithField(ar, "numeric", null);
    assertTrue(bits.get(0));

    // SortedSet type: can be retrieved via getDocTermOrds()
    expectThrows(IllegalStateException.class, () -> {
      FieldCache.DEFAULT.getNumerics(ar, "sortedset", FieldCache.INT_POINT_PARSER, false);
    });

    expectThrows(IllegalStateException.class, () -> {
      FieldCache.DEFAULT.getTerms(ar, "sortedset", true);
    });

    expectThrows(IllegalStateException.class, () -> {
      FieldCache.DEFAULT.getTermsIndex(ar, "sortedset");
    });

    expectThrows(IllegalStateException.class, () -> {
      new DocTermOrds(ar, null, "sortedset");
    });

    sortedSet = FieldCache.DEFAULT.getDocTermOrds(ar, "sortedset", null);
    sortedSet.setDocument(0);
    assertEquals(0, sortedSet.nextOrd());
    assertEquals(1, sortedSet.nextOrd());
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
    assertEquals(2, sortedSet.getValueCount());

    bits = FieldCache.DEFAULT.getDocsWithField(ar, "sortedset", null);
    assertTrue(bits.get(0));

    ir.close();
    dir.close();
  }
public void testNonexistantFields() throws Exception {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
|
||||||
|
Document doc = new Document();
|
||||||
|
iw.addDocument(doc);
|
||||||
|
DirectoryReader ir = iw.getReader();
|
||||||
|
iw.close();
|
||||||
|
|
||||||
|
LeafReader ar = getOnlyLeafReader(ir);
|
||||||
|
|
||||||
|
final FieldCache cache = FieldCache.DEFAULT;
|
||||||
|
cache.purgeAllCaches();
|
||||||
|
assertEquals(0, cache.getCacheEntries().length);
|
||||||
|
|
||||||
|
NumericDocValues ints = cache.getNumerics(ar, "bogusints", FieldCache.INT_POINT_PARSER, true);
|
||||||
|
assertEquals(0, ints.get(0));
|
||||||
|
|
||||||
|
NumericDocValues longs = cache.getNumerics(ar, "boguslongs", FieldCache.LONG_POINT_PARSER, true);
|
||||||
|
assertEquals(0, longs.get(0));
|
||||||
|
|
||||||
|
NumericDocValues floats = cache.getNumerics(ar, "bogusfloats", FieldCache.FLOAT_POINT_PARSER, true);
|
||||||
|
assertEquals(0, floats.get(0));
|
||||||
|
|
||||||
|
NumericDocValues doubles = cache.getNumerics(ar, "bogusdoubles", FieldCache.DOUBLE_POINT_PARSER, true);
|
||||||
|
assertEquals(0, doubles.get(0));
|
||||||
|
|
||||||
|
BinaryDocValues binaries = cache.getTerms(ar, "bogusterms", true);
|
||||||
|
BytesRef scratch = binaries.get(0);
|
||||||
|
assertEquals(0, scratch.length);
|
||||||
|
|
||||||
|
SortedDocValues sorted = cache.getTermsIndex(ar, "bogustermsindex");
|
||||||
|
assertEquals(-1, sorted.getOrd(0));
|
||||||
|
scratch = sorted.get(0);
|
||||||
|
assertEquals(0, scratch.length);
|
||||||
|
|
||||||
|
SortedSetDocValues sortedSet = cache.getDocTermOrds(ar, "bogusmultivalued", null);
|
||||||
|
sortedSet.setDocument(0);
|
||||||
|
assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());
|
||||||
|
|
||||||
|
Bits bits = cache.getDocsWithField(ar, "bogusbits", null);
|
||||||
|
assertFalse(bits.get(0));
|
||||||
|
|
||||||
|
// check that we cached nothing
|
||||||
|
assertEquals(0, cache.getCacheEntries().length);
|
||||||
|
ir.close();
|
||||||
|
dir.close();
|
||||||
|
}

  public void testNonIndexedFields() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
    Document doc = new Document();
    doc.add(new StoredField("bogusbytes", "bogus"));
    doc.add(new StoredField("bogusshorts", "bogus"));
    doc.add(new StoredField("bogusints", "bogus"));
    doc.add(new StoredField("boguslongs", "bogus"));
    doc.add(new StoredField("bogusfloats", "bogus"));
    doc.add(new StoredField("bogusdoubles", "bogus"));
    doc.add(new StoredField("bogusterms", "bogus"));
    doc.add(new StoredField("bogustermsindex", "bogus"));
    doc.add(new StoredField("bogusmultivalued", "bogus"));
    doc.add(new StoredField("bogusbits", "bogus"));
    iw.addDocument(doc);
    DirectoryReader ir = iw.getReader();
    iw.close();

    LeafReader ar = getOnlyLeafReader(ir);

    final FieldCache cache = FieldCache.DEFAULT;
    cache.purgeAllCaches();
    assertEquals(0, cache.getCacheEntries().length);

    NumericDocValues ints = cache.getNumerics(ar, "bogusints", FieldCache.INT_POINT_PARSER, true);
    assertEquals(0, ints.get(0));

    NumericDocValues longs = cache.getNumerics(ar, "boguslongs", FieldCache.LONG_POINT_PARSER, true);
    assertEquals(0, longs.get(0));

    NumericDocValues floats = cache.getNumerics(ar, "bogusfloats", FieldCache.FLOAT_POINT_PARSER, true);
    assertEquals(0, floats.get(0));

    NumericDocValues doubles = cache.getNumerics(ar, "bogusdoubles", FieldCache.DOUBLE_POINT_PARSER, true);
    assertEquals(0, doubles.get(0));

    BinaryDocValues binaries = cache.getTerms(ar, "bogusterms", true);
    BytesRef scratch = binaries.get(0);
    assertEquals(0, scratch.length);

    SortedDocValues sorted = cache.getTermsIndex(ar, "bogustermsindex");
    assertEquals(-1, sorted.getOrd(0));
    scratch = sorted.get(0);
    assertEquals(0, scratch.length);

    SortedSetDocValues sortedSet = cache.getDocTermOrds(ar, "bogusmultivalued", null);
    sortedSet.setDocument(0);
    assertEquals(SortedSetDocValues.NO_MORE_ORDS, sortedSet.nextOrd());

    Bits bits = cache.getDocsWithField(ar, "bogusbits", null);
    assertFalse(bits.get(0));

    // check that we cached nothing
    assertEquals(0, cache.getCacheEntries().length);
    ir.close();
    dir.close();
  }

  // Make sure that the use of GrowableWriter doesn't prevent using the full long range
  public void testLongFieldCache() throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random()));
    cfg.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg);
    Document doc = new Document();
    LongPoint field = new LongPoint("f", 0L);
    StoredField field2 = new StoredField("f", 0L);
    doc.add(field);
    doc.add(field2);
    final long[] values = new long[TestUtil.nextInt(random(), 1, 10)];
    for (int i = 0; i < values.length; ++i) {
      final long v;
      switch (random().nextInt(10)) {
        case 0:
          v = Long.MIN_VALUE;
          break;
        case 1:
          v = 0;
          break;
        case 2:
          v = Long.MAX_VALUE;
          break;
        default:
          v = TestUtil.nextLong(random(), -10, 10);
          break;
      }
      values[i] = v;
      if (v == 0 && random().nextBoolean()) {
        // missing
        iw.addDocument(new Document());
      } else {
        field.setLongValue(v);
        field2.setLongValue(v);
        iw.addDocument(doc);
      }
    }
    iw.forceMerge(1);
    final DirectoryReader reader = iw.getReader();
    final NumericDocValues longs = FieldCache.DEFAULT.getNumerics(getOnlyLeafReader(reader), "f", FieldCache.LONG_POINT_PARSER, false);
    for (int i = 0; i < values.length; ++i) {
      assertEquals(values[i], longs.get(i));
    }
    reader.close();
    iw.close();
    dir.close();
  }

  // Make sure that the use of GrowableWriter doesn't prevent using the full int range
  public void testIntFieldCache() throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random()));
    cfg.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg);
    Document doc = new Document();
    IntPoint field = new IntPoint("f", 0);
    doc.add(field);
    final int[] values = new int[TestUtil.nextInt(random(), 1, 10)];
    for (int i = 0; i < values.length; ++i) {
      final int v;
      switch (random().nextInt(10)) {
        case 0:
          v = Integer.MIN_VALUE;
          break;
        case 1:
          v = 0;
          break;
        case 2:
          v = Integer.MAX_VALUE;
          break;
        default:
          v = TestUtil.nextInt(random(), -10, 10);
          break;
      }
      values[i] = v;
      if (v == 0 && random().nextBoolean()) {
        // missing
        iw.addDocument(new Document());
      } else {
        field.setIntValue(v);
        iw.addDocument(doc);
      }
    }
    iw.forceMerge(1);
    final DirectoryReader reader = iw.getReader();
    final NumericDocValues ints = FieldCache.DEFAULT.getNumerics(getOnlyLeafReader(reader), "f", FieldCache.INT_POINT_PARSER, false);
    for (int i = 0; i < values.length; ++i) {
      assertEquals(values[i], ints.get(i));
    }
    reader.close();
    iw.close();
    dir.close();
  }

}
@@ -0,0 +1,70 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.uninverting;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;

public class TestFieldCacheReopen extends LuceneTestCase {

  // TODO: make a version of this that tests the same thing with UninvertingReader.wrap()

  // LUCENE-1579: Ensure that on a reopened reader, that any
  // shared segments reuse the doc values arrays in
  // FieldCache
  public void testFieldCacheReuseAfterReopen() throws Exception {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(
        dir,
        newIndexWriterConfig(new MockAnalyzer(random())).
            setMergePolicy(newLogMergePolicy(10))
    );
    Document doc = new Document();
    doc.add(new IntPoint("number", 17));
    writer.addDocument(doc);
    writer.commit();

    // Open reader1
    DirectoryReader r = DirectoryReader.open(dir);
    LeafReader r1 = getOnlyLeafReader(r);
    final NumericDocValues ints = FieldCache.DEFAULT.getNumerics(r1, "number", FieldCache.INT_POINT_PARSER, false);
    assertEquals(17, ints.get(0));

    // Add new segment
    writer.addDocument(doc);
    writer.commit();

    // Reopen reader1 --> reader2
    DirectoryReader r2 = DirectoryReader.openIfChanged(r);
    assertNotNull(r2);
    r.close();
    LeafReader sub0 = r2.leaves().get(0).reader();
    final NumericDocValues ints2 = FieldCache.DEFAULT.getNumerics(sub0, "number", FieldCache.INT_POINT_PARSER, false);
    r2.close();
    assertTrue(ints == ints2);

    writer.close();
    dir.close();
  }
}
@@ -0,0 +1,164 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.uninverting;

import java.io.IOException;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LegacyDoubleField;
import org.apache.lucene.document.LegacyFloatField;
import org.apache.lucene.document.LegacyIntField;
import org.apache.lucene.document.LegacyLongField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.solr.index.SlowCompositeReaderWrapper;
import org.apache.solr.uninverting.FieldCacheSanityChecker.Insanity;
import org.apache.solr.uninverting.FieldCacheSanityChecker.InsanityType;

public class TestFieldCacheSanityChecker extends LuceneTestCase {

  protected LeafReader readerA;
  protected LeafReader readerB;
  protected LeafReader readerX;
  protected LeafReader readerAclone;
  protected Directory dirA, dirB;
  private static final int NUM_DOCS = 1000;

  @Override
  public void setUp() throws Exception {
    super.setUp();
    dirA = newDirectory();
    dirB = newDirectory();

    IndexWriter wA = new IndexWriter(dirA, newIndexWriterConfig(new MockAnalyzer(random())));
    IndexWriter wB = new IndexWriter(dirB, newIndexWriterConfig(new MockAnalyzer(random())));

    long theLong = Long.MAX_VALUE;
    double theDouble = Double.MAX_VALUE;
    int theInt = Integer.MAX_VALUE;
    float theFloat = Float.MAX_VALUE;
    for (int i = 0; i < NUM_DOCS; i++){
      Document doc = new Document();
      doc.add(new LegacyLongField("theLong", theLong--, Field.Store.NO));
      doc.add(new LegacyDoubleField("theDouble", theDouble--, Field.Store.NO));
      doc.add(new LegacyIntField("theInt", theInt--, Field.Store.NO));
      doc.add(new LegacyFloatField("theFloat", theFloat--, Field.Store.NO));
      if (0 == i % 3) {
        wA.addDocument(doc);
      } else {
        wB.addDocument(doc);
      }
    }
    wA.close();
    wB.close();
    DirectoryReader rA = DirectoryReader.open(dirA);
    readerA = SlowCompositeReaderWrapper.wrap(rA);
    readerAclone = SlowCompositeReaderWrapper.wrap(rA);
    readerA = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dirA));
    readerB = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dirB));
    readerX = SlowCompositeReaderWrapper.wrap(new MultiReader(readerA, readerB));
  }

  @Override
  public void tearDown() throws Exception {
    readerA.close();
    readerAclone.close();
    readerB.close();
    readerX.close();
    dirA.close();
    dirB.close();
    super.tearDown();
  }

  public void testSanity() throws IOException {
    FieldCache cache = FieldCache.DEFAULT;
    cache.purgeAllCaches();

    cache.getNumerics(readerA, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER, false);
    cache.getNumerics(readerAclone, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER, false);
    cache.getNumerics(readerB, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER, false);

    cache.getNumerics(readerX, "theInt", FieldCache.LEGACY_INT_PARSER, false);

    // // //

    Insanity[] insanity =
      FieldCacheSanityChecker.checkSanity(cache.getCacheEntries());

    if (0 < insanity.length)
      dumpArray(getTestClass().getName() + "#" + getTestName()
          + " INSANITY", insanity, System.err);

    assertEquals("shouldn't be any cache insanity", 0, insanity.length);
    cache.purgeAllCaches();
  }

  public void testInsanity1() throws IOException {
    FieldCache cache = FieldCache.DEFAULT;
    cache.purgeAllCaches();

    cache.getNumerics(readerX, "theInt", FieldCache.LEGACY_INT_PARSER, false);
    cache.getTerms(readerX, "theInt", false);

    // // //

    Insanity[] insanity =
      FieldCacheSanityChecker.checkSanity(cache.getCacheEntries());

    assertEquals("wrong number of cache errors", 1, insanity.length);
    assertEquals("wrong type of cache error",
                 InsanityType.VALUEMISMATCH,
                 insanity[0].getType());
    assertEquals("wrong number of entries in cache error", 2,
                 insanity[0].getCacheEntries().length);

    // we expect bad things, don't let tearDown complain about them
    cache.purgeAllCaches();
  }

  public void testInsanity2() throws IOException {
    FieldCache cache = FieldCache.DEFAULT;
    cache.purgeAllCaches();

    cache.getTerms(readerA, "theInt", false);
    cache.getTerms(readerB, "theInt", false);
    cache.getTerms(readerX, "theInt", false);

    // // //

    Insanity[] insanity =
      FieldCacheSanityChecker.checkSanity(cache.getCacheEntries());

    assertEquals("wrong number of cache errors", 1, insanity.length);
    assertEquals("wrong type of cache error",
                 InsanityType.SUBREADER,
                 insanity[0].getType());
    assertEquals("wrong number of entries in cache error", 3,
                 insanity[0].getCacheEntries().length);

    // we expect bad things, don't let tearDown complain about them
    cache.purgeAllCaches();
  }

}
File diff suppressed because it is too large
@@ -0,0 +1,318 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.solr.uninverting;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Objects;
|
||||||
|
import java.util.Random;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.lucene.document.IntPoint;
|
||||||
|
import org.apache.lucene.document.StoredField;
|
||||||
|
import org.apache.lucene.document.StringField;
|
||||||
|
import org.apache.lucene.index.DocValues;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
|
import org.apache.lucene.index.NumericDocValues;
|
||||||
|
import org.apache.lucene.index.RandomIndexWriter;
|
||||||
|
import org.apache.lucene.search.ConstantScoreQuery;
|
||||||
|
import org.apache.lucene.search.ConstantScoreScorer;
|
||||||
|
import org.apache.lucene.search.ConstantScoreWeight;
|
||||||
|
import org.apache.lucene.search.FieldDoc;
|
||||||
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.search.Scorer;
|
||||||
|
import org.apache.lucene.search.Sort;
|
||||||
|
import org.apache.lucene.search.SortField;
|
||||||
|
import org.apache.lucene.search.TopFieldDocs;
|
||||||
|
import org.apache.lucene.search.Weight;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.util.BitSetIterator;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.FixedBitSet;
|
||||||
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
import org.apache.lucene.util.TestUtil;
|
||||||
|
import org.apache.solr.uninverting.UninvertingReader.Type;
|
||||||
|
|
||||||
|
/** random sorting tests with uninversion */
|
||||||
|
public class TestFieldCacheSortRandom extends LuceneTestCase {
|
||||||
|
|
||||||
|
public void testRandomStringSort() throws Exception {
|
||||||
|
testRandomStringSort(SortField.Type.STRING);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testRandomStringValSort() throws Exception {
|
||||||
|
testRandomStringSort(SortField.Type.STRING_VAL);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testRandomStringSort(SortField.Type type) throws Exception {
|
||||||
|
Random random = new Random(random().nextLong());
|
||||||
|
|
||||||
|
final int NUM_DOCS = atLeast(100);
|
||||||
|
final Directory dir = newDirectory();
|
||||||
|
final RandomIndexWriter writer = new RandomIndexWriter(random, dir);
|
||||||
|
final boolean allowDups = random.nextBoolean();
|
||||||
|
final Set<String> seen = new HashSet<>();
|
||||||
|
final int maxLength = TestUtil.nextInt(random, 5, 100);
|
||||||
|
if (VERBOSE) {
|
||||||
|
System.out.println("TEST: NUM_DOCS=" + NUM_DOCS + " maxLength=" + maxLength + " allowDups=" + allowDups);
|
||||||
|
}
|
||||||
|
|
||||||
|
int numDocs = 0;
|
||||||
|
final List<BytesRef> docValues = new ArrayList<>();
|
||||||
|
// TODO: deletions
|
||||||
|
while (numDocs < NUM_DOCS) {
|
||||||
|
final Document doc = new Document();
|
||||||
|
|
||||||
|
// 10% of the time, the document is missing the value:
|
||||||
|
final BytesRef br;
|
||||||
|
if (random().nextInt(10) != 7) {
|
||||||
|
final String s;
|
||||||
|
if (random.nextBoolean()) {
|
||||||
|
s = TestUtil.randomSimpleString(random, maxLength);
|
||||||
|
} else {
|
||||||
|
s = TestUtil.randomUnicodeString(random, maxLength);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!allowDups) {
|
||||||
|
if (seen.contains(s)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
seen.add(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (VERBOSE) {
|
||||||
|
System.out.println(" " + numDocs + ": s=" + s);
|
||||||
|
}
|
||||||
|
|
||||||
|
doc.add(new StringField("stringdv", s, Field.Store.NO));
|
||||||
|
docValues.add(new BytesRef(s));
|
||||||
|
|
||||||
|
} else {
|
||||||
|
br = null;
|
||||||
|
if (VERBOSE) {
|
||||||
|
System.out.println(" " + numDocs + ": <missing>");
|
||||||
|
}
|
||||||
|
docValues.add(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
doc.add(new IntPoint("id", numDocs));
|
||||||
|
doc.add(new StoredField("id", numDocs));
|
||||||
|
writer.addDocument(doc);
|
||||||
|
numDocs++;
|
||||||
|
|
||||||
|
if (random.nextInt(40) == 17) {
|
||||||
|
// force flush
|
||||||
|
writer.getReader().close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Map<String,UninvertingReader.Type> mapping = new HashMap<>();
|
||||||
|
mapping.put("stringdv", Type.SORTED);
|
||||||
|
mapping.put("id", Type.INTEGER_POINT);
|
||||||
|
final IndexReader r = UninvertingReader.wrap(writer.getReader(), mapping);
|
||||||
|
writer.close();
|
||||||
|
if (VERBOSE) {
|
||||||
|
System.out.println(" reader=" + r);
|
||||||
|
}
|
||||||
|
|
||||||
|
final IndexSearcher s = newSearcher(r, false);
|
||||||
|
final int ITERS = atLeast(100);
|
||||||
|
for(int iter=0;iter<ITERS;iter++) {
|
||||||
|
final boolean reverse = random.nextBoolean();
|
||||||
|
|
||||||
|
final TopFieldDocs hits;
|
||||||
|
final SortField sf;
|
||||||
|
final boolean sortMissingLast;
|
||||||
|
final boolean missingIsNull;
|
||||||
|
sf = new SortField("stringdv", type, reverse);
|
||||||
|
sortMissingLast = random().nextBoolean();
|
||||||
|
missingIsNull = true;
|
||||||
|
|
||||||
|
if (sortMissingLast) {
|
||||||
|
sf.setMissingValue(SortField.STRING_LAST);
|
||||||
|
}
|
||||||
|
|
||||||
|
final Sort sort;
|
||||||
|
if (random.nextBoolean()) {
|
||||||
|
sort = new Sort(sf);
|
||||||
|
} else {
|
||||||
|
sort = new Sort(sf, SortField.FIELD_DOC);
|
||||||
|
}
|
||||||
|
final int hitCount = TestUtil.nextInt(random, 1, r.maxDoc() + 20);
|
||||||
|
final RandomQuery f = new RandomQuery(random.nextLong(), random.nextFloat(), docValues);
|
||||||
|
int queryType = random.nextInt(2);
|
||||||
|
if (queryType == 0) {
|
||||||
|
hits = s.search(new ConstantScoreQuery(f),
|
||||||
|
hitCount, sort, random.nextBoolean(), random.nextBoolean());
|
||||||
|
} else {
|
||||||
|
hits = s.search(f, hitCount, sort, random.nextBoolean(), random.nextBoolean());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (VERBOSE) {
|
||||||
|
System.out.println("\nTEST: iter=" + iter + " " + hits.totalHits + " hits; topN=" + hitCount + "; reverse=" + reverse + "; sortMissingLast=" + sortMissingLast + " sort=" + sort);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compute expected results:
|
||||||
|
Collections.sort(f.matchValues, new Comparator<BytesRef>() {
|
||||||
|
@Override
|
||||||
|
public int compare(BytesRef a, BytesRef b) {
|
||||||
|
if (a == null) {
|
||||||
|
if (b == null) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
if (sortMissingLast) {
|
||||||
|
return 1;
|
||||||
|
} else {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
} else if (b == null) {
|
||||||
|
if (sortMissingLast) {
|
||||||
|
return -1;
|
||||||
|
} else {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return a.compareTo(b);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
if (reverse) {
|
||||||
|
Collections.reverse(f.matchValues);
|
||||||
|
}
|
||||||
|
final List<BytesRef> expected = f.matchValues;
|
||||||
|
if (VERBOSE) {
|
||||||
|
System.out.println(" expected:");
|
||||||
|
for(int idx=0;idx<expected.size();idx++) {
|
||||||
|
BytesRef br = expected.get(idx);
|
||||||
|
if (br == null && missingIsNull == false) {
|
||||||
|
br = new BytesRef();
|
||||||
|
}
|
||||||
|
System.out.println(" " + idx + ": " + (br == null ? "<missing>" : br.utf8ToString()));
|
||||||
|
if (idx == hitCount-1) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (VERBOSE) {
|
||||||
|
System.out.println(" actual:");
|
||||||
|
for(int hitIDX=0;hitIDX<hits.scoreDocs.length;hitIDX++) {
|
||||||
|
final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
|
||||||
|
BytesRef br = (BytesRef) fd.fields[0];
|
||||||
|
|
||||||
|
System.out.println(" " + hitIDX + ": " + (br == null ? "<missing>" : br.utf8ToString()) + " id=" + s.doc(fd.doc).get("id"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for(int hitIDX=0;hitIDX<hits.scoreDocs.length;hitIDX++) {
|
||||||
|
final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
|
||||||
|
BytesRef br = expected.get(hitIDX);
|
||||||
|
if (br == null && missingIsNull == false) {
|
||||||
|
br = new BytesRef();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Normally, the old codecs (that don't support
|
||||||
|
// docsWithField via doc values) will always return
|
||||||
|
// an empty BytesRef for the missing case; however,
|
||||||
|
// if all docs in a given segment were missing, in
|
||||||
|
// that case it will return null! So we must map
|
||||||
|
// null here, too:
|
||||||
|
BytesRef br2 = (BytesRef) fd.fields[0];
|
||||||
|
if (br2 == null && missingIsNull == false) {
|
||||||
|
br2 = new BytesRef();
|
||||||
|
}
|
||||||
|
|
||||||
|
assertEquals(br, br2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
r.close();
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class RandomQuery extends Query {
|
||||||
|
private final long seed;
|
||||||
|
private float density;
|
||||||
|
private final List<BytesRef> docValues;
|
||||||
|
public final List<BytesRef> matchValues = Collections.synchronizedList(new ArrayList<BytesRef>());
|
||||||
|
|
||||||
|
// density should be 0.0 ... 1.0
|
||||||
|
public RandomQuery(long seed, float density, List<BytesRef> docValues) {
|
||||||
|
this.seed = seed;
|
||||||
|
this.density = density;
|
||||||
|
this.docValues = docValues;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
|
||||||
|
return new ConstantScoreWeight(this) {
|
||||||
|
@Override
|
||||||
|
public Scorer scorer(LeafReaderContext context) throws IOException {
|
||||||
|
Random random = new Random(seed ^ context.docBase);
|
||||||
|
final int maxDoc = context.reader().maxDoc();
|
||||||
|
final NumericDocValues idSource = DocValues.getNumeric(context.reader(), "id");
|
||||||
|
assertNotNull(idSource);
|
||||||
|
final FixedBitSet bits = new FixedBitSet(maxDoc);
|
||||||
|
for(int docID=0;docID<maxDoc;docID++) {
|
||||||
|
if (random.nextFloat() <= density) {
|
||||||
|
bits.set(docID);
|
||||||
|
//System.out.println(" acc id=" + idSource.getInt(docID) + " docID=" + docID);
|
||||||
|
matchValues.add(docValues.get((int) idSource.get(docID)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return new ConstantScoreScorer(this, score(), new BitSetIterator(bits, bits.approximateCardinality()));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString(String field) {
|
||||||
|
return "RandomFilter(density=" + density + ")";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object other) {
|
||||||
|
return sameClassAs(other) &&
|
||||||
|
equalsTo(getClass().cast(other));
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean equalsTo(RandomQuery other) {
|
||||||
|
return seed == other.seed &&
|
||||||
|
docValues == other.docValues &&
|
||||||
|
density == other.density;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
int h = classHash();
|
||||||
|
h = 31 * h + Objects.hash(seed, density);
|
||||||
|
h = 31 * h + System.identityHashCode(docValues);
|
||||||
|
return h;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
@@ -0,0 +1,592 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.solr.uninverting;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
|
import org.apache.lucene.document.BinaryDocValuesField;
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.lucene.document.NumericDocValuesField;
|
||||||
|
import org.apache.lucene.document.SortedDocValuesField;
|
||||||
|
import org.apache.lucene.document.SortedSetDocValuesField;
|
||||||
|
import org.apache.lucene.document.StringField;
|
||||||
|
import org.apache.lucene.index.BinaryDocValues;
|
||||||
|
import org.apache.lucene.index.DirectoryReader;
|
||||||
|
import org.apache.lucene.index.DocValues;
|
||||||
|
import org.apache.lucene.index.IndexWriter;
|
||||||
|
import org.apache.lucene.index.IndexWriterConfig;
|
||||||
|
import org.apache.lucene.index.LeafReader;
|
||||||
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
|
import org.apache.lucene.index.RandomIndexWriter;
|
||||||
|
import org.apache.lucene.index.SortedDocValues;
|
||||||
|
import org.apache.lucene.index.SortedSetDocValues;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.index.TermsEnum.SeekStatus;
|
||||||
|
import org.apache.lucene.index.TermsEnum;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.util.Bits;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.Constants;
|
||||||
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
import org.apache.lucene.util.TestUtil;
|
||||||
|
import org.apache.solr.index.SlowCompositeReaderWrapper;
|
||||||
|
|
||||||
|
import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;
|
||||||
|
|
||||||
|
public class TestFieldCacheVsDocValues extends LuceneTestCase {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setUp() throws Exception {
|
||||||
|
super.setUp();
|
||||||
|
assumeFalse("test unsupported on J9 temporarily, see https://issues.apache.org/jira/browse/LUCENE-6522",
|
||||||
|
Constants.JAVA_VENDOR.startsWith("IBM"));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testByteMissingVsFieldCache() throws Exception {
|
||||||
|
int numIterations = atLeast(1);
|
||||||
|
for (int i = 0; i < numIterations; i++) {
|
||||||
|
doTestMissingVsFieldCache(Byte.MIN_VALUE, Byte.MAX_VALUE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testShortMissingVsFieldCache() throws Exception {
|
||||||
|
int numIterations = atLeast(1);
|
||||||
|
for (int i = 0; i < numIterations; i++) {
|
||||||
|
doTestMissingVsFieldCache(Short.MIN_VALUE, Short.MAX_VALUE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testIntMissingVsFieldCache() throws Exception {
|
||||||
|
int numIterations = atLeast(1);
|
||||||
|
for (int i = 0; i < numIterations; i++) {
|
||||||
|
doTestMissingVsFieldCache(Integer.MIN_VALUE, Integer.MAX_VALUE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testLongMissingVsFieldCache() throws Exception {
|
||||||
|
int numIterations = atLeast(1);
|
||||||
|
for (int i = 0; i < numIterations; i++) {
|
||||||
|
doTestMissingVsFieldCache(Long.MIN_VALUE, Long.MAX_VALUE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSortedFixedLengthVsFieldCache() throws Exception {
|
||||||
|
int numIterations = atLeast(1);
|
||||||
|
for (int i = 0; i < numIterations; i++) {
|
||||||
|
int fixedLength = TestUtil.nextInt(random(), 1, 10);
|
||||||
|
doTestSortedVsFieldCache(fixedLength, fixedLength);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSortedVariableLengthVsFieldCache() throws Exception {
|
||||||
|
int numIterations = atLeast(1);
|
||||||
|
for (int i = 0; i < numIterations; i++) {
|
||||||
|
doTestSortedVsFieldCache(1, 10);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSortedSetFixedLengthVsUninvertedField() throws Exception {
|
||||||
|
int numIterations = atLeast(1);
|
||||||
|
for (int i = 0; i < numIterations; i++) {
|
||||||
|
int fixedLength = TestUtil.nextInt(random(), 1, 10);
|
||||||
|
doTestSortedSetVsUninvertedField(fixedLength, fixedLength);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSortedSetVariableLengthVsUninvertedField() throws Exception {
|
||||||
|
int numIterations = atLeast(1);
|
||||||
|
for (int i = 0; i < numIterations; i++) {
|
||||||
|
doTestSortedSetVsUninvertedField(1, 10);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// LUCENE-4853
|
||||||
|
public void testHugeBinaryValues() throws Exception {
|
||||||
|
Analyzer analyzer = new MockAnalyzer(random());
|
||||||
|
// FSDirectory because SimpleText will consume gobbs of
|
||||||
|
// space when storing big binary values:
|
||||||
|
Directory d = newFSDirectory(createTempDir("hugeBinaryValues"));
|
||||||
|
boolean doFixed = random().nextBoolean();
|
||||||
|
int numDocs;
|
||||||
|
int fixedLength = 0;
|
||||||
|
if (doFixed) {
|
||||||
|
// Sometimes make all values fixed length since some
|
||||||
|
// codecs have different code paths for this:
|
||||||
|
numDocs = TestUtil.nextInt(random(), 10, 20);
|
||||||
|
fixedLength = TestUtil.nextInt(random(), 65537, 256 * 1024);
|
||||||
|
} else {
|
||||||
|
numDocs = TestUtil.nextInt(random(), 100, 200);
|
||||||
|
}
|
||||||
|
IndexWriter w = new IndexWriter(d, newIndexWriterConfig(analyzer));
|
||||||
|
List<byte[]> docBytes = new ArrayList<>();
|
||||||
|
long totalBytes = 0;
|
||||||
|
for(int docID=0;docID<numDocs;docID++) {
|
||||||
|
// we don't use RandomIndexWriter because it might add
|
||||||
|
// more docvalues than we expect !!!!
|
||||||
|
|
||||||
|
// Must be > 64KB in size to ensure more than 2 pages in
|
||||||
|
// PagedBytes would be needed:
|
||||||
|
int numBytes;
|
||||||
|
if (doFixed) {
|
||||||
|
numBytes = fixedLength;
|
||||||
|
} else if (docID == 0 || random().nextInt(5) == 3) {
|
||||||
|
numBytes = TestUtil.nextInt(random(), 65537, 3 * 1024 * 1024);
|
||||||
|
} else {
|
||||||
|
numBytes = TestUtil.nextInt(random(), 1, 1024 * 1024);
|
||||||
|
}
|
||||||
|
totalBytes += numBytes;
|
||||||
|
if (totalBytes > 5 * 1024*1024) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
byte[] bytes = new byte[numBytes];
|
||||||
|
random().nextBytes(bytes);
|
||||||
|
docBytes.add(bytes);
|
||||||
|
Document doc = new Document();
|
||||||
|
BytesRef b = new BytesRef(bytes);
|
||||||
|
b.length = bytes.length;
|
||||||
|
doc.add(new BinaryDocValuesField("field", b));
|
||||||
|
doc.add(new StringField("id", ""+docID, Field.Store.YES));
|
||||||
|
try {
|
||||||
|
w.addDocument(doc);
|
||||||
|
} catch (IllegalArgumentException iae) {
|
||||||
|
if (iae.getMessage().indexOf("is too large") == -1) {
|
||||||
|
throw iae;
|
||||||
|
} else {
|
||||||
|
// OK: some codecs can't handle binary DV > 32K
|
||||||
|
assertFalse(codecAcceptsHugeBinaryValues("field"));
|
||||||
|
w.rollback();
|
||||||
|
d.close();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
DirectoryReader r;
|
||||||
|
try {
|
||||||
|
r = DirectoryReader.open(w);
|
||||||
|
} catch (IllegalArgumentException iae) {
|
||||||
|
if (iae.getMessage().indexOf("is too large") == -1) {
|
||||||
|
throw iae;
|
||||||
|
} else {
|
||||||
|
assertFalse(codecAcceptsHugeBinaryValues("field"));
|
||||||
|
|
||||||
|
// OK: some codecs can't handle binary DV > 32K
|
||||||
|
w.rollback();
|
||||||
|
d.close();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
w.close();
|
||||||
|
|
||||||
|
LeafReader ar = SlowCompositeReaderWrapper.wrap(r);
|
||||||
|
TestUtil.checkReader(ar);
|
||||||
|
|
||||||
|
BinaryDocValues s = FieldCache.DEFAULT.getTerms(ar, "field", false);
|
||||||
|
for(int docID=0;docID<docBytes.size();docID++) {
|
||||||
|
Document doc = ar.document(docID);
|
||||||
|
BytesRef bytes = s.get(docID);
|
||||||
|
byte[] expected = docBytes.get(Integer.parseInt(doc.get("id")));
|
||||||
|
assertEquals(expected.length, bytes.length);
|
||||||
|
assertEquals(new BytesRef(expected), bytes);
|
||||||
|
}
|
||||||
|
|
||||||
|
assertTrue(codecAcceptsHugeBinaryValues("field"));
|
||||||
|
|
||||||
|
ar.close();
|
||||||
|
d.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static final int LARGE_BINARY_FIELD_LENGTH = (1 << 15) - 2;
|
||||||
|
|
||||||
|
// TODO: get this out of here and into the deprecated codecs (4.0, 4.2)
|
||||||
|
public void testHugeBinaryValueLimit() throws Exception {
|
||||||
|
// We only test DVFormats that have a limit
|
||||||
|
assumeFalse("test requires codec with limits on max binary field length", codecAcceptsHugeBinaryValues("field"));
|
||||||
|
Analyzer analyzer = new MockAnalyzer(random());
|
||||||
|
// FSDirectory because SimpleText will consume gobbs of
|
||||||
|
// space when storing big binary values:
|
||||||
|
Directory d = newFSDirectory(createTempDir("hugeBinaryValues"));
|
||||||
|
boolean doFixed = random().nextBoolean();
|
||||||
|
int numDocs;
|
||||||
|
int fixedLength = 0;
|
||||||
|
if (doFixed) {
|
||||||
|
// Sometimes make all values fixed length since some
|
||||||
|
// codecs have different code paths for this:
|
||||||
|
numDocs = TestUtil.nextInt(random(), 10, 20);
|
||||||
|
fixedLength = LARGE_BINARY_FIELD_LENGTH;
|
||||||
|
} else {
|
||||||
|
numDocs = TestUtil.nextInt(random(), 100, 200);
|
||||||
|
}
|
||||||
|
IndexWriter w = new IndexWriter(d, newIndexWriterConfig(analyzer));
|
||||||
|
List<byte[]> docBytes = new ArrayList<>();
|
||||||
|
long totalBytes = 0;
|
||||||
|
for(int docID=0;docID<numDocs;docID++) {
|
||||||
|
// we don't use RandomIndexWriter because it might add
|
||||||
|
// more docvalues than we expect !!!!
|
||||||
|
|
||||||
|
// Must be > 64KB in size to ensure more than 2 pages in
|
||||||
|
// PagedBytes would be needed:
|
||||||
|
int numBytes;
|
||||||
|
if (doFixed) {
|
||||||
|
numBytes = fixedLength;
|
||||||
|
} else if (docID == 0 || random().nextInt(5) == 3) {
|
||||||
|
numBytes = LARGE_BINARY_FIELD_LENGTH;
|
||||||
|
} else {
|
||||||
|
numBytes = TestUtil.nextInt(random(), 1, LARGE_BINARY_FIELD_LENGTH);
|
||||||
|
}
|
||||||
|
totalBytes += numBytes;
|
||||||
|
if (totalBytes > 5 * 1024*1024) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
byte[] bytes = new byte[numBytes];
|
||||||
|
random().nextBytes(bytes);
|
||||||
|
docBytes.add(bytes);
|
||||||
|
Document doc = new Document();
|
||||||
|
BytesRef b = new BytesRef(bytes);
|
||||||
|
b.length = bytes.length;
|
||||||
|
doc.add(new BinaryDocValuesField("field", b));
|
||||||
|
doc.add(new StringField("id", ""+docID, Field.Store.YES));
|
||||||
|
w.addDocument(doc);
|
||||||
|
}
|
||||||
|
|
||||||
|
DirectoryReader r = DirectoryReader.open(w);
|
||||||
|
w.close();
|
||||||
|
|
||||||
|
LeafReader ar = SlowCompositeReaderWrapper.wrap(r);
|
||||||
|
TestUtil.checkReader(ar
|
||||||
|
);
|
||||||
|
|
||||||
|
BinaryDocValues s = FieldCache.DEFAULT.getTerms(ar, "field", false);
|
||||||
|
for(int docID=0;docID<docBytes.size();docID++) {
|
||||||
|
Document doc = ar.document(docID);
|
||||||
|
BytesRef bytes = s.get(docID);
|
||||||
|
byte[] expected = docBytes.get(Integer.parseInt(doc.get("id")));
|
||||||
|
assertEquals(expected.length, bytes.length);
|
||||||
|
assertEquals(new BytesRef(expected), bytes);
|
||||||
|
}
|
||||||
|
|
||||||
|
ar.close();
|
||||||
|
d.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void doTestSortedVsFieldCache(int minLength, int maxLength) throws Exception {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
|
||||||
|
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
|
||||||
|
Document doc = new Document();
|
||||||
|
Field idField = new StringField("id", "", Field.Store.NO);
|
||||||
|
Field indexedField = new StringField("indexed", "", Field.Store.NO);
|
||||||
|
Field dvField = new SortedDocValuesField("dv", new BytesRef());
|
||||||
|
doc.add(idField);
|
||||||
|
doc.add(indexedField);
|
||||||
|
doc.add(dvField);
|
||||||
|
|
||||||
|
// index some docs
|
||||||
|
int numDocs = atLeast(300);
|
||||||
|
for (int i = 0; i < numDocs; i++) {
|
||||||
|
idField.setStringValue(Integer.toString(i));
|
||||||
|
final int length;
|
||||||
|
if (minLength == maxLength) {
|
||||||
|
length = minLength; // fixed length
|
||||||
|
} else {
|
||||||
|
length = TestUtil.nextInt(random(), minLength, maxLength);
|
||||||
|
}
|
||||||
|
String value = TestUtil.randomSimpleString(random(), length);
|
||||||
|
indexedField.setStringValue(value);
|
||||||
|
dvField.setBytesValue(new BytesRef(value));
|
||||||
|
writer.addDocument(doc);
|
||||||
|
if (random().nextInt(31) == 0) {
|
||||||
|
writer.commit();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// delete some docs
|
||||||
|
int numDeletions = random().nextInt(numDocs/10);
|
||||||
|
for (int i = 0; i < numDeletions; i++) {
|
||||||
|
int id = random().nextInt(numDocs);
|
||||||
|
writer.deleteDocuments(new Term("id", Integer.toString(id)));
|
||||||
|
}
|
||||||
|
writer.close();
|
||||||
|
|
||||||
|
// compare
|
||||||
|
DirectoryReader ir = DirectoryReader.open(dir);
|
||||||
|
for (LeafReaderContext context : ir.leaves()) {
|
||||||
|
LeafReader r = context.reader();
|
||||||
|
SortedDocValues expected = FieldCache.DEFAULT.getTermsIndex(r, "indexed");
|
||||||
|
SortedDocValues actual = r.getSortedDocValues("dv");
|
||||||
|
assertEquals(r.maxDoc(), expected, actual);
|
||||||
|
}
|
||||||
|
ir.close();
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void doTestSortedSetVsUninvertedField(int minLength, int maxLength) throws Exception {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
IndexWriterConfig conf = new IndexWriterConfig(new MockAnalyzer(random()));
|
||||||
|
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
|
||||||
|
|
||||||
|
// index some docs
|
||||||
|
int numDocs = atLeast(300);
|
||||||
|
for (int i = 0; i < numDocs; i++) {
|
||||||
|
Document doc = new Document();
|
||||||
|
Field idField = new StringField("id", Integer.toString(i), Field.Store.NO);
|
||||||
|
doc.add(idField);
|
||||||
|
final int length = TestUtil.nextInt(random(), minLength, maxLength);
|
||||||
|
int numValues = random().nextInt(17);
|
||||||
|
// create a random list of strings
|
||||||
|
List<String> values = new ArrayList<>();
|
||||||
|
for (int v = 0; v < numValues; v++) {
|
||||||
|
values.add(TestUtil.randomSimpleString(random(), minLength, length));
|
||||||
|
}
|
||||||
|
|
||||||
|
// add in any order to the indexed field
|
||||||
|
ArrayList<String> unordered = new ArrayList<>(values);
|
||||||
|
Collections.shuffle(unordered, random());
|
||||||
|
for (String v : values) {
|
||||||
|
doc.add(newStringField("indexed", v, Field.Store.NO));
|
||||||
|
}
|
||||||
|
|
||||||
|
// add in any order to the dv field
|
||||||
|
ArrayList<String> unordered2 = new ArrayList<>(values);
|
||||||
|
Collections.shuffle(unordered2, random());
|
||||||
|
for (String v : unordered2) {
|
||||||
|
doc.add(new SortedSetDocValuesField("dv", new BytesRef(v)));
|
||||||
|
}
|
||||||
|
|
||||||
|
writer.addDocument(doc);
|
||||||
|
if (random().nextInt(31) == 0) {
|
||||||
|
writer.commit();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// delete some docs
|
||||||
|
int numDeletions = random().nextInt(numDocs/10);
|
||||||
|
for (int i = 0; i < numDeletions; i++) {
|
||||||
|
int id = random().nextInt(numDocs);
|
||||||
|
writer.deleteDocuments(new Term("id", Integer.toString(id)));
|
||||||
|
}
|
||||||
|
|
||||||
|
// compare per-segment
|
||||||
|
DirectoryReader ir = writer.getReader();
|
||||||
|
for (LeafReaderContext context : ir.leaves()) {
|
||||||
|
LeafReader r = context.reader();
|
||||||
|
SortedSetDocValues expected = FieldCache.DEFAULT.getDocTermOrds(r, "indexed", null);
|
||||||
|
SortedSetDocValues actual = r.getSortedSetDocValues("dv");
|
||||||
|
assertEquals(r.maxDoc(), expected, actual);
|
||||||
|
}
|
||||||
|
ir.close();
|
||||||
|
|
||||||
|
writer.forceMerge(1);
|
||||||
|
|
||||||
|
// now compare again after the merge
|
||||||
|
ir = writer.getReader();
|
||||||
|
LeafReader ar = getOnlyLeafReader(ir);
|
||||||
|
SortedSetDocValues expected = FieldCache.DEFAULT.getDocTermOrds(ar, "indexed", null);
|
||||||
|
SortedSetDocValues actual = ar.getSortedSetDocValues("dv");
|
||||||
|
assertEquals(ir.maxDoc(), expected, actual);
|
||||||
|
ir.close();
|
||||||
|
|
||||||
|
writer.close();
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void doTestMissingVsFieldCache(LongProducer longs) throws Exception {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
|
||||||
|
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
|
||||||
|
Field idField = new StringField("id", "", Field.Store.NO);
|
||||||
|
Field indexedField = newStringField("indexed", "", Field.Store.NO);
|
||||||
|
Field dvField = new NumericDocValuesField("dv", 0);
|
||||||
|
|
||||||
|
|
||||||
|
// index some docs
|
||||||
|
int numDocs = atLeast(300);
|
||||||
|
// numDocs should be always > 256 so that in case of a codec that optimizes
|
||||||
|
// for numbers of values <= 256, all storage layouts are tested
|
||||||
|
assert numDocs > 256;
|
||||||
|
for (int i = 0; i < numDocs; i++) {
|
||||||
|
idField.setStringValue(Integer.toString(i));
|
||||||
|
long value = longs.next();
|
||||||
|
indexedField.setStringValue(Long.toString(value));
|
||||||
|
dvField.setLongValue(value);
|
||||||
|
Document doc = new Document();
|
||||||
|
doc.add(idField);
|
||||||
|
// 1/4 of the time we neglect to add the fields
|
||||||
|
if (random().nextInt(4) > 0) {
|
||||||
|
doc.add(indexedField);
|
||||||
|
doc.add(dvField);
|
||||||
|
}
|
||||||
|
writer.addDocument(doc);
|
||||||
|
if (random().nextInt(31) == 0) {
|
||||||
|
writer.commit();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// delete some docs
|
||||||
|
int numDeletions = random().nextInt(numDocs/10);
|
||||||
|
for (int i = 0; i < numDeletions; i++) {
|
||||||
|
int id = random().nextInt(numDocs);
|
||||||
|
writer.deleteDocuments(new Term("id", Integer.toString(id)));
|
||||||
|
}
|
||||||
|
|
||||||
|
// merge some segments and ensure that at least one of them has more than
|
||||||
|
// 256 values
|
||||||
|
writer.forceMerge(numDocs / 256);
|
||||||
|
|
||||||
|
writer.close();
|
||||||
|
|
||||||
|
// compare
|
||||||
|
DirectoryReader ir = DirectoryReader.open(dir);
|
||||||
|
for (LeafReaderContext context : ir.leaves()) {
|
||||||
|
LeafReader r = context.reader();
|
||||||
|
Bits expected = FieldCache.DEFAULT.getDocsWithField(r, "indexed", null);
|
||||||
|
Bits actual = FieldCache.DEFAULT.getDocsWithField(r, "dv", null);
|
||||||
|
assertEquals(expected, actual);
|
||||||
|
}
|
||||||
|
ir.close();
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void doTestMissingVsFieldCache(final long minValue, final long maxValue) throws Exception {
|
||||||
|
doTestMissingVsFieldCache(new LongProducer() {
|
||||||
|
@Override
|
||||||
|
long next() {
|
||||||
|
return TestUtil.nextLong(random(), minValue, maxValue);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
static abstract class LongProducer {
|
||||||
|
abstract long next();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void assertEquals(Bits expected, Bits actual) throws Exception {
|
||||||
|
assertEquals(expected.length(), actual.length());
|
||||||
|
for (int i = 0; i < expected.length(); i++) {
|
||||||
|
assertEquals(expected.get(i), actual.get(i));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void assertEquals(int maxDoc, SortedDocValues expected, SortedDocValues actual) throws Exception {
|
||||||
|
assertEquals(maxDoc, DocValues.singleton(expected), DocValues.singleton(actual));
|
||||||
|
}
|
||||||
|
|
||||||
|
private void assertEquals(int maxDoc, SortedSetDocValues expected, SortedSetDocValues actual) throws Exception {
|
||||||
|
// can be null for the segment if no docs actually had any SortedDocValues
|
||||||
|
// in this case FC.getDocTermsOrds returns EMPTY
|
||||||
|
if (actual == null) {
|
||||||
|
assertEquals(expected.getValueCount(), 0);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
assertEquals(expected.getValueCount(), actual.getValueCount());
|
||||||
|
// compare ord lists
|
||||||
|
for (int i = 0; i < maxDoc; i++) {
|
||||||
|
expected.setDocument(i);
|
||||||
|
actual.setDocument(i);
|
||||||
|
long expectedOrd;
|
||||||
|
while ((expectedOrd = expected.nextOrd()) != NO_MORE_ORDS) {
|
||||||
|
assertEquals(expectedOrd, actual.nextOrd());
|
||||||
|
}
|
||||||
|
assertEquals(NO_MORE_ORDS, actual.nextOrd());
|
||||||
|
}
|
||||||
|
|
||||||
|
// compare ord dictionary
|
||||||
|
for (long i = 0; i < expected.getValueCount(); i++) {
|
||||||
|
final BytesRef expectedBytes = BytesRef.deepCopyOf(expected.lookupOrd(i));
|
||||||
|
final BytesRef actualBytes = actual.lookupOrd(i);
|
||||||
|
assertEquals(expectedBytes, actualBytes);
|
||||||
|
}
|
||||||
|
|
||||||
|
// compare termsenum
|
||||||
|
assertEquals(expected.getValueCount(), expected.termsEnum(), actual.termsEnum());
|
||||||
|
}
|
||||||
|
|
||||||
|
private void assertEquals(long numOrds, TermsEnum expected, TermsEnum actual) throws Exception {
|
||||||
|
BytesRef ref;
|
||||||
|
|
||||||
|
// sequential next() through all terms
|
||||||
|
while ((ref = expected.next()) != null) {
|
||||||
|
assertEquals(ref, actual.next());
|
||||||
|
assertEquals(expected.ord(), actual.ord());
|
||||||
|
assertEquals(expected.term(), actual.term());
|
||||||
|
}
|
||||||
|
assertNull(actual.next());
|
||||||
|
|
||||||
|
// sequential seekExact(ord) through all terms
|
||||||
|
for (long i = 0; i < numOrds; i++) {
|
||||||
|
expected.seekExact(i);
|
||||||
|
actual.seekExact(i);
|
||||||
|
assertEquals(expected.ord(), actual.ord());
|
||||||
|
assertEquals(expected.term(), actual.term());
|
||||||
|
}
|
||||||
|
|
||||||
|
// sequential seekExact(BytesRef) through all terms
|
||||||
|
for (long i = 0; i < numOrds; i++) {
|
||||||
|
expected.seekExact(i);
|
||||||
|
assertTrue(actual.seekExact(expected.term()));
|
||||||
|
assertEquals(expected.ord(), actual.ord());
|
||||||
|
assertEquals(expected.term(), actual.term());
|
||||||
|
}
|
||||||
|
|
||||||
|
// sequential seekCeil(BytesRef) through all terms
|
||||||
|
for (long i = 0; i < numOrds; i++) {
|
||||||
|
expected.seekExact(i);
|
||||||
|
assertEquals(SeekStatus.FOUND, actual.seekCeil(expected.term()));
|
||||||
|
assertEquals(expected.ord(), actual.ord());
|
||||||
|
assertEquals(expected.term(), actual.term());
|
||||||
|
}
|
||||||
|
|
||||||
|
// random seekExact(ord)
|
||||||
|
for (long i = 0; i < numOrds; i++) {
|
||||||
|
long randomOrd = TestUtil.nextLong(random(), 0, numOrds - 1);
|
||||||
|
expected.seekExact(randomOrd);
|
||||||
|
actual.seekExact(randomOrd);
|
||||||
|
assertEquals(expected.ord(), actual.ord());
|
||||||
|
assertEquals(expected.term(), actual.term());
|
||||||
|
}
|
||||||
|
|
||||||
|
// random seekExact(BytesRef)
|
||||||
|
for (long i = 0; i < numOrds; i++) {
|
||||||
|
long randomOrd = TestUtil.nextLong(random(), 0, numOrds - 1);
|
||||||
|
expected.seekExact(randomOrd);
|
||||||
|
actual.seekExact(expected.term());
|
||||||
|
assertEquals(expected.ord(), actual.ord());
|
||||||
|
assertEquals(expected.term(), actual.term());
|
||||||
|
}
|
||||||
|
|
||||||
|
// random seekCeil(BytesRef)
|
||||||
|
for (long i = 0; i < numOrds; i++) {
|
||||||
|
BytesRef target = new BytesRef(TestUtil.randomUnicodeString(random()));
|
||||||
|
SeekStatus expectedStatus = expected.seekCeil(target);
|
||||||
|
assertEquals(expectedStatus, actual.seekCeil(target));
|
||||||
|
if (expectedStatus != SeekStatus.END) {
|
||||||
|
assertEquals(expected.ord(), actual.ord());
|
||||||
|
assertEquals(expected.term(), actual.term());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected boolean codecAcceptsHugeBinaryValues(String field) {
|
||||||
|
String name = TestUtil.getDocValuesFormat(field);
|
||||||
|
return !(name.equals("Memory")); // Direct has a different type of limit
|
||||||
|
}
|
||||||
|
}
@@ -0,0 +1,228 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.solr.uninverting;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Random;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.concurrent.CountDownLatch;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
|
import org.apache.lucene.document.BinaryDocValuesField;
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.NumericDocValuesField;
|
||||||
|
import org.apache.lucene.document.SortedDocValuesField;
|
||||||
|
import org.apache.lucene.index.BinaryDocValues;
|
||||||
|
import org.apache.lucene.index.DirectoryReader;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.IndexWriter;
|
||||||
|
import org.apache.lucene.index.LeafReader;
|
||||||
|
import org.apache.lucene.index.NumericDocValues;
|
||||||
|
import org.apache.lucene.index.RandomIndexWriter;
|
||||||
|
import org.apache.lucene.index.SortedDocValues;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
import org.apache.lucene.util.TestUtil;
|
||||||
|
|
||||||
|
// TODO: what happened to this test... its not actually uninverting?
|
||||||
|
public class TestFieldCacheWithThreads extends LuceneTestCase {
|
||||||
|
|
||||||
|
public void test() throws Exception {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
|
||||||
|
|
||||||
|
final List<Long> numbers = new ArrayList<>();
|
||||||
|
final List<BytesRef> binary = new ArrayList<>();
|
||||||
|
final List<BytesRef> sorted = new ArrayList<>();
|
||||||
|
final int numDocs = atLeast(100);
|
||||||
|
for(int i=0;i<numDocs;i++) {
|
||||||
|
Document d = new Document();
|
||||||
|
long number = random().nextLong();
|
||||||
|
d.add(new NumericDocValuesField("number", number));
|
||||||
|
BytesRef bytes = new BytesRef(TestUtil.randomRealisticUnicodeString(random()));
|
||||||
|
d.add(new BinaryDocValuesField("bytes", bytes));
|
||||||
|
binary.add(bytes);
|
||||||
|
bytes = new BytesRef(TestUtil.randomRealisticUnicodeString(random()));
|
||||||
|
d.add(new SortedDocValuesField("sorted", bytes));
|
||||||
|
sorted.add(bytes);
|
||||||
|
w.addDocument(d);
|
||||||
|
numbers.add(number);
|
||||||
|
}
|
||||||
|
|
||||||
|
w.forceMerge(1);
|
||||||
|
final IndexReader r = DirectoryReader.open(w);
|
||||||
|
w.close();
|
||||||
|
|
||||||
|
assertEquals(1, r.leaves().size());
|
||||||
|
final LeafReader ar = r.leaves().get(0).reader();
|
||||||
|
|
||||||
|
int numThreads = TestUtil.nextInt(random(), 2, 5);
|
||||||
|
List<Thread> threads = new ArrayList<>();
|
||||||
|
final CountDownLatch startingGun = new CountDownLatch(1);
|
||||||
|
for(int t=0;t<numThreads;t++) {
|
||||||
|
final Random threadRandom = new Random(random().nextLong());
|
||||||
|
Thread thread = new Thread() {
|
||||||
|
@Override
|
||||||
|
public void run() {
|
||||||
|
try {
|
||||||
|
//NumericDocValues ndv = ar.getNumericDocValues("number");
|
||||||
|
NumericDocValues ndv = FieldCache.DEFAULT.getNumerics(ar, "number", FieldCache.LONG_POINT_PARSER, false);
|
||||||
|
//BinaryDocValues bdv = ar.getBinaryDocValues("bytes");
|
||||||
|
BinaryDocValues bdv = FieldCache.DEFAULT.getTerms(ar, "bytes", false);
|
||||||
|
SortedDocValues sdv = FieldCache.DEFAULT.getTermsIndex(ar, "sorted");
|
||||||
|
startingGun.await();
|
||||||
|
int iters = atLeast(1000);
|
||||||
|
for(int iter=0;iter<iters;iter++) {
|
||||||
|
int docID = threadRandom.nextInt(numDocs);
|
||||||
|
switch(threadRandom.nextInt(4)) {
|
||||||
|
case 0:
|
||||||
|
assertEquals(numbers.get(docID).longValue(), FieldCache.DEFAULT.getNumerics(ar, "number", FieldCache.INT_POINT_PARSER, false).get(docID));
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
assertEquals(numbers.get(docID).longValue(), FieldCache.DEFAULT.getNumerics(ar, "number", FieldCache.LONG_POINT_PARSER, false).get(docID));
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
assertEquals(numbers.get(docID).longValue(), FieldCache.DEFAULT.getNumerics(ar, "number", FieldCache.FLOAT_POINT_PARSER, false).get(docID));
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
assertEquals(numbers.get(docID).longValue(), FieldCache.DEFAULT.getNumerics(ar, "number", FieldCache.DOUBLE_POINT_PARSER, false).get(docID));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
BytesRef term = bdv.get(docID);
|
||||||
|
assertEquals(binary.get(docID), term);
|
||||||
|
term = sdv.get(docID);
|
||||||
|
assertEquals(sorted.get(docID), term);
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
thread.start();
|
||||||
|
threads.add(thread);
|
||||||
|
}
|
||||||
|
|
||||||
|
startingGun.countDown();
|
||||||
|
|
||||||
|
for(Thread thread : threads) {
|
||||||
|
thread.join();
|
||||||
|
}
|
||||||
|
|
||||||
|
r.close();
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void test2() throws Exception {
|
||||||
|
Random random = random();
|
||||||
|
final int NUM_DOCS = atLeast(100);
|
||||||
|
final Directory dir = newDirectory();
|
||||||
|
final RandomIndexWriter writer = new RandomIndexWriter(random, dir);
|
||||||
|
final boolean allowDups = random.nextBoolean();
|
||||||
|
final Set<String> seen = new HashSet<>();
|
||||||
|
if (VERBOSE) {
|
||||||
|
System.out.println("TEST: NUM_DOCS=" + NUM_DOCS + " allowDups=" + allowDups);
|
||||||
|
}
|
||||||
|
int numDocs = 0;
|
||||||
|
final List<BytesRef> docValues = new ArrayList<>();
|
||||||
|
|
||||||
|
// TODO: deletions
|
||||||
|
while (numDocs < NUM_DOCS) {
|
||||||
|
final String s;
|
||||||
|
if (random.nextBoolean()) {
|
||||||
|
s = TestUtil.randomSimpleString(random);
|
||||||
|
} else {
|
||||||
|
s = TestUtil.randomUnicodeString(random);
|
||||||
|
}
|
||||||
|
final BytesRef br = new BytesRef(s);
|
||||||
|
|
||||||
|
if (!allowDups) {
|
||||||
|
if (seen.contains(s)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
seen.add(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (VERBOSE) {
|
||||||
|
System.out.println(" " + numDocs + ": s=" + s);
|
||||||
|
}
|
||||||
|
|
||||||
|
final Document doc = new Document();
|
||||||
|
doc.add(new SortedDocValuesField("stringdv", br));
|
||||||
|
doc.add(new NumericDocValuesField("id", numDocs));
|
||||||
|
docValues.add(br);
|
||||||
|
writer.addDocument(doc);
|
||||||
|
numDocs++;
|
||||||
|
|
||||||
|
if (random.nextInt(40) == 17) {
|
||||||
|
// force flush
|
||||||
|
writer.getReader().close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
writer.forceMerge(1);
|
||||||
|
final DirectoryReader r = writer.getReader();
|
||||||
|
writer.close();
|
||||||
|
|
||||||
|
final LeafReader sr = getOnlyLeafReader(r);
|
||||||
|
|
||||||
|
final long END_TIME = System.nanoTime() + TimeUnit.NANOSECONDS.convert((TEST_NIGHTLY ? 30 : 1), TimeUnit.SECONDS);
|
||||||
|
|
||||||
|
final int NUM_THREADS = TestUtil.nextInt(random(), 1, 10);
|
||||||
|
Thread[] threads = new Thread[NUM_THREADS];
|
||||||
|
for(int thread=0;thread<NUM_THREADS;thread++) {
|
||||||
|
threads[thread] = new Thread() {
|
||||||
|
@Override
|
||||||
|
public void run() {
|
||||||
|
Random random = random();
|
||||||
|
final SortedDocValues stringDVDirect;
|
||||||
|
final NumericDocValues docIDToID;
|
||||||
|
try {
|
||||||
|
stringDVDirect = sr.getSortedDocValues("stringdv");
|
||||||
|
docIDToID = sr.getNumericDocValues("id");
|
||||||
|
assertNotNull(stringDVDirect);
|
||||||
|
} catch (IOException ioe) {
|
||||||
|
throw new RuntimeException(ioe);
|
||||||
|
}
|
||||||
|
while(System.nanoTime() < END_TIME) {
|
||||||
|
final SortedDocValues source;
|
||||||
|
source = stringDVDirect;
|
||||||
|
|
||||||
|
for(int iter=0;iter<100;iter++) {
|
||||||
|
final int docID = random.nextInt(sr.maxDoc());
|
||||||
|
BytesRef term = source.get(docID);
|
||||||
|
assertEquals(docValues.get((int) docIDToID.get(docID)), term);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
threads[thread].start();
|
||||||
|
}
|
||||||
|
|
||||||
|
for(Thread thread : threads) {
|
||||||
|
thread.join();
|
||||||
|
}
|
||||||
|
|
||||||
|
r.close();
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
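TestFieldCacheWithThreads above hammers the shared FieldCache from several threads released at the same instant. A minimal, self-contained sketch of that "starting gun" pattern (plain JDK; the class name and loop body are illustrative only, not part of the patch):

    import java.util.ArrayList;
    import java.util.List;
    import java.util.concurrent.CountDownLatch;

    public class StartingGunDemo {
      public static void main(String[] args) throws InterruptedException {
        final CountDownLatch startingGun = new CountDownLatch(1);
        List<Thread> workers = new ArrayList<>();
        for (int i = 0; i < 4; i++) {
          Thread t = new Thread(() -> {
            try {
              startingGun.await();      // every worker blocks here until released
              // ... exercise the shared cache concurrently ...
            } catch (InterruptedException e) {
              throw new RuntimeException(e);
            }
          });
          t.start();
          workers.add(t);
        }
        startingGun.countDown();        // release all workers at once
        for (Thread t : workers) {
          t.join();
        }
      }
    }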
@@ -0,0 +1,497 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.uninverting;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.util.concurrent.CyclicBarrier;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LegacyDoubleField;
import org.apache.lucene.document.LegacyFloatField;
import org.apache.lucene.document.LegacyIntField;
import org.apache.lucene.document.LegacyLongField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LegacyNumericUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.index.SlowCompositeReaderWrapper;
import org.junit.AfterClass;
import org.junit.BeforeClass;

/** random assortment of tests against legacy numerics */
public class TestLegacyFieldCache extends LuceneTestCase {
  private static LeafReader reader;
  private static int NUM_DOCS;
  private static Directory directory;

  @BeforeClass
  public static void beforeClass() throws Exception {
    NUM_DOCS = atLeast(500);
    directory = newDirectory();
    RandomIndexWriter writer= new RandomIndexWriter(random(), directory, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
    long theLong = Long.MAX_VALUE;
    double theDouble = Double.MAX_VALUE;
    int theInt = Integer.MAX_VALUE;
    float theFloat = Float.MAX_VALUE;
    if (VERBOSE) {
      System.out.println("TEST: setUp");
    }
    for (int i = 0; i < NUM_DOCS; i++){
      Document doc = new Document();
      doc.add(new LegacyLongField("theLong", theLong--, Field.Store.NO));
      doc.add(new LegacyDoubleField("theDouble", theDouble--, Field.Store.NO));
      doc.add(new LegacyIntField("theInt", theInt--, Field.Store.NO));
      doc.add(new LegacyFloatField("theFloat", theFloat--, Field.Store.NO));
      if (i%2 == 0) {
        doc.add(new LegacyIntField("sparse", i, Field.Store.NO));
      }

      if (i%2 == 0) {
        doc.add(new LegacyIntField("numInt", i, Field.Store.NO));
      }
      writer.addDocument(doc);
    }
    IndexReader r = writer.getReader();
    reader = SlowCompositeReaderWrapper.wrap(r);
    TestUtil.checkReader(reader);
    writer.close();
  }

  @AfterClass
  public static void afterClass() throws Exception {
    reader.close();
    reader = null;
    directory.close();
    directory = null;
  }

  public void testInfoStream() throws Exception {
    try {
      FieldCache cache = FieldCache.DEFAULT;
      ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
      cache.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8));
      cache.getNumerics(reader, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER, false);
      cache.getNumerics(reader, "theDouble", new FieldCache.Parser() {
        @Override
        public TermsEnum termsEnum(Terms terms) throws IOException {
          return LegacyNumericUtils.filterPrefixCodedLongs(terms.iterator());
        }
        @Override
        public long parseValue(BytesRef term) {
          int val = (int) LegacyNumericUtils.prefixCodedToLong(term);
          if (val<0) val ^= 0x7fffffff;
          return val;
        }
      }, false);
      assertTrue(bos.toString(IOUtils.UTF_8).indexOf("WARNING") != -1);
    } finally {
      FieldCache.DEFAULT.setInfoStream(null);
      FieldCache.DEFAULT.purgeAllCaches();
    }
  }

  public void test() throws IOException {
    FieldCache cache = FieldCache.DEFAULT;
    NumericDocValues doubles = cache.getNumerics(reader, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER, random().nextBoolean());
    assertSame("Second request to cache return same array", doubles, cache.getNumerics(reader, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER, random().nextBoolean()));
    for (int i = 0; i < NUM_DOCS; i++) {
      assertEquals(Double.doubleToLongBits(Double.MAX_VALUE - i), doubles.get(i));
    }

    NumericDocValues longs = cache.getNumerics(reader, "theLong", FieldCache.LEGACY_LONG_PARSER, random().nextBoolean());
    assertSame("Second request to cache return same array", longs, cache.getNumerics(reader, "theLong", FieldCache.LEGACY_LONG_PARSER, random().nextBoolean()));
    for (int i = 0; i < NUM_DOCS; i++) {
      assertEquals(Long.MAX_VALUE - i, longs.get(i));
    }

    NumericDocValues ints = cache.getNumerics(reader, "theInt", FieldCache.LEGACY_INT_PARSER, random().nextBoolean());
    assertSame("Second request to cache return same array", ints, cache.getNumerics(reader, "theInt", FieldCache.LEGACY_INT_PARSER, random().nextBoolean()));
    for (int i = 0; i < NUM_DOCS; i++) {
      assertEquals(Integer.MAX_VALUE - i, ints.get(i));
    }

    NumericDocValues floats = cache.getNumerics(reader, "theFloat", FieldCache.LEGACY_FLOAT_PARSER, random().nextBoolean());
    assertSame("Second request to cache return same array", floats, cache.getNumerics(reader, "theFloat", FieldCache.LEGACY_FLOAT_PARSER, random().nextBoolean()));
    for (int i = 0; i < NUM_DOCS; i++) {
      assertEquals(Float.floatToIntBits(Float.MAX_VALUE - i), floats.get(i));
    }

    Bits docsWithField = cache.getDocsWithField(reader, "theLong", null);
    assertSame("Second request to cache return same array", docsWithField, cache.getDocsWithField(reader, "theLong", null));
    assertTrue("docsWithField(theLong) must be class Bits.MatchAllBits", docsWithField instanceof Bits.MatchAllBits);
    assertTrue("docsWithField(theLong) Size: " + docsWithField.length() + " is not: " + NUM_DOCS, docsWithField.length() == NUM_DOCS);
    for (int i = 0; i < docsWithField.length(); i++) {
      assertTrue(docsWithField.get(i));
    }

    docsWithField = cache.getDocsWithField(reader, "sparse", null);
    assertSame("Second request to cache return same array", docsWithField, cache.getDocsWithField(reader, "sparse", null));
    assertFalse("docsWithField(sparse) must not be class Bits.MatchAllBits", docsWithField instanceof Bits.MatchAllBits);
    assertTrue("docsWithField(sparse) Size: " + docsWithField.length() + " is not: " + NUM_DOCS, docsWithField.length() == NUM_DOCS);
    for (int i = 0; i < docsWithField.length(); i++) {
      assertEquals(i%2 == 0, docsWithField.get(i));
    }

    FieldCache.DEFAULT.purgeByCacheKey(reader.getCoreCacheKey());
  }

  public void testEmptyIndex() throws Exception {
    Directory dir = newDirectory();
    IndexWriter writer= new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setMaxBufferedDocs(500));
    writer.close();
    IndexReader r = DirectoryReader.open(dir);
    LeafReader reader = SlowCompositeReaderWrapper.wrap(r);
    TestUtil.checkReader(reader);
    FieldCache.DEFAULT.getTerms(reader, "foobar", true);
    FieldCache.DEFAULT.getTermsIndex(reader, "foobar");
    FieldCache.DEFAULT.purgeByCacheKey(reader.getCoreCacheKey());
    r.close();
    dir.close();
  }

  public void testDocsWithField() throws Exception {
    FieldCache cache = FieldCache.DEFAULT;
    cache.purgeAllCaches();
    assertEquals(0, cache.getCacheEntries().length);
    cache.getNumerics(reader, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER, true);

    // The double[] takes one slots, and docsWithField should also
    // have been populated:
    assertEquals(2, cache.getCacheEntries().length);
    Bits bits = cache.getDocsWithField(reader, "theDouble", FieldCache.LEGACY_DOUBLE_PARSER);

    // No new entries should appear:
    assertEquals(2, cache.getCacheEntries().length);
    assertTrue(bits instanceof Bits.MatchAllBits);

    NumericDocValues ints = cache.getNumerics(reader, "sparse", FieldCache.LEGACY_INT_PARSER, true);
    assertEquals(4, cache.getCacheEntries().length);
    Bits docsWithField = cache.getDocsWithField(reader, "sparse", FieldCache.LEGACY_INT_PARSER);
    assertEquals(4, cache.getCacheEntries().length);
    for (int i = 0; i < docsWithField.length(); i++) {
      if (i%2 == 0) {
        assertTrue(docsWithField.get(i));
        assertEquals(i, ints.get(i));
      } else {
        assertFalse(docsWithField.get(i));
      }
    }

    NumericDocValues numInts = cache.getNumerics(reader, "numInt", FieldCache.LEGACY_INT_PARSER, random().nextBoolean());
    docsWithField = cache.getDocsWithField(reader, "numInt", FieldCache.LEGACY_INT_PARSER);
    for (int i = 0; i < docsWithField.length(); i++) {
      if (i%2 == 0) {
        assertTrue(docsWithField.get(i));
        assertEquals(i, numInts.get(i));
      } else {
        assertFalse(docsWithField.get(i));
      }
    }
  }

  public void testGetDocsWithFieldThreadSafety() throws Exception {
    final FieldCache cache = FieldCache.DEFAULT;
    cache.purgeAllCaches();

    int NUM_THREADS = 3;
    Thread[] threads = new Thread[NUM_THREADS];
    final AtomicBoolean failed = new AtomicBoolean();
    final AtomicInteger iters = new AtomicInteger();
    final int NUM_ITER = 200 * RANDOM_MULTIPLIER;
    final CyclicBarrier restart = new CyclicBarrier(NUM_THREADS,
                                                    new Runnable() {
                                                      @Override
                                                      public void run() {
                                                        cache.purgeAllCaches();
                                                        iters.incrementAndGet();
                                                      }
                                                    });
    for(int threadIDX=0;threadIDX<NUM_THREADS;threadIDX++) {
      threads[threadIDX] = new Thread() {
          @Override
          public void run() {

            try {
              while(!failed.get()) {
                final int op = random().nextInt(3);
                if (op == 0) {
                  // Purge all caches & resume, once all
                  // threads get here:
                  restart.await();
                  if (iters.get() >= NUM_ITER) {
                    break;
                  }
                } else if (op == 1) {
                  Bits docsWithField = cache.getDocsWithField(reader, "sparse", null);
                  for (int i = 0; i < docsWithField.length(); i++) {
                    assertEquals(i%2 == 0, docsWithField.get(i));
                  }
                } else {
                  NumericDocValues ints = cache.getNumerics(reader, "sparse", FieldCache.LEGACY_INT_PARSER, true);
                  Bits docsWithField = cache.getDocsWithField(reader, "sparse", null);
                  for (int i = 0; i < docsWithField.length(); i++) {
                    if (i%2 == 0) {
                      assertTrue(docsWithField.get(i));
                      assertEquals(i, ints.get(i));
                    } else {
                      assertFalse(docsWithField.get(i));
                    }
                  }
                }
              }
            } catch (Throwable t) {
              failed.set(true);
              restart.reset();
              throw new RuntimeException(t);
            }
          }
        };
      threads[threadIDX].start();
    }

    for(int threadIDX=0;threadIDX<NUM_THREADS;threadIDX++) {
      threads[threadIDX].join();
    }
    assertFalse(failed.get());
  }

  public void testDocValuesIntegration() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig(null);
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
    Document doc = new Document();
    doc.add(new BinaryDocValuesField("binary", new BytesRef("binary value")));
    doc.add(new SortedDocValuesField("sorted", new BytesRef("sorted value")));
    doc.add(new NumericDocValuesField("numeric", 42));
    doc.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value1")));
    doc.add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value2")));
    iw.addDocument(doc);
    DirectoryReader ir = iw.getReader();
    iw.close();
    LeafReader ar = getOnlyLeafReader(ir);

    // Binary type: can be retrieved via getTerms()
    expectThrows(IllegalStateException.class, () -> {
      FieldCache.DEFAULT.getNumerics(ar, "binary", FieldCache.LEGACY_INT_PARSER, false);
    });

    // Sorted type: can be retrieved via getTerms(), getTermsIndex(), getDocTermOrds()
    expectThrows(IllegalStateException.class, () -> {
      FieldCache.DEFAULT.getNumerics(ar, "sorted", FieldCache.LEGACY_INT_PARSER, false);
    });

    // Numeric type: can be retrieved via getInts() and so on
    NumericDocValues numeric = FieldCache.DEFAULT.getNumerics(ar, "numeric", FieldCache.LEGACY_INT_PARSER, false);
    assertEquals(42, numeric.get(0));

    // SortedSet type: can be retrieved via getDocTermOrds()
    expectThrows(IllegalStateException.class, () -> {
      FieldCache.DEFAULT.getNumerics(ar, "sortedset", FieldCache.LEGACY_INT_PARSER, false);
    });

    ir.close();
    dir.close();
  }

  public void testNonexistantFields() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
    Document doc = new Document();
    iw.addDocument(doc);
    DirectoryReader ir = iw.getReader();
    iw.close();

    LeafReader ar = getOnlyLeafReader(ir);

    final FieldCache cache = FieldCache.DEFAULT;
    cache.purgeAllCaches();
    assertEquals(0, cache.getCacheEntries().length);

    NumericDocValues ints = cache.getNumerics(ar, "bogusints", FieldCache.LEGACY_INT_PARSER, true);
    assertEquals(0, ints.get(0));

    NumericDocValues longs = cache.getNumerics(ar, "boguslongs", FieldCache.LEGACY_LONG_PARSER, true);
    assertEquals(0, longs.get(0));

    NumericDocValues floats = cache.getNumerics(ar, "bogusfloats", FieldCache.LEGACY_FLOAT_PARSER, true);
    assertEquals(0, floats.get(0));

    NumericDocValues doubles = cache.getNumerics(ar, "bogusdoubles", FieldCache.LEGACY_DOUBLE_PARSER, true);
    assertEquals(0, doubles.get(0));

    // check that we cached nothing
    assertEquals(0, cache.getCacheEntries().length);
    ir.close();
    dir.close();
  }

  public void testNonIndexedFields() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
    Document doc = new Document();
    doc.add(new StoredField("bogusbytes", "bogus"));
    doc.add(new StoredField("bogusshorts", "bogus"));
    doc.add(new StoredField("bogusints", "bogus"));
    doc.add(new StoredField("boguslongs", "bogus"));
    doc.add(new StoredField("bogusfloats", "bogus"));
    doc.add(new StoredField("bogusdoubles", "bogus"));
    doc.add(new StoredField("bogusbits", "bogus"));
    iw.addDocument(doc);
    DirectoryReader ir = iw.getReader();
    iw.close();

    LeafReader ar = getOnlyLeafReader(ir);

    final FieldCache cache = FieldCache.DEFAULT;
    cache.purgeAllCaches();
    assertEquals(0, cache.getCacheEntries().length);

    NumericDocValues ints = cache.getNumerics(ar, "bogusints", FieldCache.LEGACY_INT_PARSER, true);
    assertEquals(0, ints.get(0));

    NumericDocValues longs = cache.getNumerics(ar, "boguslongs", FieldCache.LEGACY_LONG_PARSER, true);
    assertEquals(0, longs.get(0));

    NumericDocValues floats = cache.getNumerics(ar, "bogusfloats", FieldCache.LEGACY_FLOAT_PARSER, true);
    assertEquals(0, floats.get(0));

    NumericDocValues doubles = cache.getNumerics(ar, "bogusdoubles", FieldCache.LEGACY_DOUBLE_PARSER, true);
    assertEquals(0, doubles.get(0));

    // check that we cached nothing
    assertEquals(0, cache.getCacheEntries().length);
    ir.close();
    dir.close();
  }

  // Make sure that the use of GrowableWriter doesn't prevent from using the full long range
  public void testLongFieldCache() throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random()));
    cfg.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg);
    Document doc = new Document();
    LegacyLongField field = new LegacyLongField("f", 0L, Store.YES);
    doc.add(field);
    final long[] values = new long[TestUtil.nextInt(random(), 1, 10)];
    for (int i = 0; i < values.length; ++i) {
      final long v;
      switch (random().nextInt(10)) {
        case 0:
          v = Long.MIN_VALUE;
          break;
        case 1:
          v = 0;
          break;
        case 2:
          v = Long.MAX_VALUE;
          break;
        default:
          v = TestUtil.nextLong(random(), -10, 10);
          break;
      }
      values[i] = v;
      if (v == 0 && random().nextBoolean()) {
        // missing
        iw.addDocument(new Document());
      } else {
        field.setLongValue(v);
        iw.addDocument(doc);
      }
    }
    iw.forceMerge(1);
    final DirectoryReader reader = iw.getReader();
    final NumericDocValues longs = FieldCache.DEFAULT.getNumerics(getOnlyLeafReader(reader), "f", FieldCache.LEGACY_LONG_PARSER, false);
    for (int i = 0; i < values.length; ++i) {
      assertEquals(values[i], longs.get(i));
    }
    reader.close();
    iw.close();
    dir.close();
  }

  // Make sure that the use of GrowableWriter doesn't prevent from using the full int range
  public void testIntFieldCache() throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random()));
    cfg.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg);
    Document doc = new Document();
    LegacyIntField field = new LegacyIntField("f", 0, Store.YES);
    doc.add(field);
    final int[] values = new int[TestUtil.nextInt(random(), 1, 10)];
    for (int i = 0; i < values.length; ++i) {
      final int v;
      switch (random().nextInt(10)) {
        case 0:
          v = Integer.MIN_VALUE;
          break;
        case 1:
          v = 0;
          break;
        case 2:
          v = Integer.MAX_VALUE;
          break;
        default:
          v = TestUtil.nextInt(random(), -10, 10);
          break;
      }
      values[i] = v;
      if (v == 0 && random().nextBoolean()) {
        // missing
        iw.addDocument(new Document());
      } else {
        field.setIntValue(v);
        iw.addDocument(doc);
      }
    }
    iw.forceMerge(1);
    final DirectoryReader reader = iw.getReader();
    final NumericDocValues ints = FieldCache.DEFAULT.getNumerics(getOnlyLeafReader(reader), "f", FieldCache.LEGACY_INT_PARSER, false);
    for (int i = 0; i < values.length; ++i) {
      assertEquals(values[i], ints.get(i));
    }
    reader.close();
    iw.close();
    dir.close();
  }

}
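TestLegacyFieldCache above is essentially a usage tour of the relocated org.apache.solr.uninverting.FieldCache. A rough sketch of the basic pattern it verifies, assuming an already-open Directory `dir` holding a legacy long field "theLong" (the snippet is illustrative only, not part of the patch):

    LeafReader leaf = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(dir));
    NumericDocValues longs =
        FieldCache.DEFAULT.getNumerics(leaf, "theLong", FieldCache.LEGACY_LONG_PARSER, false);
    Bits docsWithValue = FieldCache.DEFAULT.getDocsWithField(leaf, "theLong", null);
    for (int docID = 0; docID < leaf.maxDoc(); docID++) {
      if (docsWithValue.get(docID)) {
        long value = longs.get(docID);   // uninverted, cached value for this document
      }
    }

Repeated getNumerics calls for the same field and parser hand back the same cached array, which is what the assertSame checks above pin down.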
@@ -0,0 +1,156 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.uninverting;

import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.LegacyIntField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LegacyNumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.uninverting.UninvertingReader.Type;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;

public class TestNumericTerms32 extends LuceneTestCase {
  // distance of entries
  private static int distance;
  // shift the starting of the values to the left, to also have negative values:
  private static final int startOffset = - 1 << 15;
  // number of docs to generate for testing
  private static int noDocs;

  private static Directory directory = null;
  private static IndexReader reader = null;
  private static IndexSearcher searcher = null;

  @BeforeClass
  public static void beforeClass() throws Exception {
    noDocs = atLeast(4096);
    distance = (1 << 30) / noDocs;
    directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
        newIndexWriterConfig(new MockAnalyzer(random()))
        .setMaxBufferedDocs(TestUtil.nextInt(random(), 100, 1000))
        .setMergePolicy(newLogMergePolicy()));

    final FieldType storedInt = new FieldType(LegacyIntField.TYPE_NOT_STORED);
    storedInt.setStored(true);
    storedInt.freeze();

    final FieldType storedInt8 = new FieldType(storedInt);
    storedInt8.setNumericPrecisionStep(8);

    final FieldType storedInt4 = new FieldType(storedInt);
    storedInt4.setNumericPrecisionStep(4);

    final FieldType storedInt2 = new FieldType(storedInt);
    storedInt2.setNumericPrecisionStep(2);

    LegacyIntField
      field8 = new LegacyIntField("field8", 0, storedInt8),
      field4 = new LegacyIntField("field4", 0, storedInt4),
      field2 = new LegacyIntField("field2", 0, storedInt2);

    Document doc = new Document();
    // add fields, that have a distance to test general functionality
    doc.add(field8); doc.add(field4); doc.add(field2);

    // Add a series of noDocs docs with increasing int values
    for (int l=0; l<noDocs; l++) {
      int val=distance*l+startOffset;
      field8.setIntValue(val);
      field4.setIntValue(val);
      field2.setIntValue(val);

      val=l-(noDocs/2);
      writer.addDocument(doc);
    }

    Map<String,Type> map = new HashMap<>();
    map.put("field2", Type.LEGACY_INTEGER);
    map.put("field4", Type.LEGACY_INTEGER);
    map.put("field8", Type.LEGACY_INTEGER);
    reader = UninvertingReader.wrap(writer.getReader(), map);
    searcher=newSearcher(reader);
    writer.close();
  }

  @AfterClass
  public static void afterClass() throws Exception {
    searcher = null;
    TestUtil.checkReader(reader);
    reader.close();
    reader = null;
    directory.close();
    directory = null;
  }

  private void testSorting(int precisionStep) throws Exception {
    String field="field"+precisionStep;
    // 10 random tests, the index order is ascending,
    // so using a reverse sort field should retun descending documents
    int num = TestUtil.nextInt(random(), 10, 20);
    for (int i = 0; i < num; i++) {
      int lower=(int)(random().nextDouble()*noDocs*distance)+startOffset;
      int upper=(int)(random().nextDouble()*noDocs*distance)+startOffset;
      if (lower>upper) {
        int a=lower; lower=upper; upper=a;
      }
      Query tq= LegacyNumericRangeQuery.newIntRange(field, precisionStep, lower, upper, true, true);
      TopDocs topDocs = searcher.search(tq, noDocs, new Sort(new SortField(field, SortField.Type.INT, true)));
      if (topDocs.totalHits==0) continue;
      ScoreDoc[] sd = topDocs.scoreDocs;
      assertNotNull(sd);
      int last = searcher.doc(sd[0].doc).getField(field).numericValue().intValue();
      for (int j=1; j<sd.length; j++) {
        int act = searcher.doc(sd[j].doc).getField(field).numericValue().intValue();
        assertTrue("Docs should be sorted backwards", last>act );
        last=act;
      }
    }
  }

  @Test
  public void testSorting_8bit() throws Exception {
    testSorting(8);
  }

  @Test
  public void testSorting_4bit() throws Exception {
    testSorting(4);
  }

  @Test
  public void testSorting_2bit() throws Exception {
    testSorting(2);
  }
}
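TestNumericTerms32 above sorts on fields that were indexed only as legacy trie terms by wrapping the reader before searching. A condensed sketch of that wiring, with an illustrative field name ("price") and a MatchAllDocsQuery that are not in the patch:

    Map<String,Type> mapping = new HashMap<>();
    mapping.put("price", Type.LEGACY_INTEGER);     // uninvert "price" into numeric doc values
    DirectoryReader wrapped = UninvertingReader.wrap(DirectoryReader.open(dir), mapping);
    IndexSearcher searcher = new IndexSearcher(wrapped);
    TopDocs hits = searcher.search(new MatchAllDocsQuery(), 10,
                                   new Sort(new SortField("price", SortField.Type.INT)));

TestNumericTerms64 below repeats the same exercise with Type.LEGACY_LONG.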
@@ -0,0 +1,166 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.uninverting;

import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.LegacyLongField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LegacyNumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.uninverting.UninvertingReader.Type;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;

public class TestNumericTerms64 extends LuceneTestCase {
  // distance of entries
  private static long distance;
  // shift the starting of the values to the left, to also have negative values:
  private static final long startOffset = - 1L << 31;
  // number of docs to generate for testing
  private static int noDocs;

  private static Directory directory = null;
  private static IndexReader reader = null;
  private static IndexSearcher searcher = null;

  @BeforeClass
  public static void beforeClass() throws Exception {
    noDocs = atLeast(4096);
    distance = (1L << 60) / noDocs;
    directory = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
        newIndexWriterConfig(new MockAnalyzer(random()))
        .setMaxBufferedDocs(TestUtil.nextInt(random(), 100, 1000))
        .setMergePolicy(newLogMergePolicy()));

    final FieldType storedLong = new FieldType(LegacyLongField.TYPE_NOT_STORED);
    storedLong.setStored(true);
    storedLong.freeze();

    final FieldType storedLong8 = new FieldType(storedLong);
    storedLong8.setNumericPrecisionStep(8);

    final FieldType storedLong4 = new FieldType(storedLong);
    storedLong4.setNumericPrecisionStep(4);

    final FieldType storedLong6 = new FieldType(storedLong);
    storedLong6.setNumericPrecisionStep(6);

    final FieldType storedLong2 = new FieldType(storedLong);
    storedLong2.setNumericPrecisionStep(2);

    LegacyLongField
      field8 = new LegacyLongField("field8", 0L, storedLong8),
      field6 = new LegacyLongField("field6", 0L, storedLong6),
      field4 = new LegacyLongField("field4", 0L, storedLong4),
      field2 = new LegacyLongField("field2", 0L, storedLong2);

    Document doc = new Document();
    // add fields, that have a distance to test general functionality
    doc.add(field8); doc.add(field6); doc.add(field4); doc.add(field2);

    // Add a series of noDocs docs with increasing long values, by updating the fields
    for (int l=0; l<noDocs; l++) {
      long val=distance*l+startOffset;
      field8.setLongValue(val);
      field6.setLongValue(val);
      field4.setLongValue(val);
      field2.setLongValue(val);

      val=l-(noDocs/2);
      writer.addDocument(doc);
    }
    Map<String,Type> map = new HashMap<>();
    map.put("field2", Type.LEGACY_LONG);
    map.put("field4", Type.LEGACY_LONG);
    map.put("field6", Type.LEGACY_LONG);
    map.put("field8", Type.LEGACY_LONG);
    reader = UninvertingReader.wrap(writer.getReader(), map);
    searcher=newSearcher(reader);
    writer.close();
  }

  @AfterClass
  public static void afterClass() throws Exception {
    searcher = null;
    TestUtil.checkReader(reader);
    reader.close();
    reader = null;
    directory.close();
    directory = null;
  }

  private void testSorting(int precisionStep) throws Exception {
    String field="field"+precisionStep;
    // 10 random tests, the index order is ascending,
    // so using a reverse sort field should retun descending documents
    int num = TestUtil.nextInt(random(), 10, 20);
    for (int i = 0; i < num; i++) {
      long lower=(long)(random().nextDouble()*noDocs*distance)+startOffset;
      long upper=(long)(random().nextDouble()*noDocs*distance)+startOffset;
      if (lower>upper) {
        long a=lower; lower=upper; upper=a;
      }
      Query tq= LegacyNumericRangeQuery.newLongRange(field, precisionStep, lower, upper, true, true);
      TopDocs topDocs = searcher.search(tq, noDocs, new Sort(new SortField(field, SortField.Type.LONG, true)));
      if (topDocs.totalHits==0) continue;
      ScoreDoc[] sd = topDocs.scoreDocs;
      assertNotNull(sd);
      long last=searcher.doc(sd[0].doc).getField(field).numericValue().longValue();
      for (int j=1; j<sd.length; j++) {
        long act=searcher.doc(sd[j].doc).getField(field).numericValue().longValue();
        assertTrue("Docs should be sorted backwards", last>act );
        last=act;
      }
    }
  }

  @Test
  public void testSorting_8bit() throws Exception {
    testSorting(8);
  }

  @Test
  public void testSorting_6bit() throws Exception {
    testSorting(6);
  }

  @Test
  public void testSorting_4bit() throws Exception {
    testSorting(4);
  }

  @Test
  public void testSorting_2bit() throws Exception {
    testSorting(2);
  }
}
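Both numeric-terms tests above (and the sorted-set tests in the next file) lean on Lucene's prefix-coded term encoding for legacy numerics. A small round-trip sketch, hedged because the exact LegacyNumericUtils signatures are assumed from the Lucene 6.x line rather than stated in this patch:

    BytesRefBuilder buffer = new BytesRefBuilder();
    LegacyNumericUtils.intToPrefixCoded(-3, 0, buffer);     // shift 0 = full-precision term
    int decoded = LegacyNumericUtils.prefixCodedToInt(buffer.get());
    assert decoded == -3;
    // Prefix-coded terms compare in signed numeric order, which is why an
    // uninverted sorted-set over such a field comes back in numeric order.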
@ -0,0 +1,395 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.solr.uninverting;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.EnumSet;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
|
import java.util.LinkedHashSet;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.Field.Store;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.lucene.document.FieldType;
|
||||||
|
import org.apache.lucene.document.IntPoint;
|
||||||
|
import org.apache.lucene.document.LegacyIntField;
|
||||||
|
import org.apache.lucene.document.LegacyLongField;
|
||||||
|
import org.apache.lucene.document.NumericDocValuesField;
|
||||||
|
import org.apache.lucene.document.StoredField;
|
||||||
|
import org.apache.lucene.document.StringField;
|
||||||
|
import org.apache.lucene.index.DirectoryReader;
|
||||||
|
import org.apache.lucene.index.DocValues;
|
||||||
|
import org.apache.lucene.index.DocValuesType;
|
||||||
|
import org.apache.lucene.index.FieldInfo;
|
||||||
|
import org.apache.lucene.index.IndexWriter;
|
||||||
|
import org.apache.lucene.index.LeafReader;
|
||||||
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
|
import org.apache.lucene.index.SortedSetDocValues;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.LegacyNumericUtils;
|
||||||
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
import org.apache.lucene.util.TestUtil;
|
||||||
|
import org.apache.solr.index.SlowCompositeReaderWrapper;
|
||||||
|
import org.apache.solr.uninverting.UninvertingReader.Type;
|
||||||
|
|
||||||
|
public class TestUninvertingReader extends LuceneTestCase {
|
||||||
|
|
||||||
|
public void testSortedSetInteger() throws IOException {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
|
||||||
|
|
||||||
|
Document doc = new Document();
|
||||||
|
doc.add(new LegacyIntField("foo", 5, Field.Store.NO));
|
||||||
|
iw.addDocument(doc);
|
||||||
|
|
||||||
|
doc = new Document();
|
||||||
|
doc.add(new LegacyIntField("foo", 5, Field.Store.NO));
|
||||||
|
doc.add(new LegacyIntField("foo", -3, Field.Store.NO));
|
||||||
|
iw.addDocument(doc);
|
||||||
|
|
||||||
|
iw.forceMerge(1);
|
||||||
|
iw.close();
|
||||||
|
|
||||||
|
DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir),
|
||||||
|
Collections.singletonMap("foo", Type.SORTED_SET_INTEGER));
|
||||||
|
LeafReader ar = ir.leaves().get(0).reader();
|
||||||
|
SortedSetDocValues v = ar.getSortedSetDocValues("foo");
|
||||||
|
assertEquals(2, v.getValueCount());
|
||||||
|
|
||||||
|
v.setDocument(0);
|
||||||
|
assertEquals(1, v.nextOrd());
|
||||||
|
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
|
||||||
|
|
||||||
|
v.setDocument(1);
|
||||||
|
assertEquals(0, v.nextOrd());
|
||||||
|
assertEquals(1, v.nextOrd());
|
||||||
|
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
|
||||||
|
|
||||||
|
BytesRef value = v.lookupOrd(0);
|
||||||
|
assertEquals(-3, LegacyNumericUtils.prefixCodedToInt(value));
|
||||||
|
|
||||||
|
value = v.lookupOrd(1);
|
||||||
|
assertEquals(5, LegacyNumericUtils.prefixCodedToInt(value));
|
||||||
|
TestUtil.checkReader(ir);
|
||||||
|
ir.close();
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSortedSetFloat() throws IOException {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
|
||||||
|
|
||||||
|
Document doc = new Document();
|
||||||
|
doc.add(new LegacyIntField("foo", Float.floatToRawIntBits(5f), Field.Store.NO));
|
||||||
|
iw.addDocument(doc);
|
||||||
|
|
||||||
|
doc = new Document();
|
||||||
|
doc.add(new LegacyIntField("foo", Float.floatToRawIntBits(5f), Field.Store.NO));
|
||||||
|
doc.add(new LegacyIntField("foo", Float.floatToRawIntBits(-3f), Field.Store.NO));
|
||||||
|
iw.addDocument(doc);
|
||||||
|
|
||||||
|
iw.forceMerge(1);
|
||||||
|
iw.close();
|
||||||
|
|
||||||
|
DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir),
|
||||||
|
Collections.singletonMap("foo", Type.SORTED_SET_FLOAT));
|
||||||
|
LeafReader ar = ir.leaves().get(0).reader();
|
||||||
|
|
||||||
|
SortedSetDocValues v = ar.getSortedSetDocValues("foo");
|
||||||
|
assertEquals(2, v.getValueCount());
|
||||||
|
|
||||||
|
v.setDocument(0);
|
||||||
|
assertEquals(1, v.nextOrd());
|
||||||
|
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
|
||||||
|
|
||||||
|
v.setDocument(1);
|
||||||
|
assertEquals(0, v.nextOrd());
|
||||||
|
assertEquals(1, v.nextOrd());
|
||||||
|
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
|
||||||
|
|
||||||
|
BytesRef value = v.lookupOrd(0);
|
||||||
|
assertEquals(Float.floatToRawIntBits(-3f), LegacyNumericUtils.prefixCodedToInt(value));
|
||||||
|
|
||||||
|
value = v.lookupOrd(1);
|
||||||
|
assertEquals(Float.floatToRawIntBits(5f), LegacyNumericUtils.prefixCodedToInt(value));
|
||||||
|
TestUtil.checkReader(ir);
|
||||||
|
ir.close();
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSortedSetLong() throws IOException {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
|
||||||
|
|
||||||
|
Document doc = new Document();
|
||||||
|
doc.add(new LegacyLongField("foo", 5, Field.Store.NO));
|
||||||
|
iw.addDocument(doc);
|
||||||
|
|
||||||
|
doc = new Document();
|
||||||
|
doc.add(new LegacyLongField("foo", 5, Field.Store.NO));
|
||||||
|
doc.add(new LegacyLongField("foo", -3, Field.Store.NO));
|
||||||
|
iw.addDocument(doc);
|
||||||
|
|
||||||
|
iw.forceMerge(1);
|
||||||
|
iw.close();
|
||||||
|
|
||||||
|
DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir),
|
||||||
|
Collections.singletonMap("foo", Type.SORTED_SET_LONG));
|
||||||
|
LeafReader ar = ir.leaves().get(0).reader();
|
||||||
|
SortedSetDocValues v = ar.getSortedSetDocValues("foo");
|
||||||
|
assertEquals(2, v.getValueCount());
|
||||||
|
|
||||||
|
v.setDocument(0);
|
||||||
|
assertEquals(1, v.nextOrd());
|
||||||
|
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
|
||||||
|
|
||||||
|
v.setDocument(1);
|
||||||
|
assertEquals(0, v.nextOrd());
|
||||||
|
assertEquals(1, v.nextOrd());
|
||||||
|
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
|
||||||
|
|
||||||
|
BytesRef value = v.lookupOrd(0);
|
||||||
|
assertEquals(-3, LegacyNumericUtils.prefixCodedToLong(value));
|
||||||
|
|
||||||
|
value = v.lookupOrd(1);
|
||||||
|
assertEquals(5, LegacyNumericUtils.prefixCodedToLong(value));
|
||||||
|
TestUtil.checkReader(ir);
|
||||||
|
ir.close();
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSortedSetDouble() throws IOException {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
|
||||||
|
|
||||||
|
Document doc = new Document();
|
||||||
|
doc.add(new LegacyLongField("foo", Double.doubleToRawLongBits(5d), Field.Store.NO));
|
||||||
|
iw.addDocument(doc);
|
||||||
|
|
||||||
|
doc = new Document();
|
||||||
|
doc.add(new LegacyLongField("foo", Double.doubleToRawLongBits(5d), Field.Store.NO));
|
||||||
|
doc.add(new LegacyLongField("foo", Double.doubleToRawLongBits(-3d), Field.Store.NO));
|
||||||
|
iw.addDocument(doc);
|
||||||
|
|
||||||
|
iw.forceMerge(1);
|
||||||
|
iw.close();
|
||||||
|
|
||||||
|
DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir),
|
||||||
|
Collections.singletonMap("foo", Type.SORTED_SET_DOUBLE));
|
||||||
|
LeafReader ar = ir.leaves().get(0).reader();
|
||||||
|
SortedSetDocValues v = ar.getSortedSetDocValues("foo");
|
||||||
|
assertEquals(2, v.getValueCount());
|
||||||
|
|
||||||
|
v.setDocument(0);
|
||||||
|
assertEquals(1, v.nextOrd());
|
||||||
|
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
|
||||||
|
|
||||||
|
v.setDocument(1);
|
||||||
|
assertEquals(0, v.nextOrd());
|
||||||
|
assertEquals(1, v.nextOrd());
|
||||||
|
assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd());
|
||||||
|
|
||||||
|
BytesRef value = v.lookupOrd(0);
|
||||||
|
assertEquals(Double.doubleToRawLongBits(-3d), LegacyNumericUtils.prefixCodedToLong(value));
|
||||||
|
|
||||||
|
value = v.lookupOrd(1);
|
||||||
|
assertEquals(Double.doubleToRawLongBits(5d), LegacyNumericUtils.prefixCodedToLong(value));
|
||||||
|
TestUtil.checkReader(ir);
|
||||||
|
ir.close();
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
  /** Tests {@link Type#SORTED_SET_INTEGER} using Integer based fields, with and w/o precision steps */
  public void testSortedSetIntegerManyValues() throws IOException {
    final Directory dir = newDirectory();
    final IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));

    final FieldType NO_TRIE_TYPE = new FieldType(LegacyIntField.TYPE_NOT_STORED);
    NO_TRIE_TYPE.setNumericPrecisionStep(Integer.MAX_VALUE);

    final Map<String,Type> UNINVERT_MAP = new LinkedHashMap<String,Type>();
    UNINVERT_MAP.put("notrie_single", Type.SORTED_SET_INTEGER);
    UNINVERT_MAP.put("notrie_multi", Type.SORTED_SET_INTEGER);
    UNINVERT_MAP.put("trie_single", Type.SORTED_SET_INTEGER);
    UNINVERT_MAP.put("trie_multi", Type.SORTED_SET_INTEGER);
    final Set<String> MULTI_VALUES = new LinkedHashSet<String>();
    MULTI_VALUES.add("trie_multi");
    MULTI_VALUES.add("notrie_multi");

    final int NUM_DOCS = TestUtil.nextInt(random(), 200, 1500);
    final int MIN = TestUtil.nextInt(random(), 10, 100);
    final int MAX = MIN + TestUtil.nextInt(random(), 10, 100);
    final long EXPECTED_VALSET_SIZE = 1 + MAX - MIN;

    { // (at least) one doc should have every value, so that at least one segment has every value
      final Document doc = new Document();
      for (int i = MIN; i <= MAX; i++) {
        doc.add(new LegacyIntField("trie_multi", i, Field.Store.NO));
        doc.add(new LegacyIntField("notrie_multi", i, NO_TRIE_TYPE));
      }
      iw.addDocument(doc);
    }

    // now add some more random docs (note: starting at i=1 because of previously added doc)
    for (int i = 1; i < NUM_DOCS; i++) {
      final Document doc = new Document();
      if (0 != TestUtil.nextInt(random(), 0, 9)) {
        int val = TestUtil.nextInt(random(), MIN, MAX);
        doc.add(new LegacyIntField("trie_single", val, Field.Store.NO));
        doc.add(new LegacyIntField("notrie_single", val, NO_TRIE_TYPE));
      }
      if (0 != TestUtil.nextInt(random(), 0, 9)) {
        int numMulti = atLeast(1);
        while (0 < numMulti--) {
          int val = TestUtil.nextInt(random(), MIN, MAX);
          doc.add(new LegacyIntField("trie_multi", val, Field.Store.NO));
          doc.add(new LegacyIntField("notrie_multi", val, NO_TRIE_TYPE));
        }
      }
      iw.addDocument(doc);
    }

    iw.close();

    final DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir), UNINVERT_MAP);
    TestUtil.checkReader(ir);

    final int NUM_LEAVES = ir.leaves().size();

    // check the leaves: no more than the total set size
    for (LeafReaderContext rc : ir.leaves()) {
      final LeafReader ar = rc.reader();
      for (String f : UNINVERT_MAP.keySet()) {
        final SortedSetDocValues v = DocValues.getSortedSet(ar, f);
        final long valSetSize = v.getValueCount();
        assertTrue(f + ": Expected no more than " + EXPECTED_VALSET_SIZE + " values per segment, got " +
                   valSetSize + " from: " + ar.toString(),
                   valSetSize <= EXPECTED_VALSET_SIZE);

        if (1 == NUM_LEAVES && MULTI_VALUES.contains(f)) {
          // tighter check on multi fields in single segment index since we know one doc has all of them
          assertEquals(f + ": Single segment LeafReader's value set should have had exactly expected size",
                       EXPECTED_VALSET_SIZE, valSetSize);
        }
      }
    }

    // check the composite of all leaves: exact expectation of set size
    final LeafReader composite = SlowCompositeReaderWrapper.wrap(ir);
    TestUtil.checkReader(composite);

    for (String f : MULTI_VALUES) {
      final SortedSetDocValues v = composite.getSortedSetDocValues(f);
      final long valSetSize = v.getValueCount();
      assertEquals(f + ": Composite reader value set should have had exactly expected size",
                   EXPECTED_VALSET_SIZE, valSetSize);
    }

    ir.close();
    dir.close();
  }

  public void testSortedSetEmptyIndex() throws IOException {
    final Directory dir = newDirectory();
    final IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
    iw.close();

    final Map<String,Type> UNINVERT_MAP = new LinkedHashMap<String,Type>();
    for (Type t : EnumSet.allOf(Type.class)) {
      UNINVERT_MAP.put(t.name(), t);
    }

    final DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir), UNINVERT_MAP);
    TestUtil.checkReader(ir);

    final LeafReader composite = SlowCompositeReaderWrapper.wrap(ir);
    TestUtil.checkReader(composite);

    for (String f : UNINVERT_MAP.keySet()) {
      // check the leaves
      // (normally there are none for an empty index, so this is really just future
      // proofing in case that changes for some reason)
      for (LeafReaderContext rc : ir.leaves()) {
        final LeafReader ar = rc.reader();
        assertNull(f + ": Expected no doc values from empty index (leaf)",
                   ar.getSortedSetDocValues(f));
      }

      // check the composite
      assertNull(f + ": Expected no doc values from empty index (composite)",
                 composite.getSortedSetDocValues(f));
    }

    ir.close();
    dir.close();
  }

  public void testFieldInfos() throws IOException {
    Directory dir = newDirectory();
    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));

    Document doc = new Document();
    BytesRef idBytes = new BytesRef("id");
    doc.add(new StringField("id", idBytes, Store.YES));
    doc.add(new LegacyIntField("int", 5, Store.YES));
    doc.add(new NumericDocValuesField("dv", 5));
    doc.add(new IntPoint("dint", 5));
    doc.add(new StoredField("stored", 5)); // not indexed
    iw.addDocument(doc);

    iw.forceMerge(1);
    iw.close();

    Map<String, Type> uninvertingMap = new HashMap<>();
    uninvertingMap.put("int", Type.LEGACY_INTEGER);
    uninvertingMap.put("dv", Type.LEGACY_INTEGER);
    uninvertingMap.put("dint", Type.INTEGER_POINT);

    DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir), uninvertingMap);
    LeafReader leafReader = ir.leaves().get(0).reader();

    FieldInfo intFInfo = leafReader.getFieldInfos().fieldInfo("int");
    assertEquals(DocValuesType.NUMERIC, intFInfo.getDocValuesType());
    assertEquals(0, intFInfo.getPointDimensionCount());
    assertEquals(0, intFInfo.getPointNumBytes());

    FieldInfo dintFInfo = leafReader.getFieldInfos().fieldInfo("dint");
    assertEquals(DocValuesType.NUMERIC, dintFInfo.getDocValuesType());
    assertEquals(1, dintFInfo.getPointDimensionCount());
    assertEquals(4, dintFInfo.getPointNumBytes());

    FieldInfo dvFInfo = leafReader.getFieldInfos().fieldInfo("dv");
    assertEquals(DocValuesType.NUMERIC, dvFInfo.getDocValuesType());

    FieldInfo storedFInfo = leafReader.getFieldInfos().fieldInfo("stored");
    assertEquals(DocValuesType.NONE, storedFInfo.getDocValuesType());

    TestUtil.checkReader(ir);
    ir.close();
    dir.close();
  }

}
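For orientation, a minimal sketch (not part of this patch) of how a caller can combine the Solr-packaged classes exercised above: uninvert an indexed-only field through UninvertingReader, then obtain a composite view through SlowCompositeReaderWrapper. The field name "price", the class name, and the method name are assumptions for illustration; only the wrap calls and the Type constant come from the code above.

// Sketch only: expose SortedSet doc values for a field that was indexed
// (e.g. as a LegacyIntField) without doc values. Assumes an already-open
// DirectoryReader; the "price" field name is illustrative.
import java.io.IOException;
import java.util.Collections;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.solr.index.SlowCompositeReaderWrapper;
import org.apache.solr.uninverting.UninvertingReader;
import org.apache.solr.uninverting.UninvertingReader.Type;

public class UninvertingSketch {
  public static SortedSetDocValues priceValues(DirectoryReader reader) throws IOException {
    // Uninvert the "price" field so it can be read through the doc values APIs.
    DirectoryReader uninverting = UninvertingReader.wrap(reader,
        Collections.singletonMap("price", Type.SORTED_SET_INTEGER));
    // Merge all segments into one (slow) composite leaf view, as the tests above do.
    LeafReader composite = SlowCompositeReaderWrapper.wrap(uninverting);
    return composite.getSortedSetDocValues("price");
  }
}
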
@@ -22,25 +22,24 @@ import org.apache.lucene.document.Document;
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.NumericDocValues;
-import org.apache.lucene.index.SlowCompositeReaderWrapper;
 import org.apache.lucene.search.similarities.ClassicSimilarity;
 import org.apache.lucene.util.TestUtil;
 import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.common.SolrDocument;
-import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.SolrInputField;
+import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.core.SolrCore;
+import org.apache.solr.index.SlowCompositeReaderWrapper;
+import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.response.ResultContext;
-import org.apache.solr.search.SolrIndexSearcher;
-import org.apache.solr.search.DocList;
+import org.apache.solr.response.SolrQueryResponse;
 import org.apache.solr.schema.CopyField;
 import org.apache.solr.schema.FieldType;
 import org.apache.solr.schema.IndexSchema;
-import org.apache.solr.request.SolrQueryRequest;
-import org.apache.solr.response.SolrQueryResponse;
+import org.apache.solr.search.DocList;
+import org.apache.solr.search.SolrIndexSearcher;
 
 import org.junit.BeforeClass;
 import org.junit.Test;