diff --git a/lucene/misc/src/java/org/apache/lucene/uninverting/UninvertingReader.java b/lucene/misc/src/java/org/apache/lucene/uninverting/UninvertingReader.java index a4a518ad71a..12fa719e78c 100644 --- a/lucene/misc/src/java/org/apache/lucene/uninverting/UninvertingReader.java +++ b/lucene/misc/src/java/org/apache/lucene/uninverting/UninvertingReader.java @@ -220,11 +220,13 @@ public class UninvertingReader extends FilterAtomicReader { public NumericDocValues getNumericDocValues(String field) throws IOException { Type v = mapping.get(field); if (v != null) { - switch (mapping.get(field)) { + switch (v) { case INTEGER: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.NUMERIC_UTILS_INT_PARSER, true); case FLOAT: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.NUMERIC_UTILS_FLOAT_PARSER, true); case LONG: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.NUMERIC_UTILS_LONG_PARSER, true); case DOUBLE: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.NUMERIC_UTILS_DOUBLE_PARSER, true); + default: + throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + v); } } return super.getNumericDocValues(field); @@ -232,8 +234,11 @@ public class UninvertingReader extends FilterAtomicReader { @Override public BinaryDocValues getBinaryDocValues(String field) throws IOException { - if (mapping.get(field) == Type.BINARY) { + Type v = mapping.get(field); + if (v == Type.BINARY) { return FieldCache.DEFAULT.getTerms(in, field, true); + } else if (v != null && v != Type.SORTED) { + throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + v); } else { return in.getBinaryDocValues(field); } @@ -241,8 +246,11 @@ public class UninvertingReader extends FilterAtomicReader { @Override public SortedDocValues getSortedDocValues(String field) throws IOException { - if (mapping.get(field) == Type.SORTED) { + Type v = mapping.get(field); + if (v == Type.SORTED) { return FieldCache.DEFAULT.getTermsIndex(in, 
field); + } else if (v != null) { + throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + v); } else { return in.getSortedDocValues(field); } @@ -252,7 +260,7 @@ public class UninvertingReader extends FilterAtomicReader { public SortedSetDocValues getSortedSetDocValues(String field) throws IOException { Type v = mapping.get(field); if (v != null) { - switch (mapping.get(field)) { + switch (v) { case SORTED_SET_INTEGER: case SORTED_SET_FLOAT: return FieldCache.DEFAULT.getDocTermOrds(in, field, FieldCache.INT32_TERM_PREFIX); @@ -261,6 +269,10 @@ public class UninvertingReader extends FilterAtomicReader { return FieldCache.DEFAULT.getDocTermOrds(in, field, FieldCache.INT64_TERM_PREFIX); case SORTED_SET_BINARY: return FieldCache.DEFAULT.getDocTermOrds(in, field, null); + default: + if (v != Type.SORTED) { + throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + v); + } } } return in.getSortedSetDocValues(field); diff --git a/solr/contrib/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java b/solr/contrib/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java index a04e9791582..7e25e57cd19 100644 --- a/solr/contrib/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java +++ b/solr/contrib/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java @@ -38,6 +38,7 @@ import org.apache.lucene.search.FieldCacheRangeFilter; import org.apache.lucene.search.Query; import org.apache.lucene.search.SortField; import org.apache.lucene.search.TermRangeQuery; +import org.apache.lucene.uninverting.UninvertingReader.Type; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.Version; import org.apache.lucene.analysis.util.ResourceLoader; @@ -224,6 +225,15 @@ public class ICUCollationField extends FieldType { public SortField getSortField(SchemaField field, boolean top) { return getStringSort(field, top); } + + @Override + public Type 
getUninversionType(SchemaField sf) { + if (sf.multiValued()) { + return Type.SORTED_SET_BINARY; + } else { + return Type.SORTED; + } + } @Override public Analyzer getIndexAnalyzer() { diff --git a/solr/core/src/java/org/apache/solr/core/SolrCore.java b/solr/core/src/java/org/apache/solr/core/SolrCore.java index 567fc3009f9..9d5127809b5 100644 --- a/solr/core/src/java/org/apache/solr/core/SolrCore.java +++ b/solr/core/src/java/org/apache/solr/core/SolrCore.java @@ -1461,7 +1461,7 @@ public final class SolrCore implements SolrInfoMBean, Closeable { if (newestSearcher != null && (nrt || indexDirFile.equals(newIndexDirFile))) { DirectoryReader newReader; - DirectoryReader currentReader = newestSearcher.get().getIndexReader(); + DirectoryReader currentReader = newestSearcher.get().getRawReader(); // SolrCore.verbose("start reopen from",previousSearcher,"writer=",writer); diff --git a/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java b/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java index c931c59e4c4..6fe4584f0a6 100644 --- a/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java +++ b/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java @@ -60,10 +60,13 @@ public class DocValuesFacets { SchemaField schemaField = searcher.getSchema().getField(fieldName); FieldType ft = schemaField.getType(); NamedList res = new NamedList<>(); + + // TODO: remove multiValuedFieldCache(), check dv type / uninversion type? 
+ final boolean multiValued = schemaField.multiValued() || ft.multiValuedFieldCache(); final SortedSetDocValues si; // for term lookups only OrdinalMap ordinalMap = null; // for mapping per-segment ords to global ones - if (schemaField.multiValued()) { + if (multiValued) { si = searcher.getAtomicReader().getSortedSetDocValues(fieldName); if (si instanceof MultiSortedSetDocValues) { ordinalMap = ((MultiSortedSetDocValues)si).mapping; @@ -126,7 +129,7 @@ public class DocValuesFacets { disi = dis.iterator(); } if (disi != null) { - if (schemaField.multiValued()) { + if (multiValued) { SortedSetDocValues sub = leaf.reader().getSortedSetDocValues(fieldName); if (sub == null) { sub = DocValues.EMPTY_SORTED_SET; diff --git a/solr/core/src/java/org/apache/solr/request/SimpleFacets.java b/solr/core/src/java/org/apache/solr/request/SimpleFacets.java index 6c1bbed5729..e0e43c861d6 100644 --- a/solr/core/src/java/org/apache/solr/request/SimpleFacets.java +++ b/solr/core/src/java/org/apache/solr/request/SimpleFacets.java @@ -378,18 +378,13 @@ public class SimpleFacets { final boolean multiToken = sf.multiValued() || ft.multiValuedFieldCache(); - if (method == null && ft.getNumericType() != null && !sf.multiValued()) { + if (ft.getNumericType() != null && !sf.multiValued()) { // the per-segment approach is optimal for numeric field types since there // are no global ords to merge and no need to create an expensive // top-level reader method = FacetMethod.FCS; } - if (ft.getNumericType() != null && sf.hasDocValues()) { - // only fcs is able to leverage the numeric field caches - method = FacetMethod.FCS; - } - if (method == null) { // TODO: default to per-segment or not? 
method = FacetMethod.FC; @@ -430,14 +425,7 @@ public class SimpleFacets { } break; case FC: - if (sf.hasDocValues()) { - counts = DocValuesFacets.getCounts(searcher, base, field, offset,limit, mincount, missing, sort, prefix); - } else if (multiToken || TrieField.getMainValuePrefix(ft) != null) { - UnInvertedField uif = UnInvertedField.getUnInvertedField(field, searcher); - counts = uif.getCounts(searcher, base, offset, limit, mincount,missing,sort,prefix); - } else { - counts = getFieldCacheCounts(searcher, base, field, offset,limit, mincount, missing, sort, prefix); - } + counts = DocValuesFacets.getCounts(searcher, base, field, offset,limit, mincount, missing, sort, prefix); break; default: throw new AssertionError(); @@ -622,152 +610,6 @@ public class SimpleFacets { return docs.andNotSize(hasVal); } - - /** - * Use the Lucene FieldCache to get counts for each unique field value in docs. - * The field must have at most one indexed token per document. - */ - public static NamedList getFieldCacheCounts(SolrIndexSearcher searcher, DocSet docs, String fieldName, int offset, int limit, int mincount, boolean missing, String sort, String prefix) throws IOException { - // TODO: If the number of terms is high compared to docs.size(), and zeros==false, - // we should use an alternate strategy to avoid - // 1) creating another huge int[] for the counts - // 2) looping over that huge int[] looking for the rare non-zeros. - // - // Yet another variation: if docs.size() is small and termvectors are stored, - // then use them instead of the FieldCache. - // - - // TODO: this function is too big and could use some refactoring, but - // we also need a facet cache, and refactoring of SimpleFacets instead of - // trying to pass all the various params around. 
- - FieldType ft = searcher.getSchema().getFieldType(fieldName); - NamedList res = new NamedList<>(); - - SortedDocValues si = DocValues.getSorted(searcher.getAtomicReader(), fieldName); - - final BytesRef br = new BytesRef(); - - final BytesRef prefixRef; - if (prefix == null) { - prefixRef = null; - } else if (prefix.length()==0) { - prefix = null; - prefixRef = null; - } else { - prefixRef = new BytesRef(prefix); - } - - int startTermIndex, endTermIndex; - if (prefix!=null) { - startTermIndex = si.lookupTerm(prefixRef); - if (startTermIndex<0) startTermIndex=-startTermIndex-1; - prefixRef.append(UnicodeUtil.BIG_TERM); - endTermIndex = si.lookupTerm(prefixRef); - assert endTermIndex < 0; - endTermIndex = -endTermIndex-1; - } else { - startTermIndex=-1; - endTermIndex=si.getValueCount(); - } - - final int nTerms=endTermIndex-startTermIndex; - int missingCount = -1; - final CharsRef charsRef = new CharsRef(10); - if (nTerms>0 && docs.size() >= mincount) { - - // count collection array only needs to be as big as the number of terms we are - // going to collect counts for. - final int[] counts = new int[nTerms]; - - DocIterator iter = docs.iterator(); - - while (iter.hasNext()) { - int term = si.getOrd(iter.nextDoc()); - int arrIdx = term-startTermIndex; - if (arrIdx>=0 && arrIdx=0 ? limit : Integer.MAX_VALUE; - - if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) { - int maxsize = limit>0 ? offset+limit : Integer.MAX_VALUE-1; - maxsize = Math.min(maxsize, nTerms); - LongPriorityQueue queue = new LongPriorityQueue(Math.min(maxsize,1000), maxsize, Long.MIN_VALUE); - - int min=mincount-1; // the smallest value in the top 'N' values - for (int i=(startTermIndex==-1)?1:0; imin) { - // NOTE: we use c>min rather than c>=min as an optimization because we are going in - // index order, so we already know that the keys are ordered. This can be very - // important if a lot of the counts are repeated (like zero counts would be). 
- - // smaller term numbers sort higher, so subtract the term number instead - long pair = (((long)c)<<32) + (Integer.MAX_VALUE - i); - boolean displaced = queue.insert(pair); - if (displaced) min=(int)(queue.top() >>> 32); - } - } - - // if we are deep paging, we don't have to order the highest "offset" counts. - int collectCount = Math.max(0, queue.size() - off); - assert collectCount <= lim; - - // the start and end indexes of our list "sorted" (starting with the highest value) - int sortedIdxStart = queue.size() - (collectCount - 1); - int sortedIdxEnd = queue.size() + 1; - final long[] sorted = queue.sort(collectCount); - - for (int i=sortedIdxStart; i>> 32); - int tnum = Integer.MAX_VALUE - (int)pair; - si.lookupOrd(startTermIndex+tnum, br); - ft.indexedToReadable(br, charsRef); - res.add(charsRef.toString(), c); - } - - } else { - // add results in index order - int i=(startTermIndex==-1)?1:0; - if (mincount<=0) { - // if mincount<=0, then we won't discard any terms and we know exactly - // where to start. - i+=off; - off=0; - } - - for (; i=0) continue; - if (--lim<0) break; - si.lookupOrd(startTermIndex+i, br); - ft.indexedToReadable(br, charsRef); - res.add(charsRef.toString(), c); - } - } - } - - if (missing) { - if (missingCount < 0) { - missingCount = getFieldMissingCount(searcher,docs,fieldName); - } - res.add(null, missingCount); - } - - return res; - } - - /** * Returns a list of terms in the specified field along with the * corresponding count of documents in the set that match that constraint. 
diff --git a/solr/core/src/java/org/apache/solr/schema/AbstractSpatialFieldType.java b/solr/core/src/java/org/apache/solr/schema/AbstractSpatialFieldType.java index 3b1ddd19850..df9cf7d2dee 100644 --- a/solr/core/src/java/org/apache/solr/schema/AbstractSpatialFieldType.java +++ b/solr/core/src/java/org/apache/solr/schema/AbstractSpatialFieldType.java @@ -40,6 +40,7 @@ import org.apache.lucene.spatial.SpatialStrategy; import org.apache.lucene.spatial.query.SpatialArgs; import org.apache.lucene.spatial.query.SpatialArgsParser; import org.apache.lucene.spatial.query.SpatialOperation; +import org.apache.lucene.uninverting.UninvertingReader.Type; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.SolrParams; import org.apache.solr.response.TextResponseWriter; @@ -128,6 +129,11 @@ public abstract class AbstractSpatialFieldType extend throw new IllegalStateException("instead call createFields() because isPolyField() is true"); } + @Override + public Type getUninversionType(SchemaField sf) { + return null; + } + @Override public List createFields(SchemaField field, Object val, float boost) { String shapeStr = null; diff --git a/solr/core/src/java/org/apache/solr/schema/BinaryField.java b/solr/core/src/java/org/apache/solr/schema/BinaryField.java index ad22555a9f1..20c03ac4e41 100644 --- a/solr/core/src/java/org/apache/solr/schema/BinaryField.java +++ b/solr/core/src/java/org/apache/solr/schema/BinaryField.java @@ -23,6 +23,7 @@ import java.nio.ByteBuffer; import org.apache.lucene.document.Field; import org.apache.lucene.index.StorableField; import org.apache.lucene.search.SortField; +import org.apache.lucene.uninverting.UninvertingReader.Type; import org.apache.lucene.util.BytesRef; import org.apache.solr.common.util.Base64; import org.apache.solr.response.TextResponseWriter; @@ -44,6 +45,15 @@ public class BinaryField extends FieldType { throw new RuntimeException("Cannot sort on a Binary field"); } + @Override + public Type 
getUninversionType(SchemaField sf) { + // TODO: maybe just return null? + if (sf.multiValued()) { + return Type.SORTED_SET_BINARY; + } else { + return Type.BINARY; + } + } @Override public String toExternal(StorableField f) { diff --git a/solr/core/src/java/org/apache/solr/schema/BoolField.java b/solr/core/src/java/org/apache/solr/schema/BoolField.java index 4c0f84a0927..3fd7caa2adb 100644 --- a/solr/core/src/java/org/apache/solr/schema/BoolField.java +++ b/solr/core/src/java/org/apache/solr/schema/BoolField.java @@ -34,6 +34,7 @@ import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.docvalues.BoolDocValues; import org.apache.lucene.queries.function.valuesource.OrdFieldSource; import org.apache.lucene.search.SortField; +import org.apache.lucene.uninverting.UninvertingReader.Type; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.mutable.MutableValue; @@ -51,6 +52,15 @@ public class BoolField extends PrimitiveFieldType { return getStringSort(field,reverse); } + @Override + public Type getUninversionType(SchemaField sf) { + if (sf.multiValued()) { + return Type.SORTED_SET_BINARY; + } else { + return Type.SORTED; + } + } + @Override public ValueSource getValueSource(SchemaField field, QParser qparser) { field.checkFieldCacheSource(qparser); diff --git a/solr/core/src/java/org/apache/solr/schema/CollationField.java b/solr/core/src/java/org/apache/solr/schema/CollationField.java index 37797b9bf81..621c7026ff8 100644 --- a/solr/core/src/java/org/apache/solr/schema/CollationField.java +++ b/solr/core/src/java/org/apache/solr/schema/CollationField.java @@ -42,6 +42,7 @@ import org.apache.lucene.search.FieldCacheRangeFilter; import org.apache.lucene.search.Query; import org.apache.lucene.search.SortField; import org.apache.lucene.search.TermRangeQuery; +import org.apache.lucene.uninverting.UninvertingReader.Type; import org.apache.lucene.util.BytesRef; import 
org.apache.lucene.util.Version; import org.apache.lucene.analysis.util.ResourceLoader; @@ -199,6 +200,15 @@ public class CollationField extends FieldType { public SortField getSortField(SchemaField field, boolean top) { return getStringSort(field, top); } + + @Override + public Type getUninversionType(SchemaField sf) { + if (sf.multiValued()) { + return Type.SORTED_SET_BINARY; + } else { + return Type.SORTED; + } + } @Override public Analyzer getIndexAnalyzer() { diff --git a/solr/core/src/java/org/apache/solr/schema/CurrencyField.java b/solr/core/src/java/org/apache/solr/schema/CurrencyField.java index 7379e0fa31b..ca0c785676d 100644 --- a/solr/core/src/java/org/apache/solr/schema/CurrencyField.java +++ b/solr/core/src/java/org/apache/solr/schema/CurrencyField.java @@ -26,6 +26,7 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.SortField; import org.apache.lucene.search.Filter; import org.apache.lucene.search.FieldValueFilter; +import org.apache.lucene.uninverting.UninvertingReader.Type; import org.apache.lucene.queries.ChainedFilter; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; @@ -343,6 +344,11 @@ public class CurrencyField extends FieldType implements SchemaAware, ResourceLoa // Convert all values to default currency for sorting. 
return (new RawCurrencyValueSource(field, defaultCurrency, null)).getSortField(reverse); } + + @Override + public Type getUninversionType(SchemaField sf) { + return null; + } @Override public void write(TextResponseWriter writer, String name, StorableField field) throws IOException { diff --git a/solr/core/src/java/org/apache/solr/schema/EnumField.java b/solr/core/src/java/org/apache/solr/schema/EnumField.java index 9fef1126a63..ed9321bce1e 100644 --- a/solr/core/src/java/org/apache/solr/schema/EnumField.java +++ b/solr/core/src/java/org/apache/solr/schema/EnumField.java @@ -22,6 +22,7 @@ import org.apache.lucene.index.StorableField; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.valuesource.EnumFieldSource; import org.apache.lucene.search.*; +import org.apache.lucene.uninverting.UninvertingReader.Type; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.NumericUtils; @@ -182,6 +183,15 @@ public class EnumField extends PrimitiveFieldType { sf.setMissingValue(missingValue); return sf; } + + @Override + public Type getUninversionType(SchemaField sf) { + if (sf.multiValued()) { + return Type.SORTED_SET_INTEGER; + } else { + return Type.INTEGER; + } + } /** * {@inheritDoc} diff --git a/solr/core/src/java/org/apache/solr/schema/ExternalFileField.java b/solr/core/src/java/org/apache/solr/schema/ExternalFileField.java index e445c60a0ec..98e76af4d4d 100644 --- a/solr/core/src/java/org/apache/solr/schema/ExternalFileField.java +++ b/solr/core/src/java/org/apache/solr/schema/ExternalFileField.java @@ -19,6 +19,7 @@ package org.apache.solr.schema; import org.apache.lucene.index.StorableField; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.search.SortField; +import org.apache.lucene.uninverting.UninvertingReader.Type; import org.apache.solr.common.SolrException; import org.apache.solr.response.TextResponseWriter; import 
org.apache.solr.search.QParser; @@ -90,6 +91,11 @@ public class ExternalFileField extends FieldType implements SchemaAware { FileFloatSource source = getFileFloatSource(field); return source.getSortField(reverse); } + + @Override + public Type getUninversionType(SchemaField sf) { + return null; + } @Override public ValueSource getValueSource(SchemaField field, QParser parser) { diff --git a/solr/core/src/java/org/apache/solr/schema/FieldType.java b/solr/core/src/java/org/apache/solr/schema/FieldType.java index 1f62f8d7022..42b939516c4 100644 --- a/solr/core/src/java/org/apache/solr/schema/FieldType.java +++ b/solr/core/src/java/org/apache/solr/schema/FieldType.java @@ -41,6 +41,7 @@ import org.apache.lucene.search.SortField; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.uninverting.UninvertingReader; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.UnicodeUtil; @@ -446,6 +447,16 @@ public abstract class FieldType extends FieldProperties { query.setRewriteMethod(sf.getType().getRewriteMethod(parser, sf)); return query; } + + /** + * If DocValues is not enabled for a field but it is indexed, docvalues can be constructed + * on the fly (uninverted, aka fieldcache) on the first request to sort, facet, etc. + * This specifies the structure to use. + * + * @param sf field instance + * @return type to uninvert, or {@code null} (to disallow uninversion for the field) + */ + public abstract UninvertingReader.Type getUninversionType(SchemaField sf); /** * Default analyzer for types that only produce 1 verbatim token... 
diff --git a/solr/core/src/java/org/apache/solr/schema/GeoHashField.java b/solr/core/src/java/org/apache/solr/schema/GeoHashField.java index e8d7b95a43f..bbe7a72566c 100644 --- a/solr/core/src/java/org/apache/solr/schema/GeoHashField.java +++ b/solr/core/src/java/org/apache/solr/schema/GeoHashField.java @@ -22,6 +22,8 @@ import org.apache.lucene.queries.function.valuesource.LiteralValueSource; import org.apache.lucene.index.StorableField; import org.apache.lucene.search.Query; import org.apache.lucene.search.SortField; +import org.apache.lucene.uninverting.UninvertingReader.Type; + import com.spatial4j.core.context.SpatialContext; import com.spatial4j.core.io.GeohashUtils; import com.spatial4j.core.shape.Point; @@ -47,6 +49,15 @@ public class GeoHashField extends FieldType implements SpatialQueryable { public SortField getSortField(SchemaField field, boolean top) { return getStringSort(field, top); } + + @Override + public Type getUninversionType(SchemaField sf) { + if (sf.multiValued()) { + return Type.SORTED_SET_BINARY; + } else { + return Type.SORTED; + } + } //QUESTION: Should we do a fast and crude one? 
Or actually check distances //Fast and crude could use EdgeNGrams, but that would require a different diff --git a/solr/core/src/java/org/apache/solr/schema/IndexSchema.java b/solr/core/src/java/org/apache/solr/schema/IndexSchema.java index d7f3124802e..304ba87fe75 100644 --- a/solr/core/src/java/org/apache/solr/schema/IndexSchema.java +++ b/solr/core/src/java/org/apache/solr/schema/IndexSchema.java @@ -19,10 +19,14 @@ package org.apache.solr.schema; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.AnalyzerWrapper; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.StorableField; import org.apache.lucene.index.StoredDocument; import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.uninverting.UninvertingReader; import org.apache.lucene.util.Version; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; @@ -357,6 +361,22 @@ public class IndexSchema { indexAnalyzer = new SolrIndexAnalyzer(); queryAnalyzer = new SolrQueryAnalyzer(); } + + public Map getUninversionMap(DirectoryReader reader) { + Map map = new HashMap<>(); + for (FieldInfo f : MultiFields.getMergedFieldInfos(reader)) { + if (f.hasDocValues() == false && f.isIndexed()) { + SchemaField sf = getFieldOrNull(f.name); + if (sf != null) { + UninvertingReader.Type type = sf.getType().getUninversionType(sf); + if (type != null) { + map.put(f.name, type); + } + } + } + } + return map; + } /** * Writes the schema in schema.xml format to the given writer diff --git a/solr/core/src/java/org/apache/solr/schema/LatLonType.java b/solr/core/src/java/org/apache/solr/schema/LatLonType.java index 2763c8439cb..de5bc61b1c1 100644 --- a/solr/core/src/java/org/apache/solr/schema/LatLonType.java +++ b/solr/core/src/java/org/apache/solr/schema/LatLonType.java @@ -41,6 
+41,7 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.SortField; import org.apache.lucene.search.Weight; +import org.apache.lucene.uninverting.UninvertingReader.Type; import org.apache.lucene.util.Bits; import org.apache.solr.common.SolrException; import org.apache.solr.response.TextResponseWriter; @@ -241,6 +242,11 @@ public class LatLonType extends AbstractSubTypeFieldType implements SpatialQuery public SortField getSortField(SchemaField field, boolean top) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Sorting not supported on LatLonType " + field.getName()); } + + @Override + public Type getUninversionType(SchemaField sf) { + return null; + } diff --git a/solr/core/src/java/org/apache/solr/schema/PointType.java b/solr/core/src/java/org/apache/solr/schema/PointType.java index 3f6ffa52e6c..6cba8b4ce46 100644 --- a/solr/core/src/java/org/apache/solr/schema/PointType.java +++ b/solr/core/src/java/org/apache/solr/schema/PointType.java @@ -25,6 +25,7 @@ import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.SortField; +import org.apache.lucene.uninverting.UninvertingReader.Type; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.MapSolrParams; import org.apache.solr.common.params.SolrParams; @@ -119,6 +120,11 @@ public class PointType extends CoordinateFieldType implements SpatialQueryable { public SortField getSortField(SchemaField field, boolean top) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Sorting not supported on PointType " + field.getName()); } + + @Override + public Type getUninversionType(SchemaField sf) { + return null; + } @Override /** diff --git a/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java b/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java index 11ffc929b4b..d60e8647027 100644 
--- a/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java +++ b/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java @@ -30,12 +30,17 @@ import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.document.Field; import org.apache.lucene.index.StorableField; import org.apache.lucene.index.FieldInfo.IndexOptions; +import org.apache.lucene.queries.function.ValueSource; +import org.apache.lucene.queries.function.valuesource.SortedSetFieldSource; import org.apache.lucene.search.SortField; +import org.apache.lucene.search.SortedSetSortField; +import org.apache.lucene.uninverting.UninvertingReader.Type; import org.apache.lucene.util.AttributeFactory; import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.AttributeSource.State; import org.apache.solr.analysis.SolrAnalyzer; import org.apache.solr.response.TextResponseWriter; +import org.apache.solr.search.QParser; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -112,10 +117,20 @@ public class PreAnalyzedField extends FieldType { } return f; } - + @Override public SortField getSortField(SchemaField field, boolean top) { - return getStringSort(field, top); + return new SortedSetSortField(field.getName(), top); + } + + @Override + public ValueSource getValueSource(SchemaField field, QParser parser) { + return new SortedSetFieldSource(field.getName()); + } + + @Override + public Type getUninversionType(SchemaField sf) { + return Type.SORTED_SET_BINARY; } @Override diff --git a/solr/core/src/java/org/apache/solr/schema/RandomSortField.java b/solr/core/src/java/org/apache/solr/schema/RandomSortField.java index c0d0ec00d92..aa516fd82dc 100644 --- a/solr/core/src/java/org/apache/solr/schema/RandomSortField.java +++ b/solr/core/src/java/org/apache/solr/schema/RandomSortField.java @@ -30,6 +30,7 @@ import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.ValueSource; import 
org.apache.lucene.queries.function.docvalues.IntDocValues; import org.apache.lucene.search.*; +import org.apache.lucene.uninverting.UninvertingReader.Type; import org.apache.solr.response.TextResponseWriter; import org.apache.solr.search.QParser; @@ -92,6 +93,11 @@ public class RandomSortField extends FieldType { public SortField getSortField(SchemaField field, boolean reverse) { return new SortField(field.getName(), randomComparatorSource, reverse); } + + @Override + public Type getUninversionType(SchemaField sf) { + return null; + } @Override public ValueSource getValueSource(SchemaField field, QParser qparser) { diff --git a/solr/core/src/java/org/apache/solr/schema/StrField.java b/solr/core/src/java/org/apache/solr/schema/StrField.java index 9fc4320eda1..7e6a91333b1 100644 --- a/solr/core/src/java/org/apache/solr/schema/StrField.java +++ b/solr/core/src/java/org/apache/solr/schema/StrField.java @@ -28,6 +28,7 @@ import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.index.StorableField; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.search.SortField; +import org.apache.lucene.uninverting.UninvertingReader.Type; import org.apache.lucene.util.BytesRef; import org.apache.solr.response.TextResponseWriter; import org.apache.solr.search.QParser; @@ -62,6 +63,15 @@ public class StrField extends PrimitiveFieldType { return getStringSort(field,reverse); } + @Override + public Type getUninversionType(SchemaField sf) { + if (sf.multiValued()) { + return Type.SORTED_SET_BINARY; + } else { + return Type.SORTED; + } + } + @Override public void write(TextResponseWriter writer, String name, StorableField f) throws IOException { writer.writeStr(name, f.stringValue(), true); diff --git a/solr/core/src/java/org/apache/solr/schema/TextField.java b/solr/core/src/java/org/apache/solr/schema/TextField.java index 669dea6572d..46e5868e18d 100644 --- a/solr/core/src/java/org/apache/solr/schema/TextField.java +++ 
b/solr/core/src/java/org/apache/solr/schema/TextField.java @@ -18,10 +18,13 @@ package org.apache.solr.schema; import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; +import org.apache.lucene.queries.function.ValueSource; +import org.apache.lucene.queries.function.valuesource.SortedSetFieldSource; import org.apache.lucene.search.*; import org.apache.lucene.index.StorableField; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.uninverting.UninvertingReader.Type; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.QueryBuilder; import org.apache.solr.common.SolrException; @@ -93,7 +96,17 @@ public class TextField extends FieldType { @Override public SortField getSortField(SchemaField field, boolean reverse) { /* :TODO: maybe warn if isTokenized(), but doesn't use LimitTokenCountFilter in it's chain? */ - return getStringSort(field, reverse); + return new SortedSetSortField(field.getName(), reverse); + } + + @Override + public ValueSource getValueSource(SchemaField field, QParser parser) { + return new SortedSetFieldSource(field.getName()); + } + + @Override + public Type getUninversionType(SchemaField sf) { + return Type.SORTED_SET_BINARY; } @Override diff --git a/solr/core/src/java/org/apache/solr/schema/TrieDateField.java b/solr/core/src/java/org/apache/solr/schema/TrieDateField.java index 967522bc066..aaa1c5259ee 100644 --- a/solr/core/src/java/org/apache/solr/schema/TrieDateField.java +++ b/solr/core/src/java/org/apache/solr/schema/TrieDateField.java @@ -30,6 +30,7 @@ import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.search.SortField; import org.apache.lucene.search.Query; import org.apache.lucene.search.NumericRangeQuery; +import org.apache.lucene.uninverting.UninvertingReader.Type; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; @@ -364,6 +365,11 @@ public class TrieDateField extends 
PrimitiveFieldType implements DateValueFieldT return wrappedField.getSortField(field, top); } + @Override + public Type getUninversionType(SchemaField sf) { + return wrappedField.getUninversionType(sf); + } + @Override public Object marshalSortValue(Object value) { return value; diff --git a/solr/core/src/java/org/apache/solr/schema/TrieField.java b/solr/core/src/java/org/apache/solr/schema/TrieField.java index 97c8a09a238..34c65caa6d0 100644 --- a/solr/core/src/java/org/apache/solr/schema/TrieField.java +++ b/solr/core/src/java/org/apache/solr/schema/TrieField.java @@ -43,6 +43,7 @@ import org.apache.lucene.search.FieldCacheRangeFilter; import org.apache.lucene.search.NumericRangeQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.SortField; +import org.apache.lucene.uninverting.UninvertingReader.Type; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.NumericUtils; @@ -194,6 +195,39 @@ public class TrieField extends PrimitiveFieldType { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + field.name); } } + + @Override + public Type getUninversionType(SchemaField sf) { + if (sf.multiValued()) { + switch (type) { + case INTEGER: + return Type.SORTED_SET_INTEGER; + case LONG: + case DATE: + return Type.SORTED_SET_LONG; + case FLOAT: + return Type.SORTED_SET_FLOAT; + case DOUBLE: + return Type.SORTED_SET_DOUBLE; + default: + throw new AssertionError(); + } + } else { + switch (type) { + case INTEGER: + return Type.INTEGER; + case LONG: + case DATE: + return Type.LONG; + case FLOAT: + return Type.FLOAT; + case DOUBLE: + return Type.DOUBLE; + default: + throw new AssertionError(); + } + } + } @Override public ValueSource getValueSource(SchemaField field, QParser qparser) { diff --git a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java index 
176c0df733f..dcb808092db 100644 --- a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java +++ b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java @@ -89,6 +89,7 @@ import org.apache.lucene.search.TopScoreDocCollector; import org.apache.lucene.search.TotalHitCountCollector; import org.apache.lucene.search.Weight; import org.apache.lucene.store.Directory; +import org.apache.lucene.uninverting.UninvertingReader; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.FixedBitSet; @@ -169,6 +170,9 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn private DirectoryFactory directoryFactory; private final AtomicReader atomicReader; + // only for addIndexes etc (no fieldcache) + private final DirectoryReader rawReader; + private String path; private final boolean reserveDirectory; private final boolean createdDirectory; @@ -184,18 +188,27 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn } return reader; } + + // TODO: wrap elsewhere and return a "map" from the schema that overrides get() ? 
+ // this reader supports reopen + private static DirectoryReader wrapReader(SolrCore core, DirectoryReader reader) { + assert reader != null; + assert !reader.getClass().getSimpleName().startsWith("Uninverting"); // nocommit + return UninvertingReader.wrap(reader, core.getLatestSchema().getUninversionMap(reader)); + } public SolrIndexSearcher(SolrCore core, String path, IndexSchema schema, SolrIndexConfig config, String name, boolean enableCache, DirectoryFactory directoryFactory) throws IOException { // we don't need to reserve the directory because we get it from the factory - this(core, path, schema, config, name, null, true, enableCache, false, directoryFactory); + this(core, path, schema, config, name, getReader(core, config, directoryFactory, path), true, enableCache, false, directoryFactory); } public SolrIndexSearcher(SolrCore core, String path, IndexSchema schema, SolrIndexConfig config, String name, DirectoryReader r, boolean closeReader, boolean enableCache, boolean reserveDirectory, DirectoryFactory directoryFactory) throws IOException { - super(r == null ? getReader(core, config, directoryFactory, path) : r); + super(wrapReader(core, r)); this.path = path; this.directoryFactory = directoryFactory; this.reader = (DirectoryReader) super.readerContext.reader(); + this.rawReader = r; this.atomicReader = SlowCompositeReaderWrapper.wrap(this.reader); this.core = core; this.schema = schema; @@ -303,6 +316,11 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn return atomicReader; } + /** Raw reader (no fieldcaches etc). 
Useful for operations like addIndexes */ + public final DirectoryReader getRawReader() { + return rawReader; + } + @Override public final DirectoryReader getIndexReader() { assert reader == super.getIndexReader(); diff --git a/solr/core/src/java/org/apache/solr/update/SolrIndexSplitter.java b/solr/core/src/java/org/apache/solr/update/SolrIndexSplitter.java index 1210fd667f6..86913b5263d 100644 --- a/solr/core/src/java/org/apache/solr/update/SolrIndexSplitter.java +++ b/solr/core/src/java/org/apache/solr/update/SolrIndexSplitter.java @@ -89,7 +89,7 @@ public class SolrIndexSplitter { public void split() throws IOException { - List leaves = searcher.getTopReaderContext().leaves(); + List leaves = searcher.getRawReader().leaves(); List segmentDocSets = new ArrayList<>(leaves.size()); log.info("SolrIndexSplitter: partitions=" + numPieces + " segments="+leaves.size()); diff --git a/solr/core/src/test-files/solr/collection1/conf/schema.xml b/solr/core/src/test-files/solr/collection1/conf/schema.xml index 219361c89ad..3ee60eb048c 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema.xml @@ -500,7 +500,7 @@ - + diff --git a/solr/core/src/test/org/apache/solr/core/TestMergePolicyConfig.java b/solr/core/src/test/org/apache/solr/core/TestMergePolicyConfig.java index 85c93ff872f..2507ae66b02 100644 --- a/solr/core/src/test/org/apache/solr/core/TestMergePolicyConfig.java +++ b/solr/core/src/test/org/apache/solr/core/TestMergePolicyConfig.java @@ -196,7 +196,7 @@ public class TestMergePolicyConfig extends SolrTestCaseJ4 { public static void assertCompoundSegments(SolrCore core, boolean compound) { RefCounted searcherRef = core.getRegisteredSearcher(); try { - assertCompoundSegments(searcherRef.get().getIndexReader(), compound); + assertCompoundSegments(searcherRef.get().getRawReader(), compound); } finally { searcherRef.decref(); } diff --git 
a/solr/core/src/test/org/apache/solr/search/function/SortByFunctionTest.java b/solr/core/src/test/org/apache/solr/search/function/SortByFunctionTest.java index 4264d822e1f..6f11d9e49b9 100644 --- a/solr/core/src/test/org/apache/solr/search/function/SortByFunctionTest.java +++ b/solr/core/src/test/org/apache/solr/search/function/SortByFunctionTest.java @@ -99,11 +99,11 @@ public class SortByFunctionTest extends AbstractSolrTestCase { public void testSortJoinDocFreq() throws Exception { - assertU(adoc("id", "4", "id_s", "D", "links_mfacet", "A", "links_mfacet", "B", "links_mfacet", "C" ) ); - assertU(adoc("id", "3", "id_s", "C", "links_mfacet", "A", "links_mfacet", "B" ) ); + assertU(adoc("id", "4", "id_s1", "D", "links_mfacet", "A", "links_mfacet", "B", "links_mfacet", "C" ) ); + assertU(adoc("id", "3", "id_s1", "C", "links_mfacet", "A", "links_mfacet", "B" ) ); assertU(commit()); // Make sure it uses two readers - assertU(adoc("id", "2", "id_s", "B", "links_mfacet", "A" ) ); - assertU(adoc("id", "1", "id_s", "A" ) ); + assertU(adoc("id", "2", "id_s1", "B", "links_mfacet", "A" ) ); + assertU(adoc("id", "1", "id_s1", "A" ) ); assertU(commit()); assertQ(req("q", "links_mfacet:B", "fl", "id", "sort", "id asc"), @@ -112,7 +112,7 @@ public class SortByFunctionTest extends AbstractSolrTestCase { "//result/doc[2]/int[@name='id'][.='4']" ); - assertQ(req("q", "*:*", "fl", "id", "sort", "joindf(id_s, links_mfacet) desc"), + assertQ(req("q", "*:*", "fl", "id", "sort", "joindf(id_s1, links_mfacet) desc"), "//*[@numFound='4']", "//result/doc[1]/int[@name='id'][.='1']", "//result/doc[2]/int[@name='id'][.='2']", @@ -120,7 +120,7 @@ public class SortByFunctionTest extends AbstractSolrTestCase { "//result/doc[4]/int[@name='id'][.='4']" ); - assertQ(req("q", "*:*", "fl", "id", "sort", "joindf(id_s, links_mfacet) asc"), + assertQ(req("q", "*:*", "fl", "id", "sort", "joindf(id_s1, links_mfacet) asc"), "//*[@numFound='4']", "//result/doc[1]/int[@name='id'][.='4']", 
"//result/doc[2]/int[@name='id'][.='3']",