LUCENE-5666: get solr started

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5666@1594254 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2014-05-13 15:49:10 +00:00
parent a2e4ce4196
commit 1489085807
28 changed files with 262 additions and 181 deletions

View File

@ -220,11 +220,13 @@ public class UninvertingReader extends FilterAtomicReader {
public NumericDocValues getNumericDocValues(String field) throws IOException {
Type v = mapping.get(field);
if (v != null) {
switch (mapping.get(field)) {
switch (v) {
case INTEGER: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.NUMERIC_UTILS_INT_PARSER, true);
case FLOAT: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.NUMERIC_UTILS_FLOAT_PARSER, true);
case LONG: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.NUMERIC_UTILS_LONG_PARSER, true);
case DOUBLE: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.NUMERIC_UTILS_DOUBLE_PARSER, true);
default:
throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + v);
}
}
return super.getNumericDocValues(field);
@ -232,8 +234,11 @@ public class UninvertingReader extends FilterAtomicReader {
@Override
public BinaryDocValues getBinaryDocValues(String field) throws IOException {
if (mapping.get(field) == Type.BINARY) {
Type v = mapping.get(field);
if (v == Type.BINARY) {
return FieldCache.DEFAULT.getTerms(in, field, true);
} else if (v != null && v != Type.SORTED) {
throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + v);
} else {
return in.getBinaryDocValues(field);
}
@ -241,8 +246,11 @@ public class UninvertingReader extends FilterAtomicReader {
@Override
public SortedDocValues getSortedDocValues(String field) throws IOException {
if (mapping.get(field) == Type.SORTED) {
Type v = mapping.get(field);
if (v == Type.SORTED) {
return FieldCache.DEFAULT.getTermsIndex(in, field);
} else if (v != null) {
throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + v);
} else {
return in.getSortedDocValues(field);
}
@ -252,7 +260,7 @@ public class UninvertingReader extends FilterAtomicReader {
public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
Type v = mapping.get(field);
if (v != null) {
switch (mapping.get(field)) {
switch (v) {
case SORTED_SET_INTEGER:
case SORTED_SET_FLOAT:
return FieldCache.DEFAULT.getDocTermOrds(in, field, FieldCache.INT32_TERM_PREFIX);
@ -261,6 +269,10 @@ public class UninvertingReader extends FilterAtomicReader {
return FieldCache.DEFAULT.getDocTermOrds(in, field, FieldCache.INT64_TERM_PREFIX);
case SORTED_SET_BINARY:
return FieldCache.DEFAULT.getDocTermOrds(in, field, null);
default:
if (v != Type.SORTED) {
throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + v);
}
}
}
return in.getSortedSetDocValues(field);

View File

@ -38,6 +38,7 @@ import org.apache.lucene.search.FieldCacheRangeFilter;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;
import org.apache.lucene.analysis.util.ResourceLoader;
@ -224,6 +225,15 @@ public class ICUCollationField extends FieldType {
public SortField getSortField(SchemaField field, boolean top) {
return getStringSort(field, top);
}
@Override
public Type getUninversionType(SchemaField sf) {
  // Multi-valued fields uninvert to a set of binary ords; single-valued
  // fields uninvert to a single sorted ord per document.
  return sf.multiValued() ? Type.SORTED_SET_BINARY : Type.SORTED;
}
@Override
public Analyzer getIndexAnalyzer() {

View File

@ -1461,7 +1461,7 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
if (newestSearcher != null && (nrt || indexDirFile.equals(newIndexDirFile))) {
DirectoryReader newReader;
DirectoryReader currentReader = newestSearcher.get().getIndexReader();
DirectoryReader currentReader = newestSearcher.get().getRawReader();
// SolrCore.verbose("start reopen from",previousSearcher,"writer=",writer);

View File

@ -60,10 +60,13 @@ public class DocValuesFacets {
SchemaField schemaField = searcher.getSchema().getField(fieldName);
FieldType ft = schemaField.getType();
NamedList<Integer> res = new NamedList<>();
// TODO: remove multiValuedFieldCache(), check dv type / uninversion type?
final boolean multiValued = schemaField.multiValued() || ft.multiValuedFieldCache();
final SortedSetDocValues si; // for term lookups only
OrdinalMap ordinalMap = null; // for mapping per-segment ords to global ones
if (schemaField.multiValued()) {
if (multiValued) {
si = searcher.getAtomicReader().getSortedSetDocValues(fieldName);
if (si instanceof MultiSortedSetDocValues) {
ordinalMap = ((MultiSortedSetDocValues)si).mapping;
@ -126,7 +129,7 @@ public class DocValuesFacets {
disi = dis.iterator();
}
if (disi != null) {
if (schemaField.multiValued()) {
if (multiValued) {
SortedSetDocValues sub = leaf.reader().getSortedSetDocValues(fieldName);
if (sub == null) {
sub = DocValues.EMPTY_SORTED_SET;

View File

@ -378,18 +378,13 @@ public class SimpleFacets {
final boolean multiToken = sf.multiValued() || ft.multiValuedFieldCache();
if (method == null && ft.getNumericType() != null && !sf.multiValued()) {
if (ft.getNumericType() != null && !sf.multiValued()) {
// the per-segment approach is optimal for numeric field types since there
// are no global ords to merge and no need to create an expensive
// top-level reader
method = FacetMethod.FCS;
}
if (ft.getNumericType() != null && sf.hasDocValues()) {
// only fcs is able to leverage the numeric field caches
method = FacetMethod.FCS;
}
if (method == null) {
// TODO: default to per-segment or not?
method = FacetMethod.FC;
@ -430,14 +425,7 @@ public class SimpleFacets {
}
break;
case FC:
if (sf.hasDocValues()) {
counts = DocValuesFacets.getCounts(searcher, base, field, offset,limit, mincount, missing, sort, prefix);
} else if (multiToken || TrieField.getMainValuePrefix(ft) != null) {
UnInvertedField uif = UnInvertedField.getUnInvertedField(field, searcher);
counts = uif.getCounts(searcher, base, offset, limit, mincount,missing,sort,prefix);
} else {
counts = getFieldCacheCounts(searcher, base, field, offset,limit, mincount, missing, sort, prefix);
}
counts = DocValuesFacets.getCounts(searcher, base, field, offset,limit, mincount, missing, sort, prefix);
break;
default:
throw new AssertionError();
@ -622,152 +610,6 @@ public class SimpleFacets {
return docs.andNotSize(hasVal);
}
/**
 * Use the Lucene FieldCache to get counts for each unique field value in <code>docs</code>.
 * The field must have at most one indexed token per document.
 *
 * @param searcher searcher used to resolve the schema type and top-level sorted doc values
 * @param docs the set of documents to count values over
 * @param fieldName single-valued indexed field to facet on
 * @param offset number of leading facet entries to skip
 * @param limit maximum number of entries to return (negative means unlimited)
 * @param mincount minimum count an entry must have to be included
 * @param missing if true, append an entry (with a null key) counting docs that have no value
 * @param sort facet sort mode (count order vs. index order)
 * @param prefix if non-null/non-empty, restrict counting to terms with this prefix
 */
public static NamedList<Integer> getFieldCacheCounts(SolrIndexSearcher searcher, DocSet docs, String fieldName, int offset, int limit, int mincount, boolean missing, String sort, String prefix) throws IOException {
// TODO: If the number of terms is high compared to docs.size(), and zeros==false,
// we should use an alternate strategy to avoid
// 1) creating another huge int[] for the counts
// 2) looping over that huge int[] looking for the rare non-zeros.
//
// Yet another variation: if docs.size() is small and termvectors are stored,
// then use them instead of the FieldCache.
//
// TODO: this function is too big and could use some refactoring, but
// we also need a facet cache, and refactoring of SimpleFacets instead of
// trying to pass all the various params around.
FieldType ft = searcher.getSchema().getFieldType(fieldName);
NamedList<Integer> res = new NamedList<>();
SortedDocValues si = DocValues.getSorted(searcher.getAtomicReader(), fieldName);
// scratch BytesRef reused for every ord lookup
final BytesRef br = new BytesRef();
final BytesRef prefixRef;
if (prefix == null) {
prefixRef = null;
} else if (prefix.length()==0) {
// an empty prefix matches everything: treat it the same as no prefix
prefix = null;
prefixRef = null;
} else {
prefixRef = new BytesRef(prefix);
}
// Compute the ord range [startTermIndex, endTermIndex) to count.
// With a prefix, lookupTerm's negative insertion-point return is used to
// find the first term >= prefix and the first term past the prefix range.
int startTermIndex, endTermIndex;
if (prefix!=null) {
startTermIndex = si.lookupTerm(prefixRef);
if (startTermIndex<0) startTermIndex=-startTermIndex-1;
prefixRef.append(UnicodeUtil.BIG_TERM);
endTermIndex = si.lookupTerm(prefixRef);
assert endTermIndex < 0;
endTermIndex = -endTermIndex-1;
} else {
// -1 reserves counts[0] for the "missing value" ord
startTermIndex=-1;
endTermIndex=si.getValueCount();
}
final int nTerms=endTermIndex-startTermIndex;
int missingCount = -1;
final CharsRef charsRef = new CharsRef(10);
if (nTerms>0 && docs.size() >= mincount) {
// count collection array only needs to be as big as the number of terms we are
// going to collect counts for.
final int[] counts = new int[nTerms];
DocIterator iter = docs.iterator();
while (iter.hasNext()) {
int term = si.getOrd(iter.nextDoc());
int arrIdx = term-startTermIndex;
if (arrIdx>=0 && arrIdx<nTerms) counts[arrIdx]++;
}
if (startTermIndex == -1) {
// no prefix: slot 0 accumulated docs with ord -1 (no value)
missingCount = counts[0];
}
// IDEA: we could also maintain a count of "other"... everything that fell outside
// of the top 'N'
int off=offset;
int lim=limit>=0 ? limit : Integer.MAX_VALUE;
if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
int maxsize = limit>0 ? offset+limit : Integer.MAX_VALUE-1;
maxsize = Math.min(maxsize, nTerms);
// bounded priority queue of (count, term) pairs packed into longs
LongPriorityQueue queue = new LongPriorityQueue(Math.min(maxsize,1000), maxsize, Long.MIN_VALUE);
int min=mincount-1; // the smallest value in the top 'N' values
for (int i=(startTermIndex==-1)?1:0; i<nTerms; i++) {
int c = counts[i];
if (c>min) {
// NOTE: we use c>min rather than c>=min as an optimization because we are going in
// index order, so we already know that the keys are ordered. This can be very
// important if a lot of the counts are repeated (like zero counts would be).
// smaller term numbers sort higher, so subtract the term number instead
long pair = (((long)c)<<32) + (Integer.MAX_VALUE - i);
boolean displaced = queue.insert(pair);
if (displaced) min=(int)(queue.top() >>> 32);
}
}
// if we are deep paging, we don't have to order the highest "offset" counts.
int collectCount = Math.max(0, queue.size() - off);
assert collectCount <= lim;
// the start and end indexes of our list "sorted" (starting with the highest value)
int sortedIdxStart = queue.size() - (collectCount - 1);
int sortedIdxEnd = queue.size() + 1;
final long[] sorted = queue.sort(collectCount);
for (int i=sortedIdxStart; i<sortedIdxEnd; i++) {
long pair = sorted[i];
// unpack count (high 32 bits) and term number (low 32 bits, negated)
int c = (int)(pair >>> 32);
int tnum = Integer.MAX_VALUE - (int)pair;
si.lookupOrd(startTermIndex+tnum, br);
ft.indexedToReadable(br, charsRef);
res.add(charsRef.toString(), c);
}
} else {
// add results in index order
int i=(startTermIndex==-1)?1:0;
if (mincount<=0) {
// if mincount<=0, then we won't discard any terms and we know exactly
// where to start.
i+=off;
off=0;
}
for (; i<nTerms; i++) {
int c = counts[i];
if (c<mincount || --off>=0) continue;
if (--lim<0) break;
si.lookupOrd(startTermIndex+i, br);
ft.indexedToReadable(br, charsRef);
res.add(charsRef.toString(), c);
}
}
}
if (missing) {
if (missingCount < 0) {
// missing count was not computed inline (prefix case): count it separately
missingCount = getFieldMissingCount(searcher,docs,fieldName);
}
res.add(null, missingCount);
}
return res;
}
/**
* Returns a list of terms in the specified field along with the
* corresponding count of documents in the set that match that constraint.

View File

@ -40,6 +40,7 @@ import org.apache.lucene.spatial.SpatialStrategy;
import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.query.SpatialArgsParser;
import org.apache.lucene.spatial.query.SpatialOperation;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.response.TextResponseWriter;
@ -128,6 +129,11 @@ public abstract class AbstractSpatialFieldType<T extends SpatialStrategy> extend
throw new IllegalStateException("instead call createFields() because isPolyField() is true");
}
@Override
public Type getUninversionType(SchemaField sf) {
// returning null disallows on-the-fly uninversion (fieldcache) for this field type
return null;
}
@Override
public List<StorableField> createFields(SchemaField field, Object val, float boost) {
String shapeStr = null;

View File

@ -23,6 +23,7 @@ import java.nio.ByteBuffer;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.StorableField;
import org.apache.lucene.search.SortField;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.common.util.Base64;
import org.apache.solr.response.TextResponseWriter;
@ -44,6 +45,15 @@ public class BinaryField extends FieldType {
throw new RuntimeException("Cannot sort on a Binary field");
}
@Override
public Type getUninversionType(SchemaField sf) {
  // TODO: maybe just return null?
  return sf.multiValued() ? Type.SORTED_SET_BINARY : Type.BINARY;
}
@Override
public String toExternal(StorableField f) {

View File

@ -34,6 +34,7 @@ import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.docvalues.BoolDocValues;
import org.apache.lucene.queries.function.valuesource.OrdFieldSource;
import org.apache.lucene.search.SortField;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.mutable.MutableValue;
@ -51,6 +52,15 @@ public class BoolField extends PrimitiveFieldType {
return getStringSort(field,reverse);
}
@Override
public Type getUninversionType(SchemaField sf) {
  // set of ords for multi-valued fields, a single sorted ord otherwise
  return sf.multiValued() ? Type.SORTED_SET_BINARY : Type.SORTED;
}
@Override
public ValueSource getValueSource(SchemaField field, QParser qparser) {
field.checkFieldCacheSource(qparser);

View File

@ -42,6 +42,7 @@ import org.apache.lucene.search.FieldCacheRangeFilter;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;
import org.apache.lucene.analysis.util.ResourceLoader;
@ -199,6 +200,15 @@ public class CollationField extends FieldType {
public SortField getSortField(SchemaField field, boolean top) {
return getStringSort(field, top);
}
@Override
public Type getUninversionType(SchemaField sf) {
  // multi-valued fields uninvert to a binary ord set, single-valued to sorted ords
  return sf.multiValued() ? Type.SORTED_SET_BINARY : Type.SORTED;
}
@Override
public Analyzer getIndexAnalyzer() {

View File

@ -26,6 +26,7 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.FieldValueFilter;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.lucene.queries.ChainedFilter;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
@ -343,6 +344,11 @@ public class CurrencyField extends FieldType implements SchemaAware, ResourceLoa
// Convert all values to default currency for sorting.
return (new RawCurrencyValueSource(field, defaultCurrency, null)).getSortField(reverse);
}
@Override
public Type getUninversionType(SchemaField sf) {
// returning null disallows on-the-fly uninversion (fieldcache) for this field type
return null;
}
@Override
public void write(TextResponseWriter writer, String name, StorableField field) throws IOException {

View File

@ -22,6 +22,7 @@ import org.apache.lucene.index.StorableField;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.EnumFieldSource;
import org.apache.lucene.search.*;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.NumericUtils;
@ -182,6 +183,15 @@ public class EnumField extends PrimitiveFieldType {
sf.setMissingValue(missingValue);
return sf;
}
@Override
public Type getUninversionType(SchemaField sf) {
  // enum values are indexed numerically, so uninvert as integer ordinals
  return sf.multiValued() ? Type.SORTED_SET_INTEGER : Type.INTEGER;
}
/**
* {@inheritDoc}

View File

@ -19,6 +19,7 @@ package org.apache.solr.schema;
import org.apache.lucene.index.StorableField;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.SortField;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.solr.common.SolrException;
import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.search.QParser;
@ -90,6 +91,11 @@ public class ExternalFileField extends FieldType implements SchemaAware {
FileFloatSource source = getFileFloatSource(field);
return source.getSortField(reverse);
}
@Override
public Type getUninversionType(SchemaField sf) {
// values live in an external file, not the index; disallow uninversion
return null;
}
@Override
public ValueSource getValueSource(SchemaField field, QParser parser) {

View File

@ -41,6 +41,7 @@ import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.uninverting.UninvertingReader;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.UnicodeUtil;
@ -446,6 +447,16 @@ public abstract class FieldType extends FieldProperties {
query.setRewriteMethod(sf.getType().getRewriteMethod(parser, sf));
return query;
}
/**
 * When DocValues is not enabled for a field, but it is indexed, docvalues can be constructed
 * on the fly (uninverted, aka fieldcache) on the first request to sort, facet, etc.
 * This specifies the structure to use.
 *
 * @param sf field instance
 * @return type to uninvert, or {@code null} (to disallow uninversion for the field)
 */
public abstract UninvertingReader.Type getUninversionType(SchemaField sf);
/**
* Default analyzer for types that only produce 1 verbatim token...

View File

@ -22,6 +22,8 @@ import org.apache.lucene.queries.function.valuesource.LiteralValueSource;
import org.apache.lucene.index.StorableField;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SortField;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import com.spatial4j.core.context.SpatialContext;
import com.spatial4j.core.io.GeohashUtils;
import com.spatial4j.core.shape.Point;
@ -47,6 +49,15 @@ public class GeoHashField extends FieldType implements SpatialQueryable {
public SortField getSortField(SchemaField field, boolean top) {
return getStringSort(field, top);
}
@Override
public Type getUninversionType(SchemaField sf) {
  // geohash terms uninvert like plain strings: ord set vs. single sorted ord
  return sf.multiValued() ? Type.SORTED_SET_BINARY : Type.SORTED;
}
//QUESTION: Should we do a fast and crude one? Or actually check distances
//Fast and crude could use EdgeNGrams, but that would require a different

View File

@ -19,10 +19,14 @@ package org.apache.solr.schema;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.AnalyzerWrapper;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.StorableField;
import org.apache.lucene.index.StoredDocument;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.uninverting.UninvertingReader;
import org.apache.lucene.util.Version;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
@ -357,6 +361,22 @@ public class IndexSchema {
indexAnalyzer = new SolrIndexAnalyzer();
queryAnalyzer = new SolrQueryAnalyzer();
}
/**
 * Builds the field-name to uninversion-type map used when wrapping a reader with
 * {@code UninvertingReader}: for each field in the reader that is indexed but has
 * no real docValues, its schema type decides how (or whether) it may be uninverted.
 *
 * @param reader reader whose merged FieldInfos are inspected
 * @return map from field name to uninversion type; fields absent from the map are
 *         not uninverted (either disallowed by the type or not present in the schema)
 */
public Map<String,UninvertingReader.Type> getUninversionMap(DirectoryReader reader) {
Map<String,UninvertingReader.Type> map = new HashMap<>();
for (FieldInfo f : MultiFields.getMergedFieldInfos(reader)) {
// only fields that are indexed yet lack stored docValues are candidates
if (f.hasDocValues() == false && f.isIndexed()) {
SchemaField sf = getFieldOrNull(f.name);
if (sf != null) {
UninvertingReader.Type type = sf.getType().getUninversionType(sf);
if (type != null) {
map.put(f.name, type);
}
}
}
}
return map;
}
/**
* Writes the schema in schema.xml format to the given writer

View File

@ -41,6 +41,7 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.Weight;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.lucene.util.Bits;
import org.apache.solr.common.SolrException;
import org.apache.solr.response.TextResponseWriter;
@ -241,6 +242,11 @@ public class LatLonType extends AbstractSubTypeFieldType implements SpatialQuery
public SortField getSortField(SchemaField field, boolean top) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Sorting not supported on LatLonType " + field.getName());
}
@Override
public Type getUninversionType(SchemaField sf) {
// returning null disallows on-the-fly uninversion (fieldcache) for this field type
return null;
}

View File

@ -25,6 +25,7 @@ import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SortField;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.MapSolrParams;
import org.apache.solr.common.params.SolrParams;
@ -119,6 +120,11 @@ public class PointType extends CoordinateFieldType implements SpatialQueryable {
public SortField getSortField(SchemaField field, boolean top) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Sorting not supported on PointType " + field.getName());
}
@Override
public Type getUninversionType(SchemaField sf) {
// returning null disallows on-the-fly uninversion (fieldcache) for this field type
return null;
}
@Override
/**

View File

@ -30,12 +30,17 @@ import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.StorableField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.SortedSetFieldSource;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedSetSortField;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.AttributeSource.State;
import org.apache.solr.analysis.SolrAnalyzer;
import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.search.QParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -112,10 +117,20 @@ public class PreAnalyzedField extends FieldType {
}
return f;
}
@Override
public SortField getSortField(SchemaField field, boolean top) {
return getStringSort(field, top);
return new SortedSetSortField(field.getName(), top);
}
@Override
public ValueSource getValueSource(SchemaField field, QParser parser) {
// function-query values come from the field's SortedSet doc values
return new SortedSetFieldSource(field.getName());
}
@Override
public Type getUninversionType(SchemaField sf) {
// always a set of binary ords, regardless of multiValued (tokenized content)
return Type.SORTED_SET_BINARY;
}
@Override

View File

@ -30,6 +30,7 @@ import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.docvalues.IntDocValues;
import org.apache.lucene.search.*;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.search.QParser;
@ -92,6 +93,11 @@ public class RandomSortField extends FieldType {
public SortField getSortField(SchemaField field, boolean reverse) {
return new SortField(field.getName(), randomComparatorSource, reverse);
}
@Override
public Type getUninversionType(SchemaField sf) {
// returning null disallows on-the-fly uninversion (fieldcache) for this field type
return null;
}
@Override
public ValueSource getValueSource(SchemaField field, QParser qparser) {

View File

@ -28,6 +28,7 @@ import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.StorableField;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.SortField;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.search.QParser;
@ -62,6 +63,15 @@ public class StrField extends PrimitiveFieldType {
return getStringSort(field,reverse);
}
@Override
public Type getUninversionType(SchemaField sf) {
  // plain strings: ord set for multi-valued fields, a single sorted ord otherwise
  return sf.multiValued() ? Type.SORTED_SET_BINARY : Type.SORTED;
}
@Override
public void write(TextResponseWriter writer, String name, StorableField f) throws IOException {
writer.writeStr(name, f.stringValue(), true);

View File

@ -18,10 +18,13 @@
package org.apache.solr.schema;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.SortedSetFieldSource;
import org.apache.lucene.search.*;
import org.apache.lucene.index.StorableField;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.QueryBuilder;
import org.apache.solr.common.SolrException;
@ -93,7 +96,17 @@ public class TextField extends FieldType {
@Override
public SortField getSortField(SchemaField field, boolean reverse) {
/* :TODO: maybe warn if isTokenized(), but doesn't use LimitTokenCountFilter in it's chain? */
return getStringSort(field, reverse);
return new SortedSetSortField(field.getName(), reverse);
}
@Override
public ValueSource getValueSource(SchemaField field, QParser parser) {
// function-query values come from the field's SortedSet doc values
return new SortedSetFieldSource(field.getName());
}
@Override
public Type getUninversionType(SchemaField sf) {
// tokenized text can yield many terms per document: always a binary ord set
return Type.SORTED_SET_BINARY;
}
@Override

View File

@ -30,6 +30,7 @@ import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
@ -364,6 +365,11 @@ public class TrieDateField extends PrimitiveFieldType implements DateValueFieldT
return wrappedField.getSortField(field, top);
}
@Override
public Type getUninversionType(SchemaField sf) {
// delegate to the wrapped TrieField, which picks the numeric uninversion type
return wrappedField.getUninversionType(sf);
}
@Override
public Object marshalSortValue(Object value) {
return value;

View File

@ -43,6 +43,7 @@ import org.apache.lucene.search.FieldCacheRangeFilter;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SortField;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.NumericUtils;
@ -194,6 +195,39 @@ public class TrieField extends PrimitiveFieldType {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + field.name);
}
}
@Override
public Type getUninversionType(SchemaField sf) {
  // Select the uninversion structure by numeric type; multi-valued fields use
  // the SORTED_SET_* variants. DATE shares LONG's encoding in both cases.
  final boolean multi = sf.multiValued();
  switch (type) {
    case INTEGER:
      return multi ? Type.SORTED_SET_INTEGER : Type.INTEGER;
    case LONG:
    case DATE:
      return multi ? Type.SORTED_SET_LONG : Type.LONG;
    case FLOAT:
      return multi ? Type.SORTED_SET_FLOAT : Type.FLOAT;
    case DOUBLE:
      return multi ? Type.SORTED_SET_DOUBLE : Type.DOUBLE;
    default:
      throw new AssertionError();
  }
}
@Override
public ValueSource getValueSource(SchemaField field, QParser qparser) {

View File

@ -89,6 +89,7 @@ import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.TotalHitCountCollector;
import org.apache.lucene.search.Weight;
import org.apache.lucene.store.Directory;
import org.apache.lucene.uninverting.UninvertingReader;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
@ -169,6 +170,9 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
private DirectoryFactory directoryFactory;
private final AtomicReader atomicReader;
// only for addIndexes etc (no fieldcache)
private final DirectoryReader rawReader;
private String path;
private final boolean reserveDirectory;
private final boolean createdDirectory;
@ -184,18 +188,27 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
}
return reader;
}
// TODO: wrap elsewhere and return a "map" from the schema that overrides get() ?
// this reader supports reopen
// Wraps the raw reader with UninvertingReader, using the schema to decide which
// indexed-but-docvalues-less fields may be uninverted on demand.
private static DirectoryReader wrapReader(SolrCore core, DirectoryReader reader) {
assert reader != null;
// guard against double-wrapping; relies on the class-name prefix (hence the nocommit)
assert !reader.getClass().getSimpleName().startsWith("Uninverting"); // nocommit
return UninvertingReader.wrap(reader, core.getLatestSchema().getUninversionMap(reader));
}
public SolrIndexSearcher(SolrCore core, String path, IndexSchema schema, SolrIndexConfig config, String name, boolean enableCache, DirectoryFactory directoryFactory) throws IOException {
// we don't need to reserve the directory because we get it from the factory
this(core, path, schema, config, name, null, true, enableCache, false, directoryFactory);
this(core, path, schema, config, name, getReader(core, config, directoryFactory, path), true, enableCache, false, directoryFactory);
}
public SolrIndexSearcher(SolrCore core, String path, IndexSchema schema, SolrIndexConfig config, String name, DirectoryReader r, boolean closeReader, boolean enableCache, boolean reserveDirectory, DirectoryFactory directoryFactory) throws IOException {
super(r == null ? getReader(core, config, directoryFactory, path) : r);
super(wrapReader(core, r));
this.path = path;
this.directoryFactory = directoryFactory;
this.reader = (DirectoryReader) super.readerContext.reader();
this.rawReader = r;
this.atomicReader = SlowCompositeReaderWrapper.wrap(this.reader);
this.core = core;
this.schema = schema;
@ -303,6 +316,11 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
return atomicReader;
}
/**
 * Raw underlying reader, without the uninverting/fieldcache wrapping applied
 * in the constructor. Useful for index-level operations like addIndexes.
 */
public final DirectoryReader getRawReader() {
return rawReader;
}
@Override
public final DirectoryReader getIndexReader() {
assert reader == super.getIndexReader();

View File

@ -89,7 +89,7 @@ public class SolrIndexSplitter {
public void split() throws IOException {
List<AtomicReaderContext> leaves = searcher.getTopReaderContext().leaves();
List<AtomicReaderContext> leaves = searcher.getRawReader().leaves();
List<FixedBitSet[]> segmentDocSets = new ArrayList<>(leaves.size());
log.info("SolrIndexSplitter: partitions=" + numPieces + " segments="+leaves.size());

View File

@ -500,7 +500,7 @@
<field name="cat" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="price" type="float" indexed="true" stored="true"/>
<field name="price" type="float" indexed="true" stored="true" multiValued="false"/>
<field name="inStock" type="boolean" indexed="true" stored="true" />
<field name="subword" type="subword" indexed="true" stored="true"/>

View File

@ -196,7 +196,7 @@ public class TestMergePolicyConfig extends SolrTestCaseJ4 {
public static void assertCompoundSegments(SolrCore core, boolean compound) {
RefCounted<SolrIndexSearcher> searcherRef = core.getRegisteredSearcher();
try {
assertCompoundSegments(searcherRef.get().getIndexReader(), compound);
assertCompoundSegments(searcherRef.get().getRawReader(), compound);
} finally {
searcherRef.decref();
}

View File

@ -99,11 +99,11 @@ public class SortByFunctionTest extends AbstractSolrTestCase {
public void testSortJoinDocFreq() throws Exception
{
assertU(adoc("id", "4", "id_s", "D", "links_mfacet", "A", "links_mfacet", "B", "links_mfacet", "C" ) );
assertU(adoc("id", "3", "id_s", "C", "links_mfacet", "A", "links_mfacet", "B" ) );
assertU(adoc("id", "4", "id_s1", "D", "links_mfacet", "A", "links_mfacet", "B", "links_mfacet", "C" ) );
assertU(adoc("id", "3", "id_s1", "C", "links_mfacet", "A", "links_mfacet", "B" ) );
assertU(commit()); // Make sure it uses two readers
assertU(adoc("id", "2", "id_s", "B", "links_mfacet", "A" ) );
assertU(adoc("id", "1", "id_s", "A" ) );
assertU(adoc("id", "2", "id_s1", "B", "links_mfacet", "A" ) );
assertU(adoc("id", "1", "id_s1", "A" ) );
assertU(commit());
assertQ(req("q", "links_mfacet:B", "fl", "id", "sort", "id asc"),
@ -112,7 +112,7 @@ public class SortByFunctionTest extends AbstractSolrTestCase {
"//result/doc[2]/int[@name='id'][.='4']"
);
assertQ(req("q", "*:*", "fl", "id", "sort", "joindf(id_s, links_mfacet) desc"),
assertQ(req("q", "*:*", "fl", "id", "sort", "joindf(id_s1, links_mfacet) desc"),
"//*[@numFound='4']",
"//result/doc[1]/int[@name='id'][.='1']",
"//result/doc[2]/int[@name='id'][.='2']",
@ -120,7 +120,7 @@ public class SortByFunctionTest extends AbstractSolrTestCase {
"//result/doc[4]/int[@name='id'][.='4']"
);
assertQ(req("q", "*:*", "fl", "id", "sort", "joindf(id_s, links_mfacet) asc"),
assertQ(req("q", "*:*", "fl", "id", "sort", "joindf(id_s1, links_mfacet) asc"),
"//*[@numFound='4']",
"//result/doc[1]/int[@name='id'][.='4']",
"//result/doc[2]/int[@name='id'][.='3']",