SOLR-11610: Move SOLR to PayloadDecoder

This commit is contained in:
Alan Woodward 2017-11-06 11:52:13 +00:00
parent 5c9bcc9e90
commit 1a80bc76b1
9 changed files with 32 additions and 137 deletions

View File

@ -150,6 +150,9 @@ Other Changes
* SOLR-11628: Add documentation of maxRamMB for filter cache and query result cache. (shalin) * SOLR-11628: Add documentation of maxRamMB for filter cache and query result cache. (shalin)
* SOLR-11610: Refactored payload handling to use Lucene's PayloadDecoder
framework (Alan Woodward)
================== 7.1.0 ================== ================== 7.1.0 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release. Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.

View File

@ -49,9 +49,10 @@ import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.MultiFields;
import org.apache.lucene.queries.payloads.PayloadDecoder;
import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity;
import org.apache.solr.uninverting.UninvertingReader;
import org.apache.lucene.util.Version; import org.apache.lucene.util.Version;
import org.apache.solr.common.MapSerializable;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.CommonParams;
@ -62,14 +63,15 @@ import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.Pair; import org.apache.solr.common.util.Pair;
import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.Config; import org.apache.solr.core.Config;
import org.apache.solr.common.MapSerializable;
import org.apache.solr.core.SolrConfig; import org.apache.solr.core.SolrConfig;
import org.apache.solr.core.SolrResourceLoader; import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.request.LocalSolrQueryRequest; import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.response.SchemaXmlWriter; import org.apache.solr.response.SchemaXmlWriter;
import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.search.similarities.SchemaSimilarityFactory; import org.apache.solr.search.similarities.SchemaSimilarityFactory;
import org.apache.solr.uninverting.UninvertingReader;
import org.apache.solr.util.DOMUtil; import org.apache.solr.util.DOMUtil;
import org.apache.solr.util.PayloadUtils;
import org.apache.solr.util.plugin.SolrCoreAware; import org.apache.solr.util.plugin.SolrCoreAware;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -150,6 +152,8 @@ public class IndexSchema {
protected DynamicCopy[] dynamicCopyFields; protected DynamicCopy[] dynamicCopyFields;
public DynamicCopy[] getDynamicCopyFields() { return dynamicCopyFields; } public DynamicCopy[] getDynamicCopyFields() { return dynamicCopyFields; }
private Map<FieldType, PayloadDecoder> decoders = new HashMap<>(); // cache to avoid scanning token filters repeatedly, unnecessarily
/** /**
* keys are all fields copied to, count is num of copyField * keys are all fields copied to, count is num of copyField
* directives that target them. * directives that target them.
@ -1937,4 +1941,12 @@ public class IndexSchema {
null != rootType && null != rootType &&
rootType.getTypeName().equals(uniqueKeyFieldType.getTypeName())); rootType.getTypeName().equals(uniqueKeyFieldType.getTypeName()));
} }
/**
 * Returns the payload decoder for the type of the given field, consulting the
 * per-schema {@code decoders} cache before asking {@link PayloadUtils}.
 *
 * <p>Both positive and negative results are cached. {@code Map.computeIfAbsent}
 * records no mapping when the mapping function returns {@code null}, so using it
 * here would rescan field types without a decoder on every call. The explicit
 * {@code containsKey} check stores the {@code null} result as well.
 *
 * <p>NOTE(review): the {@code ft == null} guard only has an effect if
 * {@code getFieldType} can return {@code null}; confirm whether it throws for
 * unknown fields instead.
 * <p>NOTE(review): {@code decoders} is a plain {@code HashMap}; confirm this
 * method is never called concurrently, or guard the map.
 *
 * @param field name of the field whose payload decoder is wanted
 * @return the decoder for the field's type, or {@code null} if the field type
 *         is {@code null} or {@link PayloadUtils} finds no decoder for it
 */
public PayloadDecoder getPayloadDecoder(String field) {
  FieldType ft = getFieldType(field);
  if (ft == null) {
    return null;
  }
  PayloadDecoder decoder = decoders.get(ft);
  // A null from get() is ambiguous: "never looked up" vs. "looked up, no decoder".
  if (decoder == null && !decoders.containsKey(ft)) {
    decoder = PayloadUtils.getPayloadDecoder(ft);
    decoders.put(ft, decoder); // HashMap permits null values, so misses are cached too
  }
  return decoder;
}
} }

View File

@ -27,10 +27,10 @@ import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.docvalues.FloatDocValues; import org.apache.lucene.queries.function.docvalues.FloatDocValues;
import org.apache.lucene.queries.payloads.PayloadDecoder;
import org.apache.lucene.queries.payloads.PayloadFunction; import org.apache.lucene.queries.payloads.PayloadFunction;
import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.solr.util.PayloadDecoder;
public class FloatPayloadValueSource extends ValueSource { public class FloatPayloadValueSource extends ValueSource {
protected final String field; protected final String field;
@ -167,7 +167,7 @@ public class FloatPayloadValueSource extends ValueSource {
docs.nextPosition(); docs.nextPosition();
BytesRef payload = docs.getPayload(); BytesRef payload = docs.getPayload();
if (payload != null) { if (payload != null) {
float payloadVal = decoder.decode(atDoc, docs.startOffset(), docs.endOffset(), payload); float payloadVal = decoder.computePayloadFactor(payload);
// payloadFunction = null represents "first" // payloadFunction = null represents "first"
if (payloadFunction == null) return payloadVal; if (payloadFunction == null) return payloadVal;

View File

@ -20,6 +20,7 @@ package org.apache.solr.search;
import java.io.IOException; import java.io.IOException;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queries.payloads.PayloadDecoder;
import org.apache.lucene.queries.payloads.PayloadFunction; import org.apache.lucene.queries.payloads.PayloadFunction;
import org.apache.lucene.queries.payloads.PayloadScoreQuery; import org.apache.lucene.queries.payloads.PayloadScoreQuery;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
@ -83,7 +84,8 @@ public class PayloadScoreQParserPlugin extends QParserPlugin {
PayloadFunction payloadFunction = PayloadUtils.getPayloadFunction(func); PayloadFunction payloadFunction = PayloadUtils.getPayloadFunction(func);
if (payloadFunction == null) throw new SyntaxError("Unknown payload function: " + func); if (payloadFunction == null) throw new SyntaxError("Unknown payload function: " + func);
return new PayloadScoreQuery(query, payloadFunction, includeSpanScore); PayloadDecoder payloadDecoder = req.getCore().getLatestSchema().getPayloadDecoder(field);
return new PayloadScoreQuery(query, payloadFunction, payloadDecoder, includeSpanScore);
} }
}; };
} }

View File

@ -34,6 +34,7 @@ import org.apache.lucene.queries.function.docvalues.BoolDocValues;
import org.apache.lucene.queries.function.docvalues.DoubleDocValues; import org.apache.lucene.queries.function.docvalues.DoubleDocValues;
import org.apache.lucene.queries.function.docvalues.LongDocValues; import org.apache.lucene.queries.function.docvalues.LongDocValues;
import org.apache.lucene.queries.function.valuesource.*; import org.apache.lucene.queries.function.valuesource.*;
import org.apache.lucene.queries.payloads.PayloadDecoder;
import org.apache.lucene.queries.payloads.PayloadFunction; import org.apache.lucene.queries.payloads.PayloadFunction;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
@ -49,6 +50,7 @@ import org.apache.solr.common.util.NamedList;
import org.apache.solr.request.SolrRequestInfo; import org.apache.solr.request.SolrRequestInfo;
import org.apache.solr.schema.CurrencyFieldType; import org.apache.solr.schema.CurrencyFieldType;
import org.apache.solr.schema.FieldType; import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField; import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.StrField; import org.apache.solr.schema.StrField;
import org.apache.solr.schema.TextField; import org.apache.solr.schema.TextField;
@ -77,7 +79,6 @@ import org.apache.solr.search.function.distance.StringDistanceFunction;
import org.apache.solr.search.function.distance.VectorDistanceFunction; import org.apache.solr.search.function.distance.VectorDistanceFunction;
import org.apache.solr.search.join.ChildFieldValueSourceParser; import org.apache.solr.search.join.ChildFieldValueSourceParser;
import org.apache.solr.util.DateMathParser; import org.apache.solr.util.DateMathParser;
import org.apache.solr.util.PayloadDecoder;
import org.apache.solr.util.PayloadUtils; import org.apache.solr.util.PayloadUtils;
import org.apache.solr.util.plugin.NamedListInitializedPlugin; import org.apache.solr.util.plugin.NamedListInitializedPlugin;
import org.locationtech.spatial4j.distance.DistanceUtils; import org.locationtech.spatial4j.distance.DistanceUtils;
@ -737,8 +738,8 @@ public abstract class ValueSourceParser implements NamedListInitializedPlugin {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Invalid payload function: " + func); throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Invalid payload function: " + func);
} }
FieldType fieldType = fp.getReq().getCore().getLatestSchema().getFieldTypeNoEx(tinfo.field); IndexSchema schema = fp.getReq().getCore().getLatestSchema();
PayloadDecoder decoder = PayloadUtils.getPayloadDecoder(fieldType); PayloadDecoder decoder = schema.getPayloadDecoder(tinfo.field);
if (decoder==null) { if (decoder==null) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No payload decoder found for field: " + tinfo.field); throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No payload decoder found for field: " + tinfo.field);

View File

@ -1,80 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search.similarities;
import java.io.IOException;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.util.PayloadDecoder;
/**
 * Similarity decorator used for payload scoring.
 *
 * <p>Lucene's PayloadScoreQuery obtains its payload factor through
 * SimScorer#computePayloadFactor. This class forwards every call to the wrapped
 * similarity except that one, which is answered by the supplied PayloadDecoder.
 */
public class PayloadScoringSimilarityWrapper extends Similarity {

  /** Similarity that handles everything other than the payload factor. */
  private Similarity wrapped;

  /** Decoder consulted for the payload factor. */
  private PayloadDecoder payloadDecoder;

  /**
   * @param delegate similarity that receives all non-payload calls
   * @param decoder  decoder used to compute the payload factor
   */
  public PayloadScoringSimilarityWrapper(Similarity delegate, PayloadDecoder decoder) {
    this.wrapped = delegate;
    this.payloadDecoder = decoder;
  }

  /** Describes this wrapper in terms of the wrapped similarity and the decoder. */
  @Override
  public String toString() {
    StringBuilder description = new StringBuilder("PayloadScoring(");
    description.append(wrapped.toString());
    description.append(", decoder=");
    description.append(payloadDecoder.toString());
    description.append(")");
    return description.toString();
  }

  /** Forwarded unchanged to the wrapped similarity. */
  @Override
  public long computeNorm(FieldInvertState state) {
    return wrapped.computeNorm(state);
  }

  /** Forwarded unchanged to the wrapped similarity. */
  @Override
  public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
    return wrapped.computeWeight(boost, collectionStats, termStats);
  }

  /**
   * Builds a scorer whose score and slop factor come from the wrapped
   * similarity's scorer and whose payload factor comes from the decoder.
   */
  @Override
  public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException {
    final SimScorer base = wrapped.simScorer(weight, context);
    return new SimScorer() {
      @Override
      public float score(int doc, float freq) throws IOException {
        return base.score(doc, freq);
      }

      @Override
      public float computeSlopFactor(int distance) {
        return base.computeSlopFactor(distance);
      }

      // The only call not forwarded to the wrapped scorer.
      @Override
      public float computePayloadFactor(int doc, int start, int end, BytesRef payload) {
        return payloadDecoder.decode(doc, start, end, payload);
      }
    };
  }
}

View File

@ -16,22 +16,17 @@
*/ */
package org.apache.solr.search.similarities; package org.apache.solr.search.similarities;
import java.util.HashMap;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.similarities.BM25Similarity; import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper; import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.Version; import org.apache.lucene.util.Version;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.SolrParams;
import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.FieldType; import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SimilarityFactory; import org.apache.solr.schema.SimilarityFactory;
import org.apache.solr.util.PayloadDecoder;
import org.apache.solr.util.PayloadUtils;
import org.apache.solr.util.plugin.SolrCoreAware; import org.apache.solr.util.plugin.SolrCoreAware;
/** /**
@ -138,7 +133,6 @@ public class SchemaSimilarityFactory extends SimilarityFactory implements SolrCo
private class SchemaSimilarity extends PerFieldSimilarityWrapper { private class SchemaSimilarity extends PerFieldSimilarityWrapper {
private Similarity defaultSimilarity; private Similarity defaultSimilarity;
private HashMap<FieldType,PayloadDecoder> decoders; // cache to avoid scanning token filters repeatedly, unnecessarily
public SchemaSimilarity(Similarity defaultSimilarity) { public SchemaSimilarity(Similarity defaultSimilarity) {
this.defaultSimilarity = defaultSimilarity; this.defaultSimilarity = defaultSimilarity;
@ -151,19 +145,7 @@ public class SchemaSimilarityFactory extends SimilarityFactory implements SolrCo
return defaultSimilarity; return defaultSimilarity;
} else { } else {
Similarity similarity = fieldType.getSimilarity(); Similarity similarity = fieldType.getSimilarity();
similarity = similarity == null ? defaultSimilarity : similarity; return similarity == null ? defaultSimilarity : similarity;
// Payload score handling: if field type has index-time payload encoding, wrap and computePayloadFactor accordingly
if (decoders == null) decoders = new HashMap<>();
PayloadDecoder decoder;
if (!decoders.containsKey(fieldType)) {
decoders.put(fieldType, PayloadUtils.getPayloadDecoder(fieldType));
}
decoder = decoders.get(fieldType);
if (decoder != null) similarity = new PayloadScoringSimilarityWrapper(similarity, decoder);
return similarity;
} }
} }

View File

@ -1,27 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.util;
import org.apache.lucene.util.BytesRef;
/**
 * Converts a term payload into a float factor.
 *
 * <p>Mirrors SimScorer#computePayloadFactor's signature. The interface has a
 * single abstract method, so it can be implemented with a lambda;
 * {@code @FunctionalInterface} makes the compiler enforce that.
 */
@FunctionalInterface
public interface PayloadDecoder {

  /**
   * Decodes one payload.
   *
   * @param doc     document argument, as received by SimScorer#computePayloadFactor
   * @param start   start argument, as received by SimScorer#computePayloadFactor
   * @param end     end argument, as received by SimScorer#computePayloadFactor
   * @param payload the payload bytes to decode
   * @return the decoded float value
   */
  float decode(int doc, int start, int end, BytesRef payload);
}

View File

@ -32,6 +32,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.queries.payloads.AveragePayloadFunction; import org.apache.lucene.queries.payloads.AveragePayloadFunction;
import org.apache.lucene.queries.payloads.MaxPayloadFunction; import org.apache.lucene.queries.payloads.MaxPayloadFunction;
import org.apache.lucene.queries.payloads.MinPayloadFunction; import org.apache.lucene.queries.payloads.MinPayloadFunction;
import org.apache.lucene.queries.payloads.PayloadDecoder;
import org.apache.lucene.queries.payloads.PayloadFunction; import org.apache.lucene.queries.payloads.PayloadFunction;
import org.apache.lucene.queries.payloads.SumPayloadFunction; import org.apache.lucene.queries.payloads.SumPayloadFunction;
import org.apache.lucene.search.spans.SpanNearQuery; import org.apache.lucene.search.spans.SpanNearQuery;
@ -44,6 +45,7 @@ import org.apache.solr.schema.FieldType;
import org.apache.solr.search.PayloadScoreQParserPlugin; import org.apache.solr.search.PayloadScoreQParserPlugin;
public class PayloadUtils { public class PayloadUtils {
public static String getPayloadEncoder(FieldType fieldType) { public static String getPayloadEncoder(FieldType fieldType) {
// TODO: support custom payload encoding fields too somehow - maybe someone has a custom component that encodes payloads as floats // TODO: support custom payload encoding fields too somehow - maybe someone has a custom component that encodes payloads as floats
String encoder = null; String encoder = null;
@ -75,10 +77,10 @@ public class PayloadUtils {
String encoder = getPayloadEncoder(fieldType); String encoder = getPayloadEncoder(fieldType);
if ("integer".equals(encoder)) { if ("integer".equals(encoder)) {
decoder = (int doc, int start, int end, BytesRef payload) -> PayloadHelper.decodeInt(payload.bytes, payload.offset); decoder = (BytesRef payload) -> payload == null ? 1 : PayloadHelper.decodeInt(payload.bytes, payload.offset);
} }
if ("float".equals(encoder)) { if ("float".equals(encoder)) {
decoder = (int doc, int start, int end, BytesRef payload) -> PayloadHelper.decodeFloat(payload.bytes, payload.offset); decoder = (BytesRef payload) -> payload == null ? 1 : PayloadHelper.decodeFloat(payload.bytes, payload.offset);
} }
// encoder could be "identity" at this point, in the case of DelimitedTokenFilterFactory encoder="identity" // encoder could be "identity" at this point, in the case of DelimitedTokenFilterFactory encoder="identity"