SOLR-11610: Move SOLR to PayloadDecoder

This commit is contained in:
Alan Woodward 2017-11-06 11:52:13 +00:00
parent 5c9bcc9e90
commit 1a80bc76b1
9 changed files with 32 additions and 137 deletions

View File

@ -150,6 +150,9 @@ Other Changes
* SOLR-11628: Add documentation of maxRamMB for filter cache and query result cache. (shalin)
* SOLR-11610: Refactored payload handling to use Lucene's PayloadDecoder
framework (Alan Woodward)
================== 7.1.0 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.

View File

@ -49,9 +49,10 @@ import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.queries.payloads.PayloadDecoder;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.solr.uninverting.UninvertingReader;
import org.apache.lucene.util.Version;
import org.apache.solr.common.MapSerializable;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.params.CommonParams;
@ -62,14 +63,15 @@ import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.Pair;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.Config;
import org.apache.solr.common.MapSerializable;
import org.apache.solr.core.SolrConfig;
import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.response.SchemaXmlWriter;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.search.similarities.SchemaSimilarityFactory;
import org.apache.solr.uninverting.UninvertingReader;
import org.apache.solr.util.DOMUtil;
import org.apache.solr.util.PayloadUtils;
import org.apache.solr.util.plugin.SolrCoreAware;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -150,6 +152,8 @@ public class IndexSchema {
protected DynamicCopy[] dynamicCopyFields;
public DynamicCopy[] getDynamicCopyFields() { return dynamicCopyFields; }
// Per-field-type cache of payload decoders, filled lazily by getPayloadDecoder(String).
// NOTE(review): this is a plain HashMap that is mutated on lookup; if the schema instance is
// shared by concurrent request threads it needs synchronization or a concurrent map -- confirm.
private Map<FieldType, PayloadDecoder> decoders = new HashMap<>(); // cache to avoid scanning token filters repeatedly, unnecessarily
/**
* keys are all fields copied to, count is num of copyField
* directives that target them.
@ -1937,4 +1941,12 @@ public class IndexSchema {
null != rootType &&
rootType.getTypeName().equals(uniqueKeyFieldType.getTypeName()));
}
/**
 * Returns the payload decoder associated with the type of the given field.
 *
 * <p>Decoders are looked up once per {@link FieldType} via
 * {@code PayloadUtils.getPayloadDecoder} and then served from the {@code decoders} cache.
 * When the lookup yields {@code null}, {@code computeIfAbsent} records no entry, so such
 * field types are examined again on the next call.
 *
 * @param field name of the field whose decoder is requested
 * @return the decoder for the field's type, or {@code null} if {@code getFieldType} returns
 *     {@code null} or no decoder is available for that type
 */
public PayloadDecoder getPayloadDecoder(String field) {
  final FieldType fieldType = getFieldType(field);
  return fieldType == null
      ? null
      : decoders.computeIfAbsent(fieldType, type -> PayloadUtils.getPayloadDecoder(type));
}
}

View File

@ -27,10 +27,10 @@ import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.docvalues.FloatDocValues;
import org.apache.lucene.queries.payloads.PayloadDecoder;
import org.apache.lucene.queries.payloads.PayloadFunction;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.util.PayloadDecoder;
public class FloatPayloadValueSource extends ValueSource {
protected final String field;
@ -167,7 +167,7 @@ public class FloatPayloadValueSource extends ValueSource {
docs.nextPosition();
BytesRef payload = docs.getPayload();
if (payload != null) {
float payloadVal = decoder.decode(atDoc, docs.startOffset(), docs.endOffset(), payload);
float payloadVal = decoder.computePayloadFactor(payload);
// payloadFunction = null represents "first"
if (payloadFunction == null) return payloadVal;

View File

@ -20,6 +20,7 @@ package org.apache.solr.search;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queries.payloads.PayloadDecoder;
import org.apache.lucene.queries.payloads.PayloadFunction;
import org.apache.lucene.queries.payloads.PayloadScoreQuery;
import org.apache.lucene.search.Query;
@ -83,7 +84,8 @@ public class PayloadScoreQParserPlugin extends QParserPlugin {
PayloadFunction payloadFunction = PayloadUtils.getPayloadFunction(func);
if (payloadFunction == null) throw new SyntaxError("Unknown payload function: " + func);
return new PayloadScoreQuery(query, payloadFunction, includeSpanScore);
PayloadDecoder payloadDecoder = req.getCore().getLatestSchema().getPayloadDecoder(field);
return new PayloadScoreQuery(query, payloadFunction, payloadDecoder, includeSpanScore);
}
};
}

View File

@ -34,6 +34,7 @@ import org.apache.lucene.queries.function.docvalues.BoolDocValues;
import org.apache.lucene.queries.function.docvalues.DoubleDocValues;
import org.apache.lucene.queries.function.docvalues.LongDocValues;
import org.apache.lucene.queries.function.valuesource.*;
import org.apache.lucene.queries.payloads.PayloadDecoder;
import org.apache.lucene.queries.payloads.PayloadFunction;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
@ -49,6 +50,7 @@ import org.apache.solr.common.util.NamedList;
import org.apache.solr.request.SolrRequestInfo;
import org.apache.solr.schema.CurrencyFieldType;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.StrField;
import org.apache.solr.schema.TextField;
@ -77,7 +79,6 @@ import org.apache.solr.search.function.distance.StringDistanceFunction;
import org.apache.solr.search.function.distance.VectorDistanceFunction;
import org.apache.solr.search.join.ChildFieldValueSourceParser;
import org.apache.solr.util.DateMathParser;
import org.apache.solr.util.PayloadDecoder;
import org.apache.solr.util.PayloadUtils;
import org.apache.solr.util.plugin.NamedListInitializedPlugin;
import org.locationtech.spatial4j.distance.DistanceUtils;
@ -737,8 +738,8 @@ public abstract class ValueSourceParser implements NamedListInitializedPlugin {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Invalid payload function: " + func);
}
FieldType fieldType = fp.getReq().getCore().getLatestSchema().getFieldTypeNoEx(tinfo.field);
PayloadDecoder decoder = PayloadUtils.getPayloadDecoder(fieldType);
IndexSchema schema = fp.getReq().getCore().getLatestSchema();
PayloadDecoder decoder = schema.getPayloadDecoder(tinfo.field);
if (decoder==null) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No payload decoder found for field: " + tinfo.field);

View File

@ -1,80 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search.similarities;
import java.io.IOException;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.util.PayloadDecoder;
/**
 * A {@link Similarity} that forwards every computation to a wrapped similarity except the
 * payload factor, which is obtained from a {@link PayloadDecoder} instead.
 *
 * <p>Lucene's PayloadScoreQuery obtains its payload factor from
 * SimScorer#computePayloadFactor; overriding only that method lets the configured
 * similarity keep control of all other scoring.
 */
public class PayloadScoringSimilarityWrapper extends Similarity {
  /** Similarity that handles norms, weights, scores and slop factors. */
  private final Similarity delegate;
  /** Decoder that turns a raw payload into the payload factor. */
  private final PayloadDecoder decoder;

  /**
   * @param delegate similarity to forward all non-payload computations to
   * @param decoder decoder used to compute the payload factor
   */
  public PayloadScoringSimilarityWrapper(Similarity delegate, PayloadDecoder decoder) {
    this.delegate = delegate;
    this.decoder = decoder;
  }

  @Override
  public String toString() {
    final String delegateText = delegate.toString();
    final String decoderText = decoder.toString();
    return "PayloadScoring(" + delegateText + ", decoder=" + decoderText + ")";
  }

  @Override
  public long computeNorm(FieldInvertState state) {
    return delegate.computeNorm(state);
  }

  @Override
  public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
    return delegate.computeWeight(boost, collectionStats, termStats);
  }

  /**
   * Builds a scorer that forwards {@code score} and {@code computeSlopFactor} to the
   * delegate's scorer and answers {@code computePayloadFactor} from the decoder.
   */
  @Override
  public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException {
    final SimScorer wrapped = delegate.simScorer(weight, context);
    return new SimScorer() {
      @Override
      public float score(int doc, float freq) throws IOException {
        return wrapped.score(doc, freq);
      }

      @Override
      public float computeSlopFactor(int distance) {
        return wrapped.computeSlopFactor(distance);
      }

      @Override
      public float computePayloadFactor(int doc, int start, int end, BytesRef payload) {
        // The only behavior that differs from the delegate: decode the payload ourselves.
        return decoder.decode(doc, start, end, payload);
      }
    };
  }
}

View File

@ -16,22 +16,17 @@
*/
package org.apache.solr.search.similarities;
import java.util.HashMap;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.Version;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SimilarityFactory;
import org.apache.solr.util.PayloadDecoder;
import org.apache.solr.util.PayloadUtils;
import org.apache.solr.util.plugin.SolrCoreAware;
/**
@ -138,7 +133,6 @@ public class SchemaSimilarityFactory extends SimilarityFactory implements SolrCo
private class SchemaSimilarity extends PerFieldSimilarityWrapper {
private Similarity defaultSimilarity;
private HashMap<FieldType,PayloadDecoder> decoders; // cache to avoid scanning token filters repeatedly, unnecessarily
public SchemaSimilarity(Similarity defaultSimilarity) {
this.defaultSimilarity = defaultSimilarity;
@ -151,19 +145,7 @@ public class SchemaSimilarityFactory extends SimilarityFactory implements SolrCo
return defaultSimilarity;
} else {
Similarity similarity = fieldType.getSimilarity();
similarity = similarity == null ? defaultSimilarity : similarity;
// Payload score handling: if field type has index-time payload encoding, wrap and computePayloadFactor accordingly
if (decoders == null) decoders = new HashMap<>();
PayloadDecoder decoder;
if (!decoders.containsKey(fieldType)) {
decoders.put(fieldType, PayloadUtils.getPayloadDecoder(fieldType));
}
decoder = decoders.get(fieldType);
if (decoder != null) similarity = new PayloadScoringSimilarityWrapper(similarity, decoder);
return similarity;
return similarity == null ? defaultSimilarity : similarity;
}
}

View File

@ -1,27 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.util;
import org.apache.lucene.util.BytesRef;
/**
 * Converts a term payload into a float value.
 *
 * <p>The parameter list mirrors SimScorer#computePayloadFactor's signature. The interface
 * has a single abstract method and is implemented with lambdas, hence the
 * {@link FunctionalInterface} annotation.
 */
@FunctionalInterface
public interface PayloadDecoder {
  /**
   * Decodes one payload.
   *
   * @param doc the document the payload belongs to
   * @param start start position/offset passed through by the caller
   * @param end end position/offset passed through by the caller
   * @param payload the raw payload bytes to decode
   * @return the decoded value
   */
  float decode(int doc, int start, int end, BytesRef payload);
}

View File

@ -32,6 +32,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.queries.payloads.AveragePayloadFunction;
import org.apache.lucene.queries.payloads.MaxPayloadFunction;
import org.apache.lucene.queries.payloads.MinPayloadFunction;
import org.apache.lucene.queries.payloads.PayloadDecoder;
import org.apache.lucene.queries.payloads.PayloadFunction;
import org.apache.lucene.queries.payloads.SumPayloadFunction;
import org.apache.lucene.search.spans.SpanNearQuery;
@ -44,6 +45,7 @@ import org.apache.solr.schema.FieldType;
import org.apache.solr.search.PayloadScoreQParserPlugin;
public class PayloadUtils {
public static String getPayloadEncoder(FieldType fieldType) {
// TODO: support custom payload encoding fields too somehow - maybe someone has a custom component that encodes payloads as floats
String encoder = null;
@ -75,10 +77,10 @@ public class PayloadUtils {
String encoder = getPayloadEncoder(fieldType);
if ("integer".equals(encoder)) {
decoder = (int doc, int start, int end, BytesRef payload) -> PayloadHelper.decodeInt(payload.bytes, payload.offset);
decoder = (BytesRef payload) -> payload == null ? 1 : PayloadHelper.decodeInt(payload.bytes, payload.offset);
}
if ("float".equals(encoder)) {
decoder = (int doc, int start, int end, BytesRef payload) -> PayloadHelper.decodeFloat(payload.bytes, payload.offset);
decoder = (BytesRef payload) -> payload == null ? 1 : PayloadHelper.decodeFloat(payload.bytes, payload.offset);
}
// encoder could be "identity" at this point, in the case of DelimitedTokenFilterFactory encoder="identity"