Merge branch 'apache-https-master' into jira/solr-8593

This commit is contained in:
Kevin Risden 2017-01-27 10:19:38 -05:00
commit 934a92e730
58 changed files with 5837 additions and 360 deletions

View File

@ -77,6 +77,11 @@ API Changes
* LUCENE-7643: Replaced doc-values queries in lucene/sandbox with factory
methods on the *DocValuesField classes. (Adrien Grand)
* LUCENE-7659: Added an IndexWriter#getFieldNames() method (experimental) to return
all field names as visible from the IndexWriter. This would be useful for
IndexWriter#updateDocValues() calls, to prevent calling with non-existent
docValues fields (Ishan Chattopadhyaya, Adrien Grand, Mike McCandless)
New Features
* LUCENE-7623: Add FunctionScoreQuery and FunctionMatchQuery (Alan Woodward,
@ -87,6 +92,11 @@ New Features
proximity queries at search time will produce correct results (Mike
McCandless)
* LUCENE-7656: Added the LatLonDocValuesField.new(Box/Distance)Query() factory
methods that are the equivalent of factory methods on LatLonPoint but operate
on doc values. These new methods should be wrapped in an IndexOrDocValuesQuery
for best performance. (Adrien Grand)
Bug Fixes
* LUCENE-7630: Fix (Edge)NGramTokenFilter to no longer drop payloads
@ -109,6 +119,9 @@ Optimizations
match the range on single-valued fields when more than half the documents in
the index would match. (Adrien Grand)
* LUCENE-7656: Speed up for LatLonPointDistanceQuery by computing distances even
less often. (Adrien Grand)
Build
* LUCENE-7651: Fix Javadocs build for Java 8u121 by injecting "Google Code
@ -118,6 +131,17 @@ Build
* LUCENE-7653: Update randomizedtesting to version 2.5.0. (Dawid Weiss)
======================= Lucene 6.4.1 =======================
Bug Fixes
* LUCENE-7657: Fixed potential memory leak in the case that a (Span)TermQuery
with a TermContext is cached. (Adrien Grand)
* LUCENE-7647: Made stored fields reclaim native memory more aggressively when
configured with BEST_COMPRESSION. This could otherwise result in out-of-memory
issues. (Adrien Grand)
======================= Lucene 6.4.0 =======================
API Changes

View File

@ -81,7 +81,7 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
private CompressingStoredFieldsIndexWriter indexWriter;
private IndexOutput fieldsStream;
private final Compressor compressor;
private Compressor compressor;
private final CompressionMode compressionMode;
private final int chunkSize;
private final int maxDocsPerChunk;
@ -141,10 +141,11 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
@Override
public void close() throws IOException {
try {
IOUtils.close(fieldsStream, indexWriter);
IOUtils.close(fieldsStream, indexWriter, compressor);
} finally {
fieldsStream = null;
indexWriter = null;
compressor = null;
}
}

View File

@ -164,6 +164,10 @@ public abstract class CompressionMode {
LZ4.compress(bytes, off, len, out, ht);
}
/** Closes this compressor; the implementation is intentionally empty and performs no work. */
@Override
public void close() throws IOException {
// no-op
}
}
private static final class LZ4HighCompressor extends Compressor {
@ -180,15 +184,17 @@ public abstract class CompressionMode {
LZ4.compressHC(bytes, off, len, out, ht);
}
/** Closes this compressor; the implementation is intentionally empty and performs no work. */
@Override
public void close() throws IOException {
// no-op
}
}
private static final class DeflateDecompressor extends Decompressor {
final Inflater decompressor;
byte[] compressed;
DeflateDecompressor() {
decompressor = new Inflater(true);
compressed = new byte[0];
}
@ -207,20 +213,24 @@ public abstract class CompressionMode {
in.readBytes(compressed, 0, compressedLength);
compressed[compressedLength] = 0; // explicitly set dummy byte to 0
decompressor.reset();
// extra "dummy byte"
decompressor.setInput(compressed, 0, paddedLength);
bytes.offset = bytes.length = 0;
bytes.bytes = ArrayUtil.grow(bytes.bytes, originalLength);
final Inflater decompressor = new Inflater(true);
try {
bytes.length = decompressor.inflate(bytes.bytes, bytes.length, originalLength);
} catch (DataFormatException e) {
throw new IOException(e);
}
if (!decompressor.finished()) {
throw new CorruptIndexException("Invalid decoder state: needsInput=" + decompressor.needsInput()
+ ", needsDict=" + decompressor.needsDictionary(), in);
// extra "dummy byte"
decompressor.setInput(compressed, 0, paddedLength);
bytes.offset = bytes.length = 0;
bytes.bytes = ArrayUtil.grow(bytes.bytes, originalLength);
try {
bytes.length = decompressor.inflate(bytes.bytes, bytes.length, originalLength);
} catch (DataFormatException e) {
throw new IOException(e);
}
if (!decompressor.finished()) {
throw new CorruptIndexException("Invalid decoder state: needsInput=" + decompressor.needsInput()
+ ", needsDict=" + decompressor.needsDictionary(), in);
}
} finally {
decompressor.end();
}
if (bytes.length != originalLength) {
throw new CorruptIndexException("Lengths mismatch: " + bytes.length + " != " + originalLength, in);
@ -240,6 +250,7 @@ public abstract class CompressionMode {
final Deflater compressor;
byte[] compressed;
boolean closed;
DeflateCompressor(int level) {
compressor = new Deflater(level, true);
@ -275,6 +286,14 @@ public abstract class CompressionMode {
out.writeBytes(compressed, totalCount);
}
/**
 * Ends the wrapped {@code Deflater} the first time this method is called;
 * later calls return immediately.
 */
@Override
public void close() throws IOException {
  if (closed) {
    // already ended on a previous call
    return;
  }
  compressor.end();
  closed = true;
}
}
}

View File

@ -17,6 +17,7 @@
package org.apache.lucene.codecs.compressing;
import java.io.Closeable;
import java.io.IOException;
import org.apache.lucene.store.DataOutput;
@ -24,7 +25,7 @@ import org.apache.lucene.store.DataOutput;
/**
* A data compressor.
*/
public abstract class Compressor {
public abstract class Compressor implements Closeable {
/** Sole constructor, typically called from sub-classes. */
protected Compressor() {}

View File

@ -16,7 +16,9 @@
*/
package org.apache.lucene.geo;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.SloppyMath;
import static org.apache.lucene.geo.GeoUtils.MAX_LAT_INCL;
import static org.apache.lucene.geo.GeoUtils.MAX_LON_INCL;
@ -144,4 +146,142 @@ public final class GeoEncodingUtils {
public static double decodeLongitude(byte[] src, int offset) {
return decodeLongitude(NumericUtils.sortableBytesToInt(src, offset));
}
/** Create a predicate that checks whether points are within a distance of a given point.
 *  It works by computing the bounding box around the circle that is defined
 *  by the given points/distance and splitting it into between 1024 and 4096
 *  smaller boxes (4096*0.75^2=2304 on average). Then for each sub box, it
 *  computes the relation between this box and the distance query. Finally at
 *  search time, it first computes the sub box that the point belongs to,
 *  most of the time, no distance computation will need to be performed since
 *  all points from the sub box will either be in or out of the circle.
 *  <p>If the bounding box, once rounded inward to encoded coordinates, contains
 *  no encoded point at all (possible for very small radii, including 0), a
 *  predicate that rejects every point is returned.
 *  @param lat latitude of the circle center, in degrees
 *  @param lon longitude of the circle center, in degrees
 *  @param radiusMeters radius of the circle, in meters
 *  @return a predicate operating on encoded latitude/longitude values
 *  @lucene.internal */
public static DistancePredicate createDistancePredicate(double lat, double lon, double radiusMeters) {
  final Rectangle boundingBox = Rectangle.fromPointDistance(lat, lon, radiusMeters);
  // The minimum bounds are rounded up and the maximum bounds rounded down so
  // that the encoded box never extends beyond the real bounding box.
  final int minLat = encodeLatitudeCeil(boundingBox.minLat);
  final int maxLat = encodeLatitude(boundingBox.maxLat);
  final int minLon = encodeLongitudeCeil(boundingBox.minLon);
  final int maxLon = encodeLongitude(boundingBox.maxLon);
  final double distanceSortKey = GeoUtils.distanceQuerySortKey(radiusMeters);

  if (maxLat < minLat || (boundingBox.crossesDateline() == false && maxLon < minLon)) {
    // Inward rounding left an empty box: no encoded point can match. With
    // maxLatDelta == 0 the latitude range check in apply() rejects everything.
    return new DistancePredicate(1, 1, 0, 0, 0, 0, new byte[0], lat, lon, distanceSortKey);
  }

  final int latShift, lonShift;
  final int latBase, lonBase;
  final int maxLatDelta, maxLonDelta;
  {
    // Subtracting MIN_VALUE maps the signed encoding onto an unsigned range.
    long minLat2 = (long) minLat - Integer.MIN_VALUE;
    long maxLat2 = (long) maxLat - Integer.MIN_VALUE;
    latShift = computeShift(minLat2, maxLat2);
    latBase = (int) (minLat2 >>> latShift);
    maxLatDelta = (int) (maxLat2 >>> latShift) - latBase + 1;
    assert maxLatDelta > 0;
  }
  {
    long minLon2 = (long) minLon - Integer.MIN_VALUE;
    long maxLon2 = (long) maxLon - Integer.MIN_VALUE;
    if (boundingBox.crossesDateline()) {
      maxLon2 += 1L << 32; // wrap
    }
    lonShift = computeShift(minLon2, maxLon2);
    lonBase = (int) (minLon2 >>> lonShift);
    maxLonDelta = (int) (maxLon2 >>> lonShift) - lonBase + 1;
    assert maxLonDelta > 0;
  }

  final double axisLat = Rectangle.axisLat(lat, radiusMeters);

  // One relation per sub box, stored row-major: index = latCell * maxLonDelta + lonCell.
  final byte[] relations = new byte[maxLatDelta * maxLonDelta];
  for (int i = 0; i < maxLatDelta; ++i) {
    for (int j = 0; j < maxLonDelta; ++j) {
      final int boxMinLat = ((latBase + i) << latShift) + Integer.MIN_VALUE;
      final int boxMinLon = ((lonBase + j) << lonShift) + Integer.MIN_VALUE;
      final int boxMaxLat = boxMinLat + (1 << latShift) - 1;
      final int boxMaxLon = boxMinLon + (1 << lonShift) - 1;

      relations[i * maxLonDelta + j] = (byte) GeoUtils.relate(
          decodeLatitude(boxMinLat), decodeLatitude(boxMaxLat),
          decodeLongitude(boxMinLon), decodeLongitude(boxMaxLon),
          lat, lon, distanceSortKey, axisLat).ordinal();
    }
  }

  return new DistancePredicate(
      latShift, lonShift,
      latBase, lonBase,
      maxLatDelta, maxLonDelta,
      relations,
      lat, lon, distanceSortKey);
}

/** Compute the minimum shift value so that
 * {@code (b>>>shift)-(a>>>shift)} is less than {@code ARITY}.
 * Requires {@code a <= b}; equal bounds (a box of a single encoded value) are valid. */
private static int computeShift(long a, long b) {
  assert a <= b;
  // We enforce a shift of at least 1 so that when we work with unsigned ints
  // by doing (lat - MIN_VALUE), the result of the shift (lat - MIN_VALUE) >>> shift
  // can be used for comparisons without particular care: the sign bit has
  // been cleared so comparisons work the same for signed and unsigned ints
  for (int shift = 1; ; ++shift) {
    final long delta = (b >>> shift) - (a >>> shift);
    if (delta >= 0 && delta < DistancePredicate.ARITY) {
      return shift;
    }
  }
}
/** A predicate that checks whether a given point is within a distance of another point. */
public static class DistancePredicate {

  private static final int ARITY = 64;

  // Grid geometry: shift applied to encoded coordinates, first cell index and
  // number of cells along each axis.
  private final int latShift, lonShift;
  private final int latBase, lonBase;
  private final int maxLatDelta, maxLonDelta;
  // One Relation ordinal per grid cell, row-major by latitude cell.
  private final byte[] relations;
  // Center of the circle and the sort key of its radius.
  private final double lat, lon;
  private final double distanceKey;

  private DistancePredicate(
      int latShift, int lonShift,
      int latBase, int lonBase,
      int maxLatDelta, int maxLonDelta,
      byte[] relations,
      double lat, double lon, double distanceKey) {
    this.latShift = latShift;
    this.lonShift = lonShift;
    this.latBase = latBase;
    this.lonBase = lonBase;
    this.maxLatDelta = maxLatDelta;
    this.maxLonDelta = maxLonDelta;
    this.relations = relations;
    this.lat = lat;
    this.lon = lon;
    this.distanceKey = distanceKey;
  }

  /** Check whether the given point is within a distance of another point.
   * NOTE: this operates directly on the encoded representation of points. */
  public boolean apply(int lat, int lon) {
    final int latCell = (lat - Integer.MIN_VALUE) >>> latShift;
    if (latCell < latBase || latCell >= latBase + maxLatDelta) {
      // latitude falls outside the grid
      return false;
    }

    int lonCell = (lon - Integer.MIN_VALUE) >>> lonShift;
    if (lonCell < lonBase) { // wrap
      lonCell += 1L << (32 - lonShift);
      assert lonCell >= lonBase;
    }
    final int lonOffset = lonCell - lonBase;
    if (lonOffset >= maxLonDelta) {
      // longitude falls outside the grid
      return false;
    }

    final int cellRelation = relations[(latCell - latBase) * maxLonDelta + lonOffset];
    if (cellRelation == Relation.CELL_INSIDE_QUERY.ordinal()) {
      return true;
    }
    if (cellRelation != Relation.CELL_CROSSES_QUERY.ordinal()) {
      return false;
    }
    // The cell straddles the circle boundary: fall back to a real distance check.
    return SloppyMath.haversinSortKey(
        decodeLatitude(lat), decodeLongitude(lon),
        this.lat, this.lon) <= distanceKey;
  }
}
}

View File

@ -20,6 +20,10 @@ import static org.apache.lucene.util.SloppyMath.TO_RADIANS;
import static org.apache.lucene.util.SloppyMath.cos;
import static org.apache.lucene.util.SloppyMath.haversinMeters;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.util.SloppyMath;
/**
* Basic reusable geo-spatial utility methods
*
@ -124,4 +128,39 @@ public final class GeoUtils {
assert haversinMeters(ceil) > radius;
return ceil;
}
/**
 * Compute the relation between the provided box and distance query.
 * This only works for boxes that do not cross the dateline.
 *
 * @throws IllegalArgumentException if {@code minLon} is greater than {@code maxLon}
 */
public static PointValues.Relation relate(
    double minLat, double maxLat, double minLon, double maxLon,
    double lat, double lon, double distanceSortKey, double axisLat) {

  if (minLon > maxLon) {
    throw new IllegalArgumentException("Box crosses the dateline");
  }

  final boolean centerLonOutsideBox = lon < minLon || lon > maxLon;
  final boolean axisLatOutsideBox =
      axisLat + Rectangle.AXISLAT_ERROR < minLat || axisLat - Rectangle.AXISLAT_ERROR > maxLat;

  if (centerLonOutsideBox && axisLatOutsideBox) {
    // circle not fully inside / crossing axis
    final boolean everyCornerBeyondRadius =
        SloppyMath.haversinSortKey(lat, lon, minLat, minLon) > distanceSortKey
        && SloppyMath.haversinSortKey(lat, lon, minLat, maxLon) > distanceSortKey
        && SloppyMath.haversinSortKey(lat, lon, maxLat, minLon) > distanceSortKey
        && SloppyMath.haversinSortKey(lat, lon, maxLat, maxLon) > distanceSortKey;
    if (everyCornerBeyondRadius) {
      // no points inside
      return Relation.CELL_OUTSIDE_QUERY;
    }
  }

  final boolean boxWithin90DegreesOfCenter = maxLon - lon < 90 && lon - minLon < 90;
  final boolean fullyEnclosed = boxWithin90DegreesOfCenter
      && SloppyMath.haversinSortKey(lat, lon, minLat, minLon) <= distanceSortKey
      && SloppyMath.haversinSortKey(lat, lon, minLat, maxLon) <= distanceSortKey
      && SloppyMath.haversinSortKey(lat, lon, maxLat, minLon) <= distanceSortKey
      && SloppyMath.haversinSortKey(lat, lon, maxLat, maxLon) <= distanceSortKey;

  // when fully enclosed, everything within this subtree can be collected
  return fullyEnclosed ? Relation.CELL_INSIDE_QUERY : Relation.CELL_CROSSES_QUERY;
}
}

View File

@ -20,8 +20,10 @@ package org.apache.lucene.index;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
@ -324,6 +326,10 @@ public class FieldInfos implements Iterable<FieldInfo> {
}
}
/** Returns an unmodifiable copy of the field names currently registered in {@code nameToNumber}. */
synchronized Set<String> getFieldNames() {
  // Copy while holding this object's monitor, then wrap the copy read-only.
  final Set<String> snapshot = new HashSet<>(nameToNumber.keySet());
  return Collections.unmodifiableSet(snapshot);
}
synchronized void clear() {
numberToName.clear();
nameToNumber.clear();

View File

@ -33,6 +33,11 @@ public abstract class IndexReaderContext {
/** the ord for this reader in the parent, <tt>0</tt> if parent is null */
public final int ordInParent;
// An object that uniquely identifies this context without referencing
// segments. The goal is to make it fine to have references to this
// identity object, even after the index reader has been closed
final Object identity = new Object();
IndexReaderContext(CompositeReaderContext parent, int ordInParent, int docBaseInParent) {
if (!(this instanceof CompositeReaderContext || this instanceof LeafReaderContext))
throw new Error("This class should never be extended by custom code!");

View File

@ -1783,6 +1783,22 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
return flushDeletesCount.get();
}
/**
* Return an unmodifiable set of all field names as visible
* from this IndexWriter, across all segments of the index.
* Useful for knowing which fields exist, before {@link #updateDocValues(Term, Field...)} is
* attempted. We could phase out this method if
* {@link #updateDocValues(Term, Field...)} could create the non-existent
* docValues fields as necessary, instead of throwing
* IllegalArgumentException for attempts to update non-existent
* docValues fields.
* @return the field names known to this writer's global field number map, as an unmodifiable set
* @lucene.internal
* @lucene.experimental
*/
public Set<String> getFieldNames() {
return globalFieldNumberMap.getFieldNames(); // FieldNumbers#getFieldNames() returns an unmodifiableSet
}
final String newSegmentName() {
// Cannot synchronize on IndexWriter because that causes
// deadlock

View File

@ -33,12 +33,8 @@ import java.util.Arrays;
*/
public final class TermContext {
/** Holds the {@link IndexReaderContext} of the top-level
* {@link IndexReader}, used internally only for
* asserting.
*
* @lucene.internal */
public final IndexReaderContext topReaderContext;
// Important: do NOT keep hard references to index readers
private final Object topReaderContextIdentity;
private final TermState[] states;
private int docFreq;
private long totalTermFreq;
@ -50,7 +46,7 @@ public final class TermContext {
*/
public TermContext(IndexReaderContext context) {
assert context != null && context.isTopLevel;
topReaderContext = context;
topReaderContextIdentity = context.identity;
docFreq = 0;
totalTermFreq = 0;
final int len;
@ -62,6 +58,15 @@ public final class TermContext {
states = new TermState[len];
}
/**
* Expert: Return whether this {@link TermContext} was built for the given
* {@link IndexReaderContext}. This is typically used for assertions.
* The check compares the identity object captured at construction time with
* {@code context.identity} by reference.
* @param context the reader context to compare against; must not be null
* @return {@code true} if both refer to the same identity object
* @lucene.internal
*/
public boolean wasBuiltFor(IndexReaderContext context) {
return topReaderContextIdentity == context.identity;
}
/**
* Creates a {@link TermContext} with an initial {@link TermState},
* {@link IndexReader} pair.

View File

@ -22,6 +22,7 @@ import java.util.Arrays;
import java.util.List;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
@ -264,7 +265,7 @@ public final class BlendedTermQuery extends Query {
public final Query rewrite(IndexReader reader) throws IOException {
final TermContext[] contexts = Arrays.copyOf(this.contexts, this.contexts.length);
for (int i = 0; i < contexts.length; ++i) {
if (contexts[i] == null || contexts[i].topReaderContext != reader.getContext()) {
if (contexts[i] == null || contexts[i].wasBuiltFor(reader.getContext()) == false) {
contexts[i] = TermContext.build(reader.getContext(), terms[i]);
}
}
@ -284,7 +285,7 @@ public final class BlendedTermQuery extends Query {
}
for (int i = 0; i < contexts.length; ++i) {
contexts[i] = adjustFrequencies(contexts[i], df, ttf);
contexts[i] = adjustFrequencies(reader.getContext(), contexts[i], df, ttf);
}
Query[] termQueries = new Query[terms.length];
@ -297,15 +298,16 @@ public final class BlendedTermQuery extends Query {
return rewriteMethod.rewrite(termQueries);
}
private static TermContext adjustFrequencies(TermContext ctx, int artificialDf, long artificialTtf) {
List<LeafReaderContext> leaves = ctx.topReaderContext.leaves();
private static TermContext adjustFrequencies(IndexReaderContext readerContext,
TermContext ctx, int artificialDf, long artificialTtf) {
List<LeafReaderContext> leaves = readerContext.leaves();
final int len;
if (leaves == null) {
len = 1;
} else {
len = leaves.size();
}
TermContext newCtx = new TermContext(ctx.topReaderContext);
TermContext newCtx = new TermContext(readerContext);
for (int i = 0; i < len; ++i) {
TermState termState = ctx.get(i);
if (termState == null) {

View File

@ -86,7 +86,7 @@ public class TermQuery extends Query {
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
assert termStates == null || termStates.topReaderContext == ReaderUtil.getTopLevelContext(context) : "The top-reader used to create Weight (" + termStates.topReaderContext + ") is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context);;
assert termStates == null || termStates.wasBuiltFor(ReaderUtil.getTopLevelContext(context)) : "The top-reader used to create Weight is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context);;
final TermsEnum termsEnum = getTermsEnum(context);
if (termsEnum == null) {
return null;
@ -103,7 +103,7 @@ public class TermQuery extends Query {
private TermsEnum getTermsEnum(LeafReaderContext context) throws IOException {
if (termStates != null) {
// TermQuery either used as a Query or the term states have been provided at construction time
assert termStates.topReaderContext == ReaderUtil.getTopLevelContext(context) : "The top-reader used to create Weight (" + termStates.topReaderContext + ") is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context);
assert termStates.wasBuiltFor(ReaderUtil.getTopLevelContext(context)) : "The top-reader used to create Weight is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context);
final TermState state = termStates.get(context.ord);
if (state == null) { // term is not present in that reader
assert termNotInReader(context.reader(), term) : "no termstate found but term exists in reader term=" + term;
@ -181,7 +181,7 @@ public class TermQuery extends Query {
final IndexReaderContext context = searcher.getTopReaderContext();
final TermContext termState;
if (perReaderTermState == null
|| perReaderTermState.topReaderContext != context) {
|| perReaderTermState.wasBuiltFor(context) == false) {
if (needsScores) {
// make TermQuery single-pass if we don't have a PRTS or if the context
// differs!

View File

@ -67,7 +67,7 @@ public class SpanTermQuery extends SpanQuery {
public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
final TermContext context;
final IndexReaderContext topContext = searcher.getTopReaderContext();
if (termContext == null || termContext.topReaderContext != topContext) {
if (termContext == null || termContext.wasBuiltFor(topContext) == false) {
context = TermContext.build(topContext, term);
}
else {
@ -99,7 +99,7 @@ public class SpanTermQuery extends SpanQuery {
@Override
public Spans getSpans(final LeafReaderContext context, Postings requiredPostings) throws IOException {
assert termContext.topReaderContext == ReaderUtil.getTopLevelContext(context) : "The top-reader used to create Weight (" + termContext.topReaderContext + ") is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context);
assert termContext.wasBuiltFor(ReaderUtil.getTopLevelContext(context)) : "The top-reader used to create Weight is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context);
final TermState state = termContext.get(context.ord);
if (state == null) { // term is not present in that reader

View File

@ -80,6 +80,13 @@ public final class Version {
@Deprecated
public static final Version LUCENE_6_4_0 = new Version(6, 4, 0);
/**
* Match settings and bugs in Lucene's 6.4.1 release.
* @deprecated Use latest
*/
@Deprecated
public static final Version LUCENE_6_4_1 = new Version(6, 4, 1);
/**
* Match settings and bugs in Lucene's 6.5.0 release.
* @deprecated Use latest

View File

@ -0,0 +1,145 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.document;
import java.io.IOException;
import org.apache.lucene.geo.GeoEncodingUtils;
import org.apache.lucene.geo.GeoUtils;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;
/**
 * Bounding-box query for {@link LatLonDocValuesField}.
 * The box bounds are stored in their encoded integer form and each doc value
 * of a candidate document is compared against them.
 */
final class LatLonDocValuesBoxQuery extends Query {

  private final String field;
  // Encoded bounds: minimums are rounded up, maximums rounded down.
  private final int minLatitude, maxLatitude, minLongitude, maxLongitude;
  private final boolean crossesDateline;

  /**
   * @param field field name, must not be null
   * @param minLatitude lower latitude bound, in degrees
   * @param maxLatitude upper latitude bound, in degrees
   * @param minLongitude lower longitude bound, in degrees
   * @param maxLongitude upper longitude bound, in degrees; a value smaller than
   *        {@code minLongitude} means the box crosses the dateline
   * @throws IllegalArgumentException if {@code field} is null
   */
  LatLonDocValuesBoxQuery(String field, double minLatitude, double maxLatitude, double minLongitude, double maxLongitude) {
    GeoUtils.checkLatitude(minLatitude);
    GeoUtils.checkLatitude(maxLatitude);
    GeoUtils.checkLongitude(minLongitude);
    GeoUtils.checkLongitude(maxLongitude);
    if (field == null) {
      throw new IllegalArgumentException("field must not be null");
    }
    this.field = field;

    this.crossesDateline = minLongitude > maxLongitude; // make sure to compute this before rounding
    this.minLatitude = GeoEncodingUtils.encodeLatitudeCeil(minLatitude);
    this.maxLatitude = GeoEncodingUtils.encodeLatitude(maxLatitude);
    this.minLongitude = GeoEncodingUtils.encodeLongitudeCeil(minLongitude);
    this.maxLongitude = GeoEncodingUtils.encodeLongitude(maxLongitude);
  }

  @Override
  public String toString(String field) {
    StringBuilder sb = new StringBuilder();
    if (!this.field.equals(field)) {
      sb.append(this.field);
      sb.append(':');
    }
    sb.append("box(minLat=").append(GeoEncodingUtils.decodeLatitude(minLatitude));
    sb.append(", maxLat=").append(GeoEncodingUtils.decodeLatitude(maxLatitude));
    sb.append(", minLon=").append(GeoEncodingUtils.decodeLongitude(minLongitude));
    sb.append(", maxLon=").append(GeoEncodingUtils.decodeLongitude(maxLongitude));
    return sb.append(")").toString();
  }

  @Override
  public boolean equals(Object obj) {
    if (sameClassAs(obj) == false) {
      return false;
    }
    LatLonDocValuesBoxQuery other = (LatLonDocValuesBoxQuery) obj;
    return field.equals(other.field) &&
        crossesDateline == other.crossesDateline &&
        minLatitude == other.minLatitude &&
        maxLatitude == other.maxLatitude &&
        minLongitude == other.minLongitude &&
        maxLongitude == other.maxLongitude;
  }

  @Override
  public int hashCode() {
    int h = classHash();
    h = 31 * h + field.hashCode();
    h = 31 * h + Boolean.hashCode(crossesDateline);
    h = 31 * h + Integer.hashCode(minLatitude);
    h = 31 * h + Integer.hashCode(maxLatitude);
    h = 31 * h + Integer.hashCode(minLongitude);
    h = 31 * h + Integer.hashCode(maxLongitude);
    return h;
  }

  @Override
  public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    return new ConstantScoreWeight(this, boost) {
      @Override
      public Scorer scorer(LeafReaderContext context) throws IOException {
        final SortedNumericDocValues values = context.reader().getSortedNumericDocValues(field);
        if (values == null) {
          return null;
        }

        final TwoPhaseIterator iterator = new TwoPhaseIterator(values) {
          @Override
          public boolean matches() throws IOException {
            // A document matches as soon as one of its values falls inside the box.
            for (int i = 0, count = values.docValueCount(); i < count; ++i) {
              final long value = values.nextValue();
              // High 32 bits are read as the encoded latitude.
              final int lat = (int) (value >>> 32);
              if (lat < minLatitude || lat > maxLatitude) {
                // not within latitude range
                continue;
              }

              // Low 32 bits are read as the encoded longitude. The narrowing cast
              // keeps exactly those bits; the previous "& 0xFFFFFFFF" used an int
              // literal (-1) that widened to all ones and masked nothing.
              final int lon = (int) value;
              if (crossesDateline) {
                if (lon > maxLongitude && lon < minLongitude) {
                  // not within longitude range
                  continue;
                }
              } else {
                if (lon < minLongitude || lon > maxLongitude) {
                  // not within longitude range
                  continue;
                }
              }

              return true;
            }
            return false;
          }

          @Override
          public float matchCost() {
            return 5; // 5 comparisons
          }
        };
        return new ConstantScoreScorer(this, boost, iterator);
      }
    };
  }

}

View File

@ -0,0 +1,132 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.document;
import java.io.IOException;
import org.apache.lucene.geo.GeoEncodingUtils;
import org.apache.lucene.geo.GeoUtils;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;
/**
 * Distance query for {@link LatLonDocValuesField}.
 * Matches documents that have at least one value accepted by the
 * {@link GeoEncodingUtils.DistancePredicate} built from the center and radius.
 */
final class LatLonDocValuesDistanceQuery extends Query {

  private final String field;
  private final double latitude, longitude;
  private final double radiusMeters;

  /**
   * @param field field name, must not be null
   * @param latitude latitude of the center, in degrees
   * @param longitude longitude of the center, in degrees
   * @param radiusMeters maximum distance from the center in meters; must be finite and non-negative
   * @throws IllegalArgumentException if {@code radiusMeters} is negative or not finite,
   *         or if {@code field} is null
   */
  LatLonDocValuesDistanceQuery(String field, double latitude, double longitude, double radiusMeters) {
    if (Double.isFinite(radiusMeters) == false || radiusMeters < 0) {
      throw new IllegalArgumentException("radiusMeters: '" + radiusMeters + "' is invalid");
    }
    GeoUtils.checkLatitude(latitude);
    GeoUtils.checkLongitude(longitude);
    if (field == null) {
      throw new IllegalArgumentException("field must not be null");
    }
    this.field = field;
    this.latitude = latitude;
    this.longitude = longitude;
    this.radiusMeters = radiusMeters;
  }

  @Override
  public String toString(String field) {
    StringBuilder sb = new StringBuilder();
    if (!this.field.equals(field)) {
      sb.append(this.field);
      sb.append(':');
    }
    sb.append(latitude);
    sb.append(",");
    sb.append(longitude);
    sb.append(" +/- ");
    sb.append(radiusMeters);
    sb.append(" meters");
    return sb.toString();
  }

  @Override
  public boolean equals(Object obj) {
    if (sameClassAs(obj) == false) {
      return false;
    }
    LatLonDocValuesDistanceQuery other = (LatLonDocValuesDistanceQuery) obj;
    // Compare doubles by bit pattern so equals() stays consistent with hashCode().
    return field.equals(other.field) &&
        Double.doubleToLongBits(latitude) == Double.doubleToLongBits(other.latitude) &&
        Double.doubleToLongBits(longitude) == Double.doubleToLongBits(other.longitude) &&
        Double.doubleToLongBits(radiusMeters) == Double.doubleToLongBits(other.radiusMeters);
  }

  @Override
  public int hashCode() {
    int h = classHash();
    h = 31 * h + field.hashCode();
    h = 31 * h + Double.hashCode(latitude);
    h = 31 * h + Double.hashCode(longitude);
    h = 31 * h + Double.hashCode(radiusMeters);
    return h;
  }

  @Override
  public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
    return new ConstantScoreWeight(this, boost) {

      // Built once per weight and shared by the scorers of all segments.
      private final GeoEncodingUtils.DistancePredicate distancePredicate = GeoEncodingUtils.createDistancePredicate(latitude, longitude, radiusMeters);

      @Override
      public Scorer scorer(LeafReaderContext context) throws IOException {
        final SortedNumericDocValues values = context.reader().getSortedNumericDocValues(field);
        if (values == null) {
          return null;
        }

        final TwoPhaseIterator iterator = new TwoPhaseIterator(values) {

          @Override
          public boolean matches() throws IOException {
            for (int i = 0, count = values.docValueCount(); i < count; ++i) {
              final long value = values.nextValue();
              // High 32 bits are read as the encoded latitude.
              final int lat = (int) (value >>> 32);
              // Low 32 bits are read as the encoded longitude. The narrowing cast
              // keeps exactly those bits; the previous "& 0xFFFFFFFF" used an int
              // literal (-1) that widened to all ones and masked nothing.
              final int lon = (int) value;
              if (distancePredicate.apply(lat, lon)) {
                return true;
              }
            }
            return false;
          }

          @Override
          public float matchCost() {
            return 100f; // TODO: what should it be?
          }

        };
        return new ConstantScoreScorer(this, boost, iterator);
      }
    };
  }

}

View File

@ -24,6 +24,9 @@ import static org.apache.lucene.geo.GeoEncodingUtils.encodeLongitude;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexOrDocValuesQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SortField;
/**
@ -132,4 +135,47 @@ public class LatLonDocValuesField extends Field {
public static SortField newDistanceSort(String field, double latitude, double longitude) {
return new LatLonPointSortField(field, latitude, longitude);
}
/**
 * Create a query for matching a bounding box using doc values.
 * This query is usually slow as it does not use an index structure and needs
 * to verify documents one-by-one in order to know whether they match. It is
 * best used wrapped in an {@link IndexOrDocValuesQuery} alongside a
 * {@link LatLonPoint#newBoxQuery}.
 */
public static Query newBoxQuery(String field, double minLatitude, double maxLatitude, double minLongitude, double maxLongitude) {
  // exact double values of lat=90.0D and lon=180.0D must be treated special as they are not represented in the encoding
  // and should not drag in extra bogus junk! TODO: should encodeCeil just throw ArithmeticException to be less trappy here?
  if (minLatitude == 90.0) {
    // range cannot match as 90.0 can never exist
    return new MatchNoDocsQuery("LatLonDocValuesField.newBoxQuery with minLatitude=90.0");
  }

  double effectiveMinLongitude = minLongitude;
  if (minLongitude == 180.0) {
    if (maxLongitude == 180.0) {
      // range cannot match as 180.0 can never exist
      return new MatchNoDocsQuery("LatLonDocValuesField.newBoxQuery with minLongitude=maxLongitude=180.0");
    }
    if (maxLongitude < minLongitude) {
      // encodeCeil() with dateline wrapping!
      effectiveMinLongitude = -180.0;
    }
  }

  return new LatLonDocValuesBoxQuery(field, minLatitude, maxLatitude, effectiveMinLongitude, maxLongitude);
}
/**
 * Create a doc-values based query matching points that lie within the given
 * distance of the supplied center.
 * Like other doc-values queries it is usually slow, since it has no index
 * structure to rely on and must verify documents one-by-one. It is best used
 * wrapped in an {@link IndexOrDocValuesQuery} alongside a
 * {@link LatLonPoint#newDistanceQuery}.
 *
 * @param field field name. must not be null.
 * @param latitude latitude at the center: must be within standard +/-90 coordinate bounds.
 * @param longitude longitude at the center: must be within standard +/-180 coordinate bounds.
 * @param radiusMeters maximum distance from the center in meters: must be non-negative and finite.
 * @return query matching points within this distance
 * @throws IllegalArgumentException if {@code field} is null, location has invalid coordinates, or radius is invalid.
 */
public static Query newDistanceQuery(String field, double latitude, double longitude, double radiusMeters) {
  final Query distanceQuery = new LatLonDocValuesDistanceQuery(field, latitude, longitude, radiusMeters);
  return distanceQuery;
}
}

View File

@ -18,6 +18,7 @@ package org.apache.lucene.document;
import java.io.IOException;
import org.apache.lucene.geo.GeoEncodingUtils;
import org.apache.lucene.geo.GeoUtils;
import org.apache.lucene.geo.Rectangle;
import org.apache.lucene.index.FieldInfo;
@ -31,10 +32,10 @@ import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ScorerSupplier;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.SloppyMath;
import org.apache.lucene.util.StringHelper;
import static org.apache.lucene.geo.GeoEncodingUtils.decodeLatitude;
@ -102,8 +103,19 @@ final class LatLonPointDistanceQuery extends Query {
return new ConstantScoreWeight(this, boost) {
final GeoEncodingUtils.DistancePredicate distancePredicate = GeoEncodingUtils.createDistancePredicate(latitude, longitude, radiusMeters);
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
ScorerSupplier scorerSupplier = scorerSupplier(context);
if (scorerSupplier == null) {
return null;
}
return scorerSupplier.get(false);
}
@Override
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
LeafReader reader = context.reader();
PointValues values = reader.getPointValues(field);
if (values == null) {
@ -119,8 +131,7 @@ final class LatLonPointDistanceQuery extends Query {
// matching docids
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);
values.intersect(
final IntersectVisitor visitor =
new IntersectVisitor() {
DocIdSetBuilder.BulkAdder adder;
@ -151,11 +162,9 @@ final class LatLonPointDistanceQuery extends Query {
return;
}
double docLatitude = decodeLatitude(packedValue, 0);
double docLongitude = decodeLongitude(packedValue, Integer.BYTES);
// its a match only if its sortKey <= our sortKey
if (SloppyMath.haversinSortKey(latitude, longitude, docLatitude, docLongitude) <= sortKey) {
int docLatitude = NumericUtils.sortableBytesToInt(packedValue, 0);
int docLongitude = NumericUtils.sortableBytesToInt(packedValue, Integer.BYTES);
if (distancePredicate.apply(docLatitude, docLongitude)) {
adder.add(docID);
}
}
@ -185,32 +194,30 @@ final class LatLonPointDistanceQuery extends Query {
double latMax = decodeLatitude(maxPackedValue, 0);
double lonMax = decodeLongitude(maxPackedValue, Integer.BYTES);
if ((longitude < lonMin || longitude > lonMax) && (axisLat+ Rectangle.AXISLAT_ERROR < latMin || axisLat- Rectangle.AXISLAT_ERROR > latMax)) {
// circle not fully inside / crossing axis
if (SloppyMath.haversinSortKey(latitude, longitude, latMin, lonMin) > sortKey &&
SloppyMath.haversinSortKey(latitude, longitude, latMin, lonMax) > sortKey &&
SloppyMath.haversinSortKey(latitude, longitude, latMax, lonMin) > sortKey &&
SloppyMath.haversinSortKey(latitude, longitude, latMax, lonMax) > sortKey) {
// no points inside
return Relation.CELL_OUTSIDE_QUERY;
}
}
if (lonMax - longitude < 90 && longitude - lonMin < 90 &&
SloppyMath.haversinSortKey(latitude, longitude, latMin, lonMin) <= sortKey &&
SloppyMath.haversinSortKey(latitude, longitude, latMin, lonMax) <= sortKey &&
SloppyMath.haversinSortKey(latitude, longitude, latMax, lonMin) <= sortKey &&
SloppyMath.haversinSortKey(latitude, longitude, latMax, lonMax) <= sortKey) {
// we are fully enclosed, collect everything within this subtree
return Relation.CELL_INSIDE_QUERY;
} else {
// recurse: its inside our bounding box(es), but not fully, or it wraps around.
return Relation.CELL_CROSSES_QUERY;
}
return GeoUtils.relate(latMin, latMax, lonMin, lonMax, latitude, longitude, sortKey, axisLat);
}
});
};
final Weight weight = this;
return new ScorerSupplier() {
long cost = -1;
@Override
public Scorer get(boolean randomAccess) throws IOException {
values.intersect(visitor);
return new ConstantScoreScorer(weight, score(), result.build().iterator());
}
@Override
public long cost() {
if (cost == -1) {
cost = values.estimatePointCount(visitor);
}
assert cost >= 0;
return cost;
}
};
return new ConstantScoreScorer(this, score(), result.build().iterator());
}
};
}

View File

@ -378,7 +378,7 @@ public class TermAutomatonQuery extends Query {
boolean any = false;
for(Map.Entry<Integer,TermContext> ent : termStates.entrySet()) {
TermContext termContext = ent.getValue();
assert termContext.topReaderContext == ReaderUtil.getTopLevelContext(context) : "The top-reader used to create Weight (" + termContext.topReaderContext + ") is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context);
assert termContext.wasBuiltFor(ReaderUtil.getTopLevelContext(context)) : "The top-reader used to create Weight is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context);
BytesRef term = idToTerm.get(ent.getKey());
TermState state = termContext.get(context.ord);
if (state != null) {

View File

@ -0,0 +1,62 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.LatLonDocValuesField;
import org.apache.lucene.geo.BaseGeoPointTestCase;
import org.apache.lucene.geo.GeoEncodingUtils;
import org.apache.lucene.geo.Polygon;
/**
 * Runs the shared geo-point test suite against the doc-values based box and
 * distance queries of {@link LatLonDocValuesField}. Polygon tests are disabled
 * via {@link #supportsPolygons()}.
 */
public class TestLatLonDocValuesQueries extends BaseGeoPointTestCase {

  /** Polygon queries are not provided by this implementation. */
  @Override
  protected boolean supportsPolygons() {
    return false;
  }

  /** Indexes the point as a single {@link LatLonDocValuesField}. */
  @Override
  protected void addPointToDoc(String field, Document doc, double lat, double lon) {
    final LatLonDocValuesField point = new LatLonDocValuesField(field, lat, lon);
    doc.add(point);
  }

  @Override
  protected Query newRectQuery(String field, double minLat, double maxLat, double minLon, double maxLon) {
    return LatLonDocValuesField.newBoxQuery(field, minLat, maxLat, minLon, maxLon);
  }

  @Override
  protected Query newDistanceQuery(String field, double centerLat, double centerLon, double radiusMeters) {
    return LatLonDocValuesField.newDistanceQuery(field, centerLat, centerLon, radiusMeters);
  }

  /** Must never be reached because {@link #supportsPolygons()} returns false; fails the test if it is. */
  @Override
  protected Query newPolygonQuery(String field, Polygon... polygons) {
    fail();
    return null;
  }

  /** Round-trips the latitude through the encoding so expected values match what is indexed. */
  @Override
  protected double quantizeLat(double latRaw) {
    final int encoded = GeoEncodingUtils.encodeLatitude(latRaw);
    return GeoEncodingUtils.decodeLatitude(encoded);
  }

  /** Round-trips the longitude through the encoding so expected values match what is indexed. */
  @Override
  protected double quantizeLon(double lonRaw) {
    final int encoded = GeoEncodingUtils.encodeLongitude(lonRaw);
    return GeoEncodingUtils.decodeLongitude(encoded);
  }
}

View File

@ -44,7 +44,11 @@ my @lines = <STDIN>; # Get all input at once
#
# Cmdline args: <LUCENE|SOLR> <project-DOAP-rdf-file> <lucene-javadoc-url>(only from Solr)
#
my $product = $ARGV[0];
my $product = uc($ARGV[0]);
if ($product !~ /^(LUCENE|SOLR)$/) {
print STDERR "Unknown product name '$ARGV[0]'\n";
exit(1);
}
my %release_dates = &setup_release_dates($ARGV[1]);
my $lucene_javadoc_url = ($product eq 'SOLR' ? $ARGV[2] : ''); # Only Solr supplies this on the cmdline
my $in_major_component_versions_section = 0;
@ -825,7 +829,6 @@ sub get_release_date {
sub setup_release_dates {
my %release_dates = ();
my $file = shift;
print STDERR "file: $file\n";
open(FILE, "<$file") || die "could not open $file: $!";
my $version_list = <FILE>;
my $created_list = <FILE>;

View File

@ -79,6 +79,9 @@ public class DummyCompressingCodec extends CompressingCodec {
out.writeBytes(bytes, off, len);
}
/** No-op: this implementation has an empty body and releases nothing. */
@Override
public void close() throws IOException {
  // intentionally empty
}
};
/** Constructor that allows to configure the chunk size. */

View File

@ -99,6 +99,11 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
return org.apache.lucene.geo.GeoTestUtil.nextPolygon();
}
/**
 * Whether this impl supports polygons.
 * <p>
 * Returns {@code true} by default. Polygon tests in this class are guarded by
 * this method (via {@code assumeTrue} or an {@code if}), so a subclass whose
 * implementation has no polygon query can override it to return {@code false}.
 */
protected boolean supportsPolygons() {
return true;
}
/** Valid values that should not cause exception */
public void testIndexExtremeValues() {
Document document = new Document();
@ -284,6 +289,7 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
/** test we can search for a polygon */
public void testPolygonBasics() throws Exception {
assumeTrue("Impl does not support polygons", supportsPolygons());
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
@ -306,6 +312,7 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
/** test we can search for a polygon with a hole (but still includes the doc) */
public void testPolygonHole() throws Exception {
assumeTrue("Impl does not support polygons", supportsPolygons());
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
@ -330,6 +337,7 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
/** test we can search for a polygon with a hole (that excludes the doc) */
public void testPolygonHoleExcludes() throws Exception {
assumeTrue("Impl does not support polygons", supportsPolygons());
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
@ -354,6 +362,7 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
/** test we can search for a multi-polygon */
public void testMultiPolygonBasics() throws Exception {
assumeTrue("Impl does not support polygons", supportsPolygons());
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
@ -378,6 +387,7 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
/** null field name not allowed */
public void testPolygonNullField() {
assumeTrue("Impl does not support polygons", supportsPolygons());
IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
newPolygonQuery(null, new Polygon(
new double[] { 18, 18, 19, 19, 18 },
@ -739,7 +749,9 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
}
verifyRandomRectangles(lats, lons);
verifyRandomDistances(lats, lons);
verifyRandomPolygons(lats, lons);
if (supportsPolygons()) {
verifyRandomPolygons(lats, lons);
}
}
protected void verifyRandomRectangles(double[] lats, double[] lons) throws Exception {
@ -844,6 +856,7 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
if (hits.get(docID) != expected) {
StringBuilder b = new StringBuilder();
b.append("docID=(" + docID + ")\n");
if (expected) {
b.append("FAIL: id=" + id + " should match but did not\n");
@ -1344,10 +1357,12 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
lons[3] = rect.maxLon;
lats[4] = rect.minLat;
lons[4] = rect.minLon;
q1 = newPolygonQuery("field", new Polygon(lats, lons));
q2 = newPolygonQuery("field", new Polygon(lats, lons));
assertEquals(q1, q2);
assertFalse(q1.equals(newPolygonQuery("field2", new Polygon(lats, lons))));
if (supportsPolygons()) {
q1 = newPolygonQuery("field", new Polygon(lats, lons));
q2 = newPolygonQuery("field", new Polygon(lats, lons));
assertEquals(q1, q2);
assertFalse(q1.equals(newPolygonQuery("field2", new Polygon(lats, lons))));
}
}
/** return topdocs over a small set of points in field "point" */
@ -1436,6 +1451,7 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
}
public void testSmallSetPoly() throws Exception {
assumeTrue("Impl does not support polygons", supportsPolygons());
TopDocs td = searchSmallSet(newPolygonQuery("point",
new Polygon(
new double[]{33.073130, 32.9942669, 32.938386, 33.0374494,
@ -1447,6 +1463,7 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
}
public void testSmallSetPolyWholeMap() throws Exception {
assumeTrue("Impl does not support polygons", supportsPolygons());
TopDocs td = searchSmallSet(newPolygonQuery("point",
new Polygon(
new double[] {GeoUtils.MIN_LAT_INCL, GeoUtils.MAX_LAT_INCL, GeoUtils.MAX_LAT_INCL, GeoUtils.MIN_LAT_INCL, GeoUtils.MIN_LAT_INCL},

View File

@ -76,6 +76,10 @@ Optimizations
* SOLR-9996: Unstored IntPointField returns Long type (Ishan Chattopadhyaya)
* SOLR-5944: In-place updates of Numeric DocValues. To leverage this, the _version_ field and the updated
field must both be stored=false, indexed=false, docValues=true. (Ishan Chattopadhyaya, hossman, noble,
shalin, yonik)
Other Changes
----------------------
* SOLR-8396: Add support for PointFields in Solr (Ishan Chattopadhyaya, Tomás Fernández Löbbe)
@ -133,6 +137,26 @@ Other Changes
----------------------
* SOLR-9980: Expose configVersion in core admin status (Jessica Cheng Mallet via Tomás Fernández Löbbe)
* SOLR-9972: SpellCheckComponent collations and suggestions returned as a JSON object rather than a list
(Christine Poerschke in response to bug report from Ricky Oktavianus Lazuardy)
================== 6.4.1 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.
Versions of Major Components
---------------------
Apache Tika 1.13
Carrot2 3.15.0
Velocity 1.7 and Velocity Tools 2.0
Apache UIMA 2.3.1
Apache ZooKeeper 3.4.6
Jetty 9.3.14.v20161028
Bug Fixes
----------------------
* SOLR-9969: "Plugin/Stats" section of the UI doesn't display empty metric types (Tomás Fernández Löbbe)
================== 6.4.0 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.

View File

@ -30,12 +30,15 @@ import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Random;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.solr.client.solrj.SolrClient;
@ -96,26 +99,69 @@ public class JettySolrRunner {
private int proxyPort = -1;
/**
 * Servlet filter used for debugging/testing: counts every request passing
 * through it and can pause selected requests for a configured time.
 */
public static class DebugFilter implements Filter {
  public final static Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

  private final AtomicLong nRequests = new AtomicLong();

  // Copy-on-write so that executeDelay() can iterate safely while addDelay()/unsetDelay()
  // mutate the list; presumably those are called from a test thread while requests are
  // being served on container threads.
  List<Delay> delays = new java.util.concurrent.CopyOnWriteArrayList<>();

  /** Returns the number of requests that have entered {@link #doFilter} so far. */
  public long getTotalRequests() {
    return nRequests.get();
  }

  /**
   * Introduce a delay of specified milliseconds for the specified request.
   *
   * @param reason Info message logged when delay occurs
   * @param count The count-th request will experience a delay
   * @param delay There will be a delay of this many milliseconds
   */
  public void addDelay(String reason, int count, int delay) {
    delays.add(new Delay(reason, count, delay));
  }

  /**
   * Remove any delay introduced before.
   */
  public void unsetDelay() {
    delays.clear();
  }

  @Override
  public void init(FilterConfig filterConfig) throws ServletException { }

  /** Counts the request, applies any delay that is due, then continues the filter chain. */
  @Override
  public void doFilter(ServletRequest servletRequest, ServletResponse servletResponse, FilterChain filterChain) throws IOException, ServletException {
    nRequests.incrementAndGet();
    executeDelay();
    filterChain.doFilter(servletRequest, servletResponse);
  }

  @Override
  public void destroy() { }

  /**
   * Decrements the counter of every registered delay and sleeps for the sum of
   * those whose counter just reached zero.
   */
  private void executeDelay() {
    int delayMs = 0;
    for (Delay delay : delays) {
      if (delay.counter.decrementAndGet() == 0) {
        // Log only when this delay actually fires, not on every request.
        log.info("Delaying "+delay.delayValue+", for reason: "+delay.reason);
        delayMs += delay.delayValue;
      }
    }
    if (delayMs > 0) {
      log.info("Pausing this socket connection for " + delayMs + "ms...");
      try {
        Thread.sleep(delayMs);
      } catch (InterruptedException e) {
        // Restore the interrupt status so callers further up can still observe it.
        Thread.currentThread().interrupt();
        throw new RuntimeException(e);
      }
      log.info("Waking up after the delay of " + delayMs + "ms...");
    }
  }
}
/**
@ -516,4 +562,16 @@ public class JettySolrRunner {
}
}
}
/**
 * Describes one pending delay: how many more requests must pass before it
 * fires, how long it lasts, and why it was registered.
 */
static class Delay {
  /** Remaining request count; initialised from the constructor's {@code counter} argument. */
  final AtomicInteger counter;
  /** Length of the delay, in the unit supplied by the caller (logged as milliseconds). */
  final int delayValue;
  /** Free-form message logged alongside the delay. */
  final String reason;

  public Delay(String reason, int counter, int delay) {
    this.counter = new AtomicInteger(counter);
    this.delayValue = delay;
    this.reason = reason;
  }
}
}

View File

@ -27,9 +27,11 @@ import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReaderContext;
@ -45,6 +47,7 @@ import org.apache.solr.cloud.ZkController;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.StringUtils;
import org.apache.solr.common.cloud.ClusterState;
@ -75,11 +78,11 @@ import org.apache.solr.update.DocumentBuilder;
import org.apache.solr.update.IndexFingerprint;
import org.apache.solr.update.PeerSync;
import org.apache.solr.update.UpdateLog;
import org.apache.solr.update.processor.DistributedUpdateProcessor;
import org.apache.solr.util.RefCounted;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class RealTimeGetComponent extends SearchComponent
{
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
@ -149,6 +152,12 @@ public class RealTimeGetComponent extends SearchComponent
return;
}
val = params.get("getInputDocument");
if (val != null) {
processGetInputDocument(rb);
return;
}
final IdsRequsted reqIds = IdsRequsted.parseParams(req);
if (reqIds.allIds.isEmpty()) {
@ -176,14 +185,14 @@ public class RealTimeGetComponent extends SearchComponent
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
}
SolrCore core = req.getCore();
final SolrCore core = req.getCore();
SchemaField idField = core.getLatestSchema().getUniqueKeyField();
FieldType fieldType = idField.getType();
SolrDocumentList docList = new SolrDocumentList();
UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
RefCounted<SolrIndexSearcher> searcherHolder = null;
SearcherInfo searcherInfo = new SearcherInfo(core);
// this is initialized & set on the context *after* any searcher (re-)opening
ResultContext resultContext = null;
@ -197,7 +206,7 @@ public class RealTimeGetComponent extends SearchComponent
|| ((null != transformer) && transformer.needsSolrIndexSearcher());
try {
SolrIndexSearcher searcher = null;
BytesRefBuilder idBytes = new BytesRefBuilder();
for (String idStr : reqIds.allIds) {
@ -208,24 +217,34 @@ public class RealTimeGetComponent extends SearchComponent
// should currently be a List<Oper,Ver,Doc/Id>
List entry = (List)o;
assert entry.size() >= 3;
int oper = (Integer)entry.get(0) & UpdateLog.OPERATION_MASK;
int oper = (Integer)entry.get(UpdateLog.FLAGS_IDX) & UpdateLog.OPERATION_MASK;
switch (oper) {
case UpdateLog.UPDATE_INPLACE: // fall through to ADD
case UpdateLog.ADD:
if (mustUseRealtimeSearcher) {
if (searcherHolder != null) {
// close handles to current searchers & result context
searcher = null;
searcherHolder.decref();
searcherHolder = null;
resultContext = null;
}
// close handles to current searchers & result context
searcherInfo.clear();
resultContext = null;
ulog.openRealtimeSearcher(); // force open a new realtime searcher
o = null; // pretend we never found this record and fall through to use the searcher
break;
}
SolrDocument doc = toSolrDoc((SolrInputDocument)entry.get(entry.size()-1), core.getLatestSchema());
SolrDocument doc;
if (oper == UpdateLog.ADD) {
doc = toSolrDoc((SolrInputDocument)entry.get(entry.size()-1), core.getLatestSchema());
} else if (oper == UpdateLog.UPDATE_INPLACE) {
assert entry.size() == 5;
// For in-place update case, we have obtained the partial document till now. We need to
// resolve it to a full document to be returned to the user.
doc = resolveFullDocument(core, idBytes.get(), rsp.getReturnFields(), (SolrInputDocument)entry.get(entry.size()-1), entry, null);
if (doc == null) {
break; // document has been deleted as the resolve was going on
}
} else {
throw new SolrException(ErrorCode.INVALID_STATE, "Expected ADD or UPDATE_INPLACE. Got: " + oper);
}
if (transformer!=null) {
transformer.transform(doc, -1, 0); // unknown docID
}
@ -241,23 +260,20 @@ public class RealTimeGetComponent extends SearchComponent
}
// didn't find it in the update log, so it should be in the newest searcher opened
if (searcher == null) {
searcherHolder = core.getRealtimeSearcher();
searcher = searcherHolder.get();
// don't bother with ResultContext yet, we won't need it if doc doesn't match filters
}
searcherInfo.init();
// don't bother with ResultContext yet, we won't need it if doc doesn't match filters
int docid = -1;
long segAndId = searcher.lookupId(idBytes.get());
long segAndId = searcherInfo.getSearcher().lookupId(idBytes.get());
if (segAndId >= 0) {
int segid = (int) segAndId;
LeafReaderContext ctx = searcher.getTopReaderContext().leaves().get((int) (segAndId >> 32));
LeafReaderContext ctx = searcherInfo.getSearcher().getTopReaderContext().leaves().get((int) (segAndId >> 32));
docid = segid + ctx.docBase;
if (rb.getFilters() != null) {
for (Query raw : rb.getFilters()) {
Query q = raw.rewrite(searcher.getIndexReader());
Scorer scorer = searcher.createWeight(q, false, 1f).scorer(ctx);
Query q = raw.rewrite(searcherInfo.getSearcher().getIndexReader());
Scorer scorer = searcherInfo.getSearcher().createWeight(q, false, 1f).scorer(ctx);
if (scorer == null || segid != scorer.iterator().advance(segid)) {
// filter doesn't match.
docid = -1;
@ -269,13 +285,13 @@ public class RealTimeGetComponent extends SearchComponent
if (docid < 0) continue;
Document luceneDocument = searcher.doc(docid, rsp.getReturnFields().getLuceneFieldNames());
Document luceneDocument = searcherInfo.getSearcher().doc(docid, rsp.getReturnFields().getLuceneFieldNames());
SolrDocument doc = toSolrDoc(luceneDocument, core.getLatestSchema());
searcher.decorateDocValueFields(doc, docid, searcher.getNonStoredDVs(true));
searcherInfo.getSearcher().decorateDocValueFields(doc, docid, searcherInfo.getSearcher().getNonStoredDVs(true));
if ( null != transformer) {
if (null == resultContext) {
// either first pass, or we've re-opened searcher - either way now we setContext
resultContext = new RTGResultContext(rsp.getReturnFields(), searcher, req);
resultContext = new RTGResultContext(rsp.getReturnFields(), searcherInfo.getSearcher(), req);
transformer.setContext(resultContext);
}
transformer.transform(doc, docid, 0);
@ -284,22 +300,210 @@ public class RealTimeGetComponent extends SearchComponent
}
} finally {
if (searcherHolder != null) {
searcherHolder.decref();
}
searcherInfo.clear();
}
addDocListToResponse(rb, docList);
}
/**
 * Return the requested SolrInputDocument from the tlog/index. This will
 * always be a full document, i.e. any partial in-place document will be resolved.
 * <p>
 * The id is read from the {@code getInputDocument} request parameter. The
 * response receives the document under {@code inputDocument} and the version
 * reported by the lookup under {@code version}. Nothing is added when the
 * component is disabled or the parameter is absent.
 */
void processGetInputDocument(ResponseBuilder rb) throws IOException {
  final SolrQueryRequest req = rb.req;
  final SolrQueryResponse rsp = rb.rsp;
  final SolrParams params = req.getParams();

  final boolean componentEnabled = params.getBool(COMPONENT_NAME, true);
  if (!componentEnabled) {
    return;
  }

  final String idStr = params.get("getInputDocument", null);
  if (idStr == null) {
    return;
  }

  // Filled in by getInputDocument with the version of the document it returns.
  final AtomicLong version = new AtomicLong();
  final BytesRef idBytes = new BytesRef(idStr);
  final SolrInputDocument doc = getInputDocument(req.getCore(), idBytes, version, false, null, true);
  log.info("getInputDocument called for id="+idStr+", returning: "+doc);

  rsp.add("inputDocument", doc);
  rsp.add("version", version.get());
}
/**
 * Lazily obtains the realtime searcher of a SolrCore and releases it again,
 * keeping the RefCounted bookkeeping in one place.
 */
private static class SearcherInfo {
  private RefCounted<SolrIndexSearcher> searcherHolder = null;
  private SolrIndexSearcher searcher = null;
  final SolrCore core;

  public SearcherInfo(SolrCore core) {
    this.core = core;
  }

  /** Releases the currently held searcher reference, if any; a later {@link #init()} acquires a new one. */
  void clear(){
    if (searcherHolder == null) {
      return; // nothing held
    }
    // close handles to current searchers
    searcher = null;
    searcherHolder.decref();
    searcherHolder = null;
  }

  /** Acquires the core's realtime searcher unless one is already held. */
  void init(){
    if (searcher != null) {
      return; // already initialised
    }
    searcherHolder = core.getRealtimeSearcher();
    searcher = searcherHolder.get();
  }

  /** Returns the held searcher; {@link #init()} must have been called first. */
  public SolrIndexSearcher getSearcher() {
    assert searcher != null : "init not called!";
    return searcher;
  }
}
/**
 * Given a partial document obtained from the transaction log (e.g. as a result of RTG), resolve to a full document
 * by populating all the partial updates that were applied on top of that last full document update.
 *
 * @param core core whose update log, schema and realtime searcher are used
 * @param idBytes binary value of the unique key; must not be null
 * @param returnFields fields requested when the document has to be read from the index
 * @param partialDoc the partial (in-place update) document from the tlog
 * @param logEntry the tlog entry of the partial update; must have exactly 5 elements
 * @param onlyTheseFields When a non-null set of field names is passed in, the resolve process only attempts to populate
 *                  the given fields in this set. When this set is null, it resolves all fields.
 * @return Returns the merged document, i.e. the resolved full document, or null if the document was not found (deleted
 *          after the resolving began)
 * @throws SolrException with INVALID_STATE if {@code idBytes} is null or the log entry does not have 5 elements
 */
private static SolrDocument resolveFullDocument(SolrCore core, BytesRef idBytes,
                                         ReturnFields returnFields, SolrInputDocument partialDoc, List logEntry, Set<String> onlyTheseFields) throws IOException {
  final boolean entryHasPrevious = logEntry.size() == 5;
  if (idBytes == null || !entryHasPrevious) {
    throw new SolrException(ErrorCode.INVALID_STATE, "Either Id field not present in partial document or log entry doesn't have previous version.");
  }
  final long prevPointer = (long) logEntry.get(UpdateLog.PREV_POINTER_IDX);
  final long prevVersion = (long) logEntry.get(UpdateLog.PREV_VERSION_IDX);

  // get the last full document from ulog
  final UpdateLog updateLog = core.getUpdateHandler().getUpdateLog();
  final long lastPrevPointer = updateLog.applyPartialUpdates(idBytes, prevPointer, prevVersion, onlyTheseFields, partialDoc);

  if (lastPrevPointer == -1) {
    // full document was not found in tlog, but exists in index
    return mergePartialDocWithFullDocFromIndex(core, idBytes, returnFields, onlyTheseFields, partialDoc);
  }

  if (lastPrevPointer > 0) {
    // We were supposed to have found the last full doc also in the tlogs, but the prevPointer links led to nowhere
    // We should reopen a new RT searcher and get the doc. This should be a rare occurrence
    final Term idTerm = new Term(core.getLatestSchema().getUniqueKeyField().getName(), idBytes);
    // null here means the document may have been deleted as the resolving was going on.
    return reopenRealtimeSearcherAndGet(core, idTerm, returnFields);
  }

  // i.e. lastPrevPointer==0: We have successfully resolved the document based off the tlogs
  assert lastPrevPointer == 0;
  return toSolrDoc(partialDoc, core.getLatestSchema());
}
/**
 * Forces a new realtime searcher to be opened and looks the document up in it.
 *
 * @param core core whose update log and realtime searcher are used
 * @param idTerm term identifying the document by its unique key
 * @param returnFields fields to load from the stored document
 * @return Returns the document or null if not found.
 */
private static SolrDocument reopenRealtimeSearcherAndGet(SolrCore core, Term idTerm, ReturnFields returnFields) throws IOException {
  core.getUpdateHandler().getUpdateLog().openRealtimeSearcher();

  final RefCounted<SolrIndexSearcher> holder = core.getRealtimeSearcher();
  try {
    final SolrIndexSearcher rtSearcher = holder.get();
    final int docid = rtSearcher.getFirstMatch(idTerm);
    if (docid >= 0) {
      final Document stored = rtSearcher.doc(docid, returnFields.getLuceneFieldNames());
      final SolrDocument result = toSolrDoc(stored, core.getLatestSchema());
      rtSearcher.decorateDocValueFields(result, docid, rtSearcher.getNonStoredDVs(false));
      return result;
    }
    return null;
  } finally {
    // always give the searcher reference back
    holder.decref();
  }
}
/**
 * Gets a document from the index by id. If a non-null partial document (for in-place update) is passed in,
 * this method obtains the document from the index by the given id, merges the partial document on top of it and then
 * returns the resultant document.
 *
 * @param core           A SolrCore instance, useful for obtaining a realtimesearcher and the schema
 * @param idBytes        Binary representation of the value of the unique key field
 * @param returnFields   Return fields, as requested
 * @param onlyTheseFields When a non-null set of field names is passed in, only these fields are decorated from
 *                       doc values. When this set is null, all non-stored doc-values fields are decorated.
 * @param partialDoc     A partial document (containing an in-place update) used for merging against a full document
 *                       from index; this may be null.
 * @return If partial document is null, this returns document from the index or null if not found.
 *         If partial document is not null, this returns a document from index merged with the partial document, or
 *         null if document doesn't exist in the index. When the index document already carries a newer version than
 *         the partial document, the index document is returned unmerged.
 */
private static SolrDocument mergePartialDocWithFullDocFromIndex(SolrCore core, BytesRef idBytes, ReturnFields returnFields,
             Set<String> onlyTheseFields, SolrInputDocument partialDoc) throws IOException {
  RefCounted<SolrIndexSearcher> searcherHolder = core.getRealtimeSearcher(); //Searcher();
  try {
    // now fetch last document from index, and merge partialDoc on top of it
    SolrIndexSearcher searcher = searcherHolder.get();
    SchemaField idField = core.getLatestSchema().getUniqueKeyField();
    Term idTerm = new Term(idField.getName(), idBytes);

    int docid = searcher.getFirstMatch(idTerm);
    if (docid < 0) {
      // The document was not found in index! Reopen a new RT searcher (to be sure) and get again.
      // This should be because the document was deleted recently.
      // A null result means: unable to resolve the last full doc in tlog fully, and document not found
      // in index even after opening new rt searcher. This must be a case of deleted doc.
      // NOTE(review): the reopened document is returned without merging partialDoc; presumably the
      // new searcher already reflects the in-place update. Confirm.
      return reopenRealtimeSearcherAndGet(core, idTerm, returnFields);
    }

    Set<String> decorateFields = onlyTheseFields == null ? searcher.getNonStoredDVs(false): onlyTheseFields;
    Document luceneDocument = searcher.doc(docid, returnFields.getLuceneFieldNames());
    SolrDocument doc = toSolrDoc(luceneDocument, core.getLatestSchema());
    searcher.decorateDocValueFields(doc, docid, decorateFields);

    if (partialDoc == null) {
      // Documented contract: without a partial document, return the index document as is.
      // Previously this case fell through and threw a NullPointerException below.
      return doc;
    }

    long docVersion = (long) doc.getFirstValue(DistributedUpdateProcessor.VERSION_FIELD);
    Object partialVersionObj = partialDoc.getFieldValue(DistributedUpdateProcessor.VERSION_FIELD);
    // The version may arrive as a Lucene Field, a Number, or anything whose toString() parses as a long.
    long partialDocVersion = partialVersionObj instanceof Field? ((Field) partialVersionObj).numericValue().longValue():
      partialVersionObj instanceof Number? ((Number) partialVersionObj).longValue(): Long.parseLong(partialVersionObj.toString());
    if (docVersion > partialDocVersion) {
      // index already holds something newer than the partial update; nothing to merge
      return doc;
    }
    for (String fieldName: (Iterable<String>) partialDoc.getFieldNames()) {
      doc.setField(fieldName, partialDoc.getFieldValue(fieldName));  // since partial doc will only contain single valued fields, this is fine
    }

    return doc;
  } finally {
    if (searcherHolder != null) {
      searcherHolder.decref();
    }
  }
}
public static SolrInputDocument DELETED = new SolrInputDocument();
/** returns the SolrInputDocument from the current tlog, or DELETED if it has been deleted, or
* null if there is no record of it in the current update log. If null is returned, it could
* still be in the latest index.
* @param versionReturned If a non-null AtomicLong is passed in, it is set to the version of the update returned from the TLog.
* @param resolveFullDocument In case the document is fetched from the tlog, it could only be a partial document if the last update
* was an in-place update. In that case, should this partial document be resolved to a full document (by following
* back prevPointer/prevVersion)?
*/
public static SolrInputDocument getInputDocumentFromTlog(SolrCore core, BytesRef idBytes) {
public static SolrInputDocument getInputDocumentFromTlog(SolrCore core, BytesRef idBytes, AtomicLong versionReturned,
Set<String> onlyTheseNonStoredDVs, boolean resolveFullDocument) {
UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
@ -310,9 +514,32 @@ public class RealTimeGetComponent extends SearchComponent
List entry = (List)o;
assert entry.size() >= 3;
int oper = (Integer)entry.get(0) & UpdateLog.OPERATION_MASK;
if (versionReturned != null) {
versionReturned.set((long)entry.get(UpdateLog.VERSION_IDX));
}
switch (oper) {
case UpdateLog.UPDATE_INPLACE:
assert entry.size() == 5;
if (resolveFullDocument) {
SolrInputDocument doc = (SolrInputDocument)entry.get(entry.size()-1);
try {
// For in-place update case, we have obtained the partial document till now. We need to
// resolve it to a full document to be returned to the user.
SolrDocument sdoc = resolveFullDocument(core, idBytes, new SolrReturnFields(), doc, entry, onlyTheseNonStoredDVs);
if (sdoc == null) {
return DELETED;
}
doc = toSolrInputDocument(sdoc, core.getLatestSchema());
return doc;
} catch (IOException ex) {
throw new SolrException(ErrorCode.SERVER_ERROR, "Error while resolving full document. ", ex);
}
} else {
// fall through to ADD, so as to get only the partial document
}
case UpdateLog.ADD:
return (SolrInputDocument)entry.get(entry.size()-1);
return (SolrInputDocument) entry.get(entry.size()-1);
case UpdateLog.DELETE:
return DELETED;
default:
@ -324,12 +551,40 @@ public class RealTimeGetComponent extends SearchComponent
return null;
}
/**
 * Obtains the latest document for a given id from the tlog or index (if not found in the tlog).
 *
 * NOTE: This is a convenience overload of
 * {@link #getInputDocument(SolrCore, BytesRef, AtomicLong, boolean, Set, boolean)} that passes
 * null for versionReturned, false for avoidRetrievingStoredFields, null for onlyTheseNonStoredDVs
 * (so as to retrieve all stored and non-stored DV fields) and true for resolveFullDocument
 * (so that a partial document fetched from the tlog, i.e. an in-place update, is resolved to a
 * full document).
 */
public static SolrInputDocument getInputDocument(SolrCore core, BytesRef idBytes) throws IOException {
  final AtomicLong versionReturned = null;           // the version is not reported back
  final boolean avoidRetrievingStoredFields = false; // stored fields are fetched
  final Set<String> onlyTheseNonStoredDVs = null;    // no restriction on non-stored DV fields
  final boolean resolveFullDocument = true;          // partial (in-place) documents are resolved
  return getInputDocument(core, idBytes, versionReturned, avoidRetrievingStoredFields,
      onlyTheseNonStoredDVs, resolveFullDocument);
}
/**
* Obtains the latest document for a given id from the tlog or through the realtime searcher (if not found in the tlog).
* @param versionReturned If a non-null AtomicLong is passed in, it is set to the version of the update returned from the TLog.
* @param avoidRetrievingStoredFields Setting this to true avoids fetching stored fields through the realtime searcher,
* however has no effect on documents obtained from the tlog.
* Non-stored docValues fields are populated anyway, and are not affected by this parameter. Note that if
* the id field is a stored field, it will not be populated if this parameter is true and the document is
* obtained from the index.
* @param onlyTheseNonStoredDVs If not-null, populate only these DV fields in the document fetched through the realtime searcher.
* If this is null, decorate all non-stored DVs (that are not targets of copy fields) from the searcher.
* @param resolveFullDocument In case the document is fetched from the tlog, it could only be a partial document if the last update
* was an in-place update. In that case, should this partial document be resolved to a full document (by following
* back prevPointer/prevVersion)?
*/
public static SolrInputDocument getInputDocument(SolrCore core, BytesRef idBytes, AtomicLong versionReturned, boolean avoidRetrievingStoredFields,
Set<String> onlyTheseNonStoredDVs, boolean resolveFullDocument) throws IOException {
SolrInputDocument sid = null;
RefCounted<SolrIndexSearcher> searcherHolder = null;
try {
SolrIndexSearcher searcher = null;
sid = getInputDocumentFromTlog(core, idBytes);
sid = getInputDocumentFromTlog(core, idBytes, versionReturned, onlyTheseNonStoredDVs, resolveFullDocument);
if (sid == DELETED) {
return null;
}
@ -346,9 +601,18 @@ public class RealTimeGetComponent extends SearchComponent
int docid = searcher.getFirstMatch(new Term(idField.getName(), idBytes));
if (docid < 0) return null;
Document luceneDocument = searcher.doc(docid);
sid = toSolrInputDocument(luceneDocument, core.getLatestSchema());
searcher.decorateDocValueFields(sid, docid, searcher.getNonStoredDVsWithoutCopyTargets());
if (avoidRetrievingStoredFields) {
sid = new SolrInputDocument();
} else {
Document luceneDocument = searcher.doc(docid);
sid = toSolrInputDocument(luceneDocument, core.getLatestSchema());
}
if (onlyTheseNonStoredDVs != null) {
searcher.decorateDocValueFields(sid, docid, onlyTheseNonStoredDVs);
} else {
searcher.decorateDocValueFields(sid, docid, searcher.getNonStoredDVsWithoutCopyTargets());
}
}
} finally {
if (searcherHolder != null) {
@ -356,6 +620,11 @@ public class RealTimeGetComponent extends SearchComponent
}
}
if (versionReturned != null) {
if (sid.containsKey(DistributedUpdateProcessor.VERSION_FIELD)) {
versionReturned.set((long)sid.getFieldValue(DistributedUpdateProcessor.VERSION_FIELD));
}
}
return sid;
}
@ -381,6 +650,30 @@ public class RealTimeGetComponent extends SearchComponent
return out;
}
/**
 * Copies the fields of a SolrDocument into a new SolrInputDocument.
 * <p>
 * A field known to the schema is skipped when it is neither stored nor has docValues, or when it
 * is a copy field target. Values that are Lucene {@link Field} instances are converted: through
 * the schema field's type when the field is known to the schema, otherwise to the first non-null
 * of the field's string, numeric or binary value (falling back to the Field itself).
 * </p>
 */
private static SolrInputDocument toSolrInputDocument(SolrDocument doc, IndexSchema schema) {
  final SolrInputDocument result = new SolrInputDocument();
  for (String fieldName : doc.getFieldNames()) {
    final SchemaField schemaField = schema.getFieldOrNull(fieldName);
    if (schemaField != null) {
      final boolean notRetrievable = !(schemaField.hasDocValues() || schemaField.stored());
      if (notRetrievable || schema.isCopyFieldTarget(schemaField)) {
        continue;
      }
    }

    for (Object rawValue : doc.getFieldValues(fieldName)) {
      Object converted = rawValue;
      if (rawValue instanceof Field) {
        final Field luceneField = (Field) rawValue;
        if (schemaField == null) {
          // No schema information: take the first representation the field actually carries.
          converted = luceneField.stringValue();
          if (converted == null) {
            converted = luceneField.numericValue();
          }
          if (converted == null) {
            converted = luceneField.binaryValue();
          }
          if (converted == null) {
            converted = luceneField;
          }
        } else {
          converted = schemaField.getType().toObject(luceneField); // object or external string?
        }
      }
      result.addField(fieldName, converted);
    }
  }
  return result;
}
private static SolrDocument toSolrDoc(Document doc, IndexSchema schema) {
SolrDocument out = new SolrDocument();
@ -409,9 +702,13 @@ public class RealTimeGetComponent extends SearchComponent
return out;
}
private static SolrDocument toSolrDoc(SolrInputDocument sdoc, IndexSchema schema) {
/**
* Converts a SolrInputDocument to SolrDocument, using an IndexSchema instance.
* @lucene.experimental
*/
public static SolrDocument toSolrDoc(SolrInputDocument sdoc, IndexSchema schema) {
// TODO: do something more performant than this double conversion
Document doc = DocumentBuilder.toDocument(sdoc, schema);
Document doc = DocumentBuilder.toDocument(sdoc, schema, false);
// copy the stored fields only
Document out = new Document();

View File

@ -199,8 +199,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
boolean isCorrectlySpelled = hits > (maxResultsForSuggest==null ? 0 : maxResultsForSuggest);
NamedList response = new SimpleOrderedMap();
NamedList suggestions = toNamedList(shardRequest, spellingResult, q, extendedResults);
response.add("suggestions", suggestions);
response.add("suggestions", toNamedList(shardRequest, spellingResult, q, extendedResults));
if (extendedResults) {
response.add("correctlySpelled", isCorrectlySpelled);
@ -300,7 +299,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
//even in cases when the internal rank is the same.
Collections.sort(collations);
NamedList collationList = new NamedList();
NamedList collationList = new SimpleOrderedMap();
for (SpellCheckCollation collation : collations) {
if (collationExtendedResults) {
NamedList extendedResult = new SimpleOrderedMap();
@ -424,8 +423,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
NamedList response = new SimpleOrderedMap();
NamedList suggestions = toNamedList(false, result, origQuery, extendedResults);
response.add("suggestions", suggestions);
response.add("suggestions", toNamedList(false, result, origQuery, extendedResults));
if (extendedResults) {
response.add("correctlySpelled", isCorrectlySpelled);
@ -436,7 +434,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
.toArray(new SpellCheckCollation[mergeData.collations.size()]);
Arrays.sort(sortedCollations);
NamedList collations = new NamedList();
NamedList collations = new SimpleOrderedMap();
int i = 0;
while (i < maxCollations && i < sortedCollations.length) {
SpellCheckCollation collation = sortedCollations[i];
@ -636,7 +634,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
protected NamedList toNamedList(boolean shardRequest,
SpellingResult spellingResult, String origQuery, boolean extendedResults) {
NamedList result = new NamedList();
NamedList result = new SimpleOrderedMap();
Map<Token,LinkedHashMap<String,Integer>> suggestions = spellingResult
.getSuggestions();
boolean hasFreqInfo = spellingResult.hasTokenFrequencyInfo();

View File

@ -809,7 +809,11 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
}
}
} else {
final DocValuesType dvType = fieldInfos.fieldInfo(fieldName).getDocValuesType();
FieldInfo fi = fieldInfos.fieldInfo(fieldName);
if (fi == null) {
continue; // Searcher doesn't have info about this field, hence ignore it.
}
final DocValuesType dvType = fi.getDocValuesType();
switch (dvType) {
case NUMERIC:
final NumericDocValues ndv = leafReader.getNumericDocValues(fieldName);

View File

@ -39,10 +39,20 @@ public class AddUpdateCommand extends UpdateCommand implements Iterable<Document
// it will be obtained from the doc.
private BytesRef indexedId;
// Higher level SolrInputDocument, normally used to construct the Lucene Document
// to index.
/**
* Higher level SolrInputDocument, normally used to construct the Lucene Document
* to index.
*/
public SolrInputDocument solrDoc;
/**
* This is the version of a document, previously indexed, on which the current
* update depends. This version could be that of a previous in-place update
* or a full update. A negative value here, e.g. -1, indicates that this add
* update does not depend on a previous update.
*/
public long prevVersion = -1;
public boolean overwrite = true;
public Term updateTerm;
@ -76,10 +86,19 @@ public class AddUpdateCommand extends UpdateCommand implements Iterable<Document
}
/** Creates and returns a lucene Document to index. Any changes made to the returned Document
* will not be reflected in the SolrInputDocument, or future calls to this method.
* will not be reflected in the SolrInputDocument, or future calls to this method. This defaults
* to false for the inPlaceUpdate parameter of {@link #getLuceneDocument(boolean)}.
*/
public Document getLuceneDocument() {
return DocumentBuilder.toDocument(getSolrInputDocument(), req.getSchema());
return getLuceneDocument(false);
}
/**
 * Builds a new lucene Document to index from this command's SolrInputDocument. Changes made to
 * the returned Document are not reflected in the SolrInputDocument, nor in later calls to this
 * method.
 * @param inPlaceUpdate Whether this document will be used for in-place updates.
 */
public Document getLuceneDocument(boolean inPlaceUpdate) {
  final SolrInputDocument source = getSolrInputDocument();
  return DocumentBuilder.toDocument(source, req.getSchema(), inPlaceUpdate);
}
/** Returns the indexed ID for this document. The returned BytesRef is retained across multiple calls, and should not be modified. */
@ -212,7 +231,6 @@ public class AddUpdateCommand extends UpdateCommand implements Iterable<Document
unwrappedDocs.add(currentDoc);
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder(super.toString());
@ -223,5 +241,11 @@ public class AddUpdateCommand extends UpdateCommand implements Iterable<Document
return sb.toString();
}
/**
 * Tells whether this add update is an in-place update, i.e. one where only docValues are
 * updated and a new document is not indexed. This is the case exactly when
 * {@link #prevVersion} is non-negative.
 */
public boolean isInPlaceUpdate() {
  final boolean dependsOnPreviousUpdate = prevVersion >= 0;
  return dependsOnPreviousUpdate;
}
}

View File

@ -27,9 +27,11 @@ import java.util.concurrent.Future;
import java.util.concurrent.atomic.LongAdder;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SlowCodecReaderWrapper;
import org.apache.lucene.index.Term;
@ -274,9 +276,7 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
if (cmd.isBlock()) {
writer.updateDocuments(updateTerm, cmd);
} else {
Document luceneDocument = cmd.getLuceneDocument();
// SolrCore.verbose("updateDocument",updateTerm,luceneDocument,writer);
writer.updateDocument(updateTerm, luceneDocument);
updateDocOrDocValues(cmd, writer, updateTerm);
}
// SolrCore.verbose("updateDocument",updateTerm,"DONE");
@ -331,7 +331,8 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
// see comment in deleteByQuery
synchronized (solrCoreState.getUpdateLock()) {
writer.updateDocument(idTerm, luceneDocument);
updateDocOrDocValues(cmd, writer, idTerm);
for (Query q : dbqList) {
writer.deleteDocuments(new DeleteByQueryWrapper(q, core.getLatestSchema()));
}
@ -450,6 +451,11 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
// as we use around ulog.preCommit... also see comments in ulog.postSoftCommit)
//
synchronized (solrCoreState.getUpdateLock()) {
// We are reopening a searcher before applying the deletes to overcome LUCENE-7344.
// Once LUCENE-7344 is resolved, we can consider removing this.
if (ulog != null) ulog.openRealtimeSearcher();
if (delAll) {
deleteAll();
} else {
@ -830,6 +836,44 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
splitter.split();
}
/**
 * Applies an add command to the IndexWriter, calling either {@link IndexWriter#updateDocValues}
 * or {@link IndexWriter#updateDocument} depending on {@link AddUpdateCommand#isInPlaceUpdate}.
 * <p>
 * If this is an in-place update, every field included in
 * {@link AddUpdateCommand#getLuceneDocument} other than the uniqueKey field is passed to
 * {@link IndexWriter#updateDocValues}, so all such fields must be DocValue only fields.
 * </p>
 *
 * @param cmd - cmd apply to IndexWriter
 * @param writer - IndexWriter to use
 * @param updateTerm - term passed to whichever IndexWriter method ends up being called
 */
private void updateDocOrDocValues(AddUpdateCommand cmd, IndexWriter writer, Term updateTerm) throws IOException {
  assert null != cmd;
  final SchemaField keyField = cmd.req.getSchema().getUniqueKeyField();
  final String keyFieldName = (keyField != null) ? keyField.getName() : null;

  if (!cmd.isInPlaceUpdate()) {
    // Regular add: (re)index the whole document.
    final Document fullDoc = cmd.getLuceneDocument(false);
    log.debug("updateDocument({})", cmd);
    writer.updateDocument(updateTerm, fullDoc);
    return;
  }

  // In-place update: collect every field except the uniqueKey field.
  final Document partialDoc = cmd.getLuceneDocument(true);
  final List<IndexableField> allFields = partialDoc.getFields();
  final List<Field> dvFields = new ArrayList<>(allFields.size());
  for (IndexableField candidate : allFields) {
    if (candidate.name().equals(keyFieldName)) {
      continue;
    }
    dvFields.add((Field) candidate);
  }
  log.debug("updateDocValues({})", cmd);
  final Field[] updates = dvFields.toArray(new Field[dvFields.size()]);
  writer.updateDocValues(updateTerm, updates);
}
/////////////////////////////////////////////////////////////////////
// SolrInfoMBean stuff: Statistics and Module Info
/////////////////////////////////////////////////////////////////////

View File

@ -21,6 +21,7 @@ import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.index.IndexableField;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
@ -37,15 +38,46 @@ import com.google.common.collect.Sets;
*/
public class DocumentBuilder {
private static void addField(Document doc, SchemaField field, Object val, float boost) {
/**
* Add a field value to a given document.
* @param doc Document that the field needs to be added to
* @param field The schema field object for the field
* @param val The value for the field to be added
* @param boost Boost value for the field
* @param forInPlaceUpdate Whether the field is to be added for in-place update. If true,
* only numeric docValues based fields are added to the document. This can be true
* when constructing a Lucene document for writing an in-place update, and we don't need
* presence of non-updatable fields (non NDV) in such a document.
*/
private static void addField(Document doc, SchemaField field, Object val, float boost,
boolean forInPlaceUpdate) {
if (val instanceof IndexableField) {
if (forInPlaceUpdate) {
assert val instanceof NumericDocValuesField: "Expected in-place update to be done on"
+ " NDV fields only.";
}
// set boost to the calculated compound boost
((Field)val).setBoost(boost);
doc.add((Field)val);
return;
}
for (IndexableField f : field.getType().createFields(field, val, boost)) {
if (f != null) doc.add((Field) f); // null fields are not added
if (f != null) { // null fields are not added
// HACK: workaround for SOLR-9809
// even though at this point in the code we know the field is single valued and DV only
// TrieField.createFields() may still return (useless) IndexableField instances that are not
// NumericDocValuesField instances.
//
// once SOLR-9809 is resolved, we should be able to replace this conditional with...
// assert f instanceof NumericDocValuesField
if (forInPlaceUpdate) {
if (f instanceof NumericDocValuesField) {
doc.add((Field) f);
}
} else {
doc.add((Field) f);
}
}
}
}
@ -59,6 +91,14 @@ public class DocumentBuilder {
return id;
}
/**
 * Convert a SolrInputDocument to a lucene Document that is not meant for an in-place update;
 * delegates to the three-argument overload with forInPlaceUpdate set to false.
 *
 * @see DocumentBuilder#toDocument(SolrInputDocument, IndexSchema, boolean)
 */
public static Document toDocument(SolrInputDocument doc, IndexSchema schema) {
  final boolean forInPlaceUpdate = false;
  return toDocument(doc, schema, forInPlaceUpdate);
}
/**
* Convert a SolrInputDocument to a lucene Document.
*
@ -72,9 +112,19 @@ public class DocumentBuilder {
* moved to an independent function
*
* @since solr 1.3
*
* @param doc SolrInputDocument from which the document has to be built
* @param schema Schema instance
* @param forInPlaceUpdate Whether the output document would be used for an in-place update or not. When this is true,
* default field values and copy field targets are not populated.
* @return Built Lucene document
*/
public static Document toDocument( SolrInputDocument doc, IndexSchema schema )
public static Document toDocument( SolrInputDocument doc, IndexSchema schema, boolean forInPlaceUpdate )
{
final SchemaField uniqueKeyField = schema.getUniqueKeyField();
final String uniqueKeyFieldName = null == uniqueKeyField ? null : uniqueKeyField.getName();
Document out = new Document();
final float docBoost = doc.getDocumentBoost();
Set<String> usedFields = Sets.newHashSet();
@ -85,7 +135,6 @@ public class DocumentBuilder {
SchemaField sfield = schema.getFieldOrNull(name);
boolean used = false;
// Make sure it has the correct number
if( sfield!=null && !sfield.multiValued() && field.getValueCount() > 1 ) {
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,
@ -119,45 +168,51 @@ public class DocumentBuilder {
hasField = true;
if (sfield != null) {
used = true;
addField(out, sfield, v, applyBoost ? compoundBoost : 1f);
addField(out, sfield, v, applyBoost ? compoundBoost : 1f,
name.equals(uniqueKeyFieldName) ? false : forInPlaceUpdate);
// record the field as having a value
usedFields.add(sfield.getName());
}
// Check if we should copy this field value to any other fields.
// This could happen whether it is explicit or not.
if( copyFields != null ){
for (CopyField cf : copyFields) {
SchemaField destinationField = cf.getDestination();
if (copyFields != null) {
// Do not copy this field if this document is to be used for an in-place update,
// and this is the uniqueKey field (because the uniqueKey can't change so no need to "update" the copyField).
if ( ! (forInPlaceUpdate && name.equals(uniqueKeyFieldName)) ) {
for (CopyField cf : copyFields) {
SchemaField destinationField = cf.getDestination();
final boolean destHasValues = usedFields.contains(destinationField.getName());
final boolean destHasValues = usedFields.contains(destinationField.getName());
// check if the copy field is a multivalued or not
if (!destinationField.multiValued() && destHasValues) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"ERROR: "+getID(doc, schema)+"multiple values encountered for non multiValued copy field " +
destinationField.getName() + ": " + v);
// check if the copy field is a multivalued or not
if (!destinationField.multiValued() && destHasValues) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"ERROR: "+getID(doc, schema)+"multiple values encountered for non multiValued copy field " +
destinationField.getName() + ": " + v);
}
used = true;
// Perhaps trim the length of a copy field
Object val = v;
if( val instanceof String && cf.getMaxChars() > 0 ) {
val = cf.getLimitedValue((String)val);
}
// we can't copy any boost unless the dest field is
// indexed & !omitNorms, but which boost we copy depends
// on whether the dest field already contains values (we
// don't want to apply the compounded docBoost more then once)
final float destBoost =
(destinationField.indexed() && !destinationField.omitNorms()) ?
(destHasValues ? fieldBoost : compoundBoost) : 1.0F;
addField(out, destinationField, val, destBoost,
destinationField.getName().equals(uniqueKeyFieldName) ? false : forInPlaceUpdate);
// record the field as having a value
usedFields.add(destinationField.getName());
}
used = true;
// Perhaps trim the length of a copy field
Object val = v;
if( val instanceof String && cf.getMaxChars() > 0 ) {
val = cf.getLimitedValue((String)val);
}
// we can't copy any boost unless the dest field is
// indexed & !omitNorms, but which boost we copy depends
// on whether the dest field already contains values (we
// don't want to apply the compounded docBoost more then once)
final float destBoost =
(destinationField.indexed() && !destinationField.omitNorms()) ?
(destHasValues ? fieldBoost : compoundBoost) : 1.0F;
addField(out, destinationField, val, destBoost);
// record the field as having a value
usedFields.add(destinationField.getName());
}
}
@ -187,14 +242,20 @@ public class DocumentBuilder {
// Now validate required fields or add default values
// fields with default values are defacto 'required'
for (SchemaField field : schema.getRequiredFields()) {
if (out.getField(field.getName() ) == null) {
if (field.getDefaultValue() != null) {
addField(out, field, field.getDefaultValue(), 1.0f);
}
else {
String msg = getID(doc, schema) + "missing required field: " + field.getName();
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, msg );
// Note: We don't need to add default fields if this document is to be used for
// in-place updates, since this validation and population of default fields would've happened
// during the full indexing initially.
if (!forInPlaceUpdate) {
for (SchemaField field : schema.getRequiredFields()) {
if (out.getField(field.getName() ) == null) {
if (field.getDefaultValue() != null) {
addField(out, field, field.getDefaultValue(), 1.0f, false);
}
else {
String msg = getID(doc, schema) + "missing required field: " + field.getName();
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, msg );
}
}
}
}

View File

@ -828,6 +828,16 @@ public class PeerSync implements SolrMetricProducer {
proc.processDelete(cmd);
break;
}
case UpdateLog.UPDATE_INPLACE:
{
AddUpdateCommand cmd = UpdateLog.convertTlogEntryToAddUpdateCommand(req, entry, oper, version);
cmd.setFlags(UpdateCommand.PEER_SYNC | UpdateCommand.IGNORE_AUTOCOMMIT);
if (debug) {
log.debug(msg() + "inplace update " + cmd + " prevVersion=" + cmd.prevVersion + ", doc=" + cmd.solrDoc);
}
proc.processAdd(cmd);
break;
}
default:
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown Operation! " + oper);

View File

@ -31,6 +31,7 @@ import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.Diagnostics;
import org.apache.solr.update.processor.DistributedUpdateProcessor;
import org.apache.solr.update.processor.DistributedUpdateProcessor.RequestReplicationTracker;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -206,6 +207,9 @@ public class SolrCmdDistributor {
uReq.lastDocInBatch();
uReq.setParams(params);
uReq.add(cmd.solrDoc, cmd.commitWithin, cmd.overwrite);
if (cmd.isInPlaceUpdate()) {
params.set(DistributedUpdateProcessor.DISTRIB_INPLACE_PREVVERSION, String.valueOf(cmd.prevVersion));
}
submit(new Req(cmd, node, uReq, synchronous, rrt, cmd.pollQueueTime), false);
}

View File

@ -342,7 +342,33 @@ public class TransactionLog implements Closeable {
int lastAddSize;
/**
 * Writes an add update command to the transaction log, with the previous pointer
 * (which is only applicable for in-place updates) recorded as -1. This overload is not
 * meant for in-place updates; use {@link #write(AddUpdateCommand, long, int)} for those.
 * @param cmd The add update command to be written
 * @param flags Options for writing the command to the transaction log
 * @return Returns the position pointer of the written update command
 *
 * @see #write(AddUpdateCommand, long, int)
 */
public long write(AddUpdateCommand cmd, int flags) {
  final long noPrevPointer = -1;
  return write(cmd, noPrevPointer, flags);
}
/**
* Writes an add update command to the transaction log. This should be called only for
* writing in-place updates, or else pass -1 as the prevPointer.
* @param cmd The add update command to be written
* @param prevPointer The pointer in the transaction log which this update depends
* on (applicable for in-place updates)
* @param flags Options for writing the command to the transaction log
* @return Returns the position pointer of the written update command
*/
public long write(AddUpdateCommand cmd, long prevPointer, int flags) {
assert (-1 <= prevPointer && (cmd.isInPlaceUpdate() || (-1 == prevPointer)));
LogCodec codec = new LogCodec(resolver);
SolrInputDocument sdoc = cmd.getSolrInputDocument();
@ -355,10 +381,19 @@ public class TransactionLog implements Closeable {
MemOutputStream out = new MemOutputStream(new byte[bufSize]);
codec.init(out);
codec.writeTag(JavaBinCodec.ARR, 3);
codec.writeInt(UpdateLog.ADD | flags); // should just take one byte
codec.writeLong(cmd.getVersion());
codec.writeSolrInputDocument(cmd.getSolrInputDocument());
if (cmd.isInPlaceUpdate()) {
codec.writeTag(JavaBinCodec.ARR, 5);
codec.writeInt(UpdateLog.UPDATE_INPLACE | flags); // should just take one byte
codec.writeLong(cmd.getVersion());
codec.writeLong(prevPointer);
codec.writeLong(cmd.prevVersion);
codec.writeSolrInputDocument(cmd.getSolrInputDocument());
} else {
codec.writeTag(JavaBinCodec.ARR, 3);
codec.writeInt(UpdateLog.ADD | flags); // should just take one byte
codec.writeLong(cmd.getVersion());
codec.writeSolrInputDocument(cmd.getSolrInputDocument());
}
lastAddSize = (int)out.size();
synchronized (this) {

View File

@ -22,6 +22,7 @@ import java.io.FileNotFoundException;
import java.io.FilenameFilter;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.Arrays;
@ -34,6 +35,7 @@ import java.util.List;
import java.util.ListIterator;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.Future;
import java.util.concurrent.SynchronousQueue;
@ -44,6 +46,7 @@ import com.codahale.metrics.Gauge;
import com.codahale.metrics.Meter;
import org.apache.hadoop.fs.FileSystem;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.common.SolrDocumentBase;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.SolrInputDocument;
@ -122,6 +125,7 @@ public class UpdateLog implements PluginInfoInitialized, SolrMetricProducer {
public static final int DELETE = 0x02;
public static final int DELETE_BY_QUERY = 0x03;
public static final int COMMIT = 0x04;
public static final int UPDATE_INPLACE = 0x08;
// Flag indicating that this is a buffered operation, and that a gap exists before buffering started.
// for example, if full index replication starts and we are buffering updates, then this flag should
// be set to indicate that replaying the log would not bring us into sync (i.e. peersync should
@ -129,6 +133,28 @@ public class UpdateLog implements PluginInfoInitialized, SolrMetricProducer {
public static final int FLAG_GAP = 0x10;
public static final int OPERATION_MASK = 0x0f; // mask off flags to get the operation
/**
* The index of the flags value in an entry from the transaction log.
*/
public static final int FLAGS_IDX = 0;
/**
* The index of the _version_ value in an entry from the transaction log.
*/
public static final int VERSION_IDX = 1;
/**
* The index of the previous pointer in an entry from the transaction log.
* This is only relevant if flags (indexed at FLAGS_IDX) includes UPDATE_INPLACE.
*/
public static final int PREV_POINTER_IDX = 2;
/**
* The index of the previous version in an entry from the transaction log.
* This is only relevant if flags (indexed at FLAGS_IDX) includes UPDATE_INPLACE.
*/
public static final int PREV_VERSION_IDX = 3;
public static class RecoveryInfo {
public long positionOfStart;
@ -215,10 +241,29 @@ public class UpdateLog implements PluginInfoInitialized, SolrMetricProducer {
public static class LogPtr {
final long pointer;
final long version;
final long previousPointer; // used for entries that are in-place updates and need a pointer to a previous update command
/**
 * Creates an object holding the position and version of an update that has no previous
 * pointer; the effective value of the previousPointer is -1.
 *
 * @param pointer Position in the transaction log of an update
 * @param version Version of the update at the given position
 */
public LogPtr(long pointer, long version) {
  this(pointer, version, -1L);
}
/**
 * Creates an object that contains the position and version of an update, together with the position
 * of the update it depends on.
 *
 * @param pointer Position in the transaction log of an update
 * @param version Version of the update at the given position
 * @param previousPointer Position, in the transaction log, of an update on which the current update depends
 */
public LogPtr(long pointer, long version, long previousPointer) {
this.pointer = pointer;
this.version = version;
this.previousPointer = previousPointer;
}
@Override
@ -476,16 +521,18 @@ public class UpdateLog implements PluginInfoInitialized, SolrMetricProducer {
synchronized (this) {
long pos = -1;
long prevPointer = getPrevPointerForUpdate(cmd);
// don't log if we are replaying from another log
if ((cmd.getFlags() & UpdateCommand.REPLAY) == 0) {
ensureLog();
pos = tlog.write(cmd, operationFlags);
pos = tlog.write(cmd, prevPointer, operationFlags);
}
if (!clearCaches) {
// TODO: in the future we could support a real position for a REPLAY update.
// Only currently would be useful for RTG while in recovery mode though.
LogPtr ptr = new LogPtr(pos, cmd.getVersion());
LogPtr ptr = new LogPtr(pos, cmd.getVersion(), prevPointer);
// only update our map if we're not buffering
if ((cmd.getFlags() & UpdateCommand.BUFFERING) == 0) {
@ -506,6 +553,31 @@ public class UpdateLog implements PluginInfoInitialized, SolrMetricProducer {
}
}
/**
 * Finds the transaction log position of the update that the given command builds upon.
 *
 * @param cmd the add command being logged
 * @return If cmd is an in-place update and an earlier entry for the same id is present in one of the
 *         in-memory maps, the pointer (in the tlog) of that earlier entry. Returns -1 otherwise, i.e.
 *         either cmd is not an in-place update at all (the value is then just a dummy and should be
 *         ignored), or it is an in-place update whose previous update could not be found in the tlog
 *         (it is then expected to be in the index).
 */
private synchronized long getPrevPointerForUpdate(AddUpdateCommand cmd) {
  // note: sync required to ensure maps aren't changed out from under us
  if (!cmd.isInPlaceUpdate()) {
    return -1;
  }
  final BytesRef docId = cmd.getIndexedId();
  // Consult the maps in this fixed order and stop at the first one that knows the id.
  for (Map<BytesRef, LogPtr> candidateMap : Arrays.asList(map, prevMap, prevMap2)) {
    LogPtr earlier = (candidateMap == null) ? null : candidateMap.get(docId);
    if (earlier != null) {
      return earlier.pointer;
    }
  }
  return -1;
}
public void delete(DeleteUpdateCommand cmd) {
@ -755,6 +827,117 @@ public class UpdateLog implements PluginInfoInitialized, SolrMetricProducer {
}
}
/**
 * Goes over backwards, following the prevPointer, to merge all partial updates into the passed doc. Stops at either a full
 * document, or if there are no previous entries to follow in the update log.
 *
 * @param id Binary representation of the unique key field
 * @param prevPointer Pointer to the previous entry in the ulog, based on which the current in-place update was made.
 * @param prevVersion Version of the previous entry in the ulog, based on which the current in-place update was made.
 * @param onlyTheseFields When a non-null set of field names is passed in, the resolve process only attempts to populate
 *        the given fields in this set. When this set is null, it resolves all fields.
 * @param latestPartialDoc Partial document that is to be populated
 * @return Returns 0 if a full document was found in the log (or all of onlyTheseFields were resolved), -1 if no full
 *         document was found. If full document was supposed to be found in the tlogs, but couldn't be found (because
 *         the logs were rotated) then the prevPointer is returned.
 * @throws SolrException with INVALID_STATE if an entry in the chain is neither an ADD nor an UPDATE_INPLACE, or if
 *         an UPDATE_INPLACE entry is too short to carry a partial document
 */
public synchronized long applyPartialUpdates(BytesRef id, long prevPointer, long prevVersion,
    Set<String> onlyTheseFields, SolrDocumentBase latestPartialDoc) {

  List<TransactionLog> lookupLogs = Arrays.asList(tlog, prevMapLog, prevMapLog2);

  while (prevPointer >= 0) {
    // go through each partial update and apply it on the incoming doc one after another
    List entry = getEntryFromTLog(prevPointer, prevVersion, lookupLogs);
    if (entry == null) {
      return prevPointer; // a previous update was supposed to be found, but wasn't found (due to log rotation)
    }

    int flags = (int) entry.get(UpdateLog.FLAGS_IDX);
    // Decode the operation through OPERATION_MASK, as the other log readers in this class do. Testing
    // individual bits instead can match unrelated operation codes that merely share a bit.
    int oper = flags & UpdateLog.OPERATION_MASK;

    // since updates can depend only upon ADD updates or other UPDATE_INPLACE updates, we check that we aren't
    // getting something else
    if (oper != UpdateLog.ADD && oper != UpdateLog.UPDATE_INPLACE) {
      // utf8ToString() decodes the id through BytesRef itself instead of reading the raw backing array
      throw new SolrException(ErrorCode.INVALID_STATE, entry + " should've been either ADD or UPDATE_INPLACE update" +
          ", while looking for id=" + id.utf8ToString());
    }

    // if this is an ADD (i.e. full document update), stop here
    if (oper == UpdateLog.ADD) {
      SolrInputDocument fullDoc = (SolrInputDocument) entry.get(entry.size() - 1);
      applyOlderUpdates(latestPartialDoc, fullDoc, onlyTheseFields);
      return 0; // Full document was found in the tlog itself
    }

    if (entry.size() < 5) {
      throw new SolrException(ErrorCode.INVALID_STATE, entry + " is not a partial doc" +
          ", while looking for id=" + id.utf8ToString());
    }

    // This update is an inplace update, get the partial doc. The input doc is always at last position.
    SolrInputDocument partialUpdateDoc = (SolrInputDocument) entry.get(entry.size() - 1);
    applyOlderUpdates(latestPartialDoc, partialUpdateDoc, onlyTheseFields);

    // Step to the entry this in-place update was itself based on.
    prevPointer = (long) entry.get(UpdateLog.PREV_POINTER_IDX);
    prevVersion = (long) entry.get(UpdateLog.PREV_VERSION_IDX);

    if (onlyTheseFields != null && latestPartialDoc.keySet().containsAll(onlyTheseFields)) {
      return 0; // all the onlyTheseFields have been resolved, safe to abort now.
    }
  }

  return -1; // last full document is not supposed to be in tlogs, but it must be in the index
}
/**
 * Copies fields from olderDoc into newerDoc, never overriding a field that newerDoc already has.
 *
 * @param newerDoc document that receives the missing fields
 * @param olderDoc document supplying the older field values
 * @param mergeFields when non-null, only fields named in this set are copied; when null, every field is eligible
 */
private void applyOlderUpdates(SolrDocumentBase newerDoc, SolrInputDocument olderDoc, Set<String> mergeFields) {
  for (String name : olderDoc.getFieldNames()) {
    // a value already present in the newer doc wins over the older one
    if (newerDoc.containsKey(name)) {
      continue;
    }
    // honour the optional field filter
    if (mergeFields != null && !mergeFields.contains(name)) {
      continue;
    }
    for (Object value : olderDoc.getFieldValues(name)) {
      newerDoc.addField(name, value);
    }
  }
}
/**
 * Get the entry that has the given lookupVersion in the given lookupLogs at the lookupPointer position.
 * Each non-null log that is larger than lookupPointer is probed in list order; the first entry read that is a
 * list carrying the expected version at VERSION_IDX wins.
 *
 * @param lookupPointer position of the wanted entry within its transaction log
 * @param lookupVersion version the entry at that position must carry to be accepted
 * @param lookupLogs candidate transaction logs; individual elements may be null
 * @return The entry if found, otherwise null
 */
private synchronized List getEntryFromTLog(long lookupPointer, long lookupVersion, List<TransactionLog> lookupLogs) {
  for (TransactionLog lookupLog : lookupLogs) {
    if (lookupLog == null || lookupLog.getLogSize() <= lookupPointer) {
      continue; // nothing to read at that position in this log
    }
    lookupLog.incref();
    try {
      Object obj = null;
      try {
        obj = lookupLog.lookup(lookupPointer);
      } catch (Exception | Error ex) {
        // This can happen when trying to deserialize the entry at position lookupPointer,
        // but from a different tlog than the one containing the desired entry.
        // Just ignore the exception, so as to proceed to the next tlog. The throwable is still handed
        // to the logger so that the cause is visible when debug logging is enabled.
        log.debug("Exception reading the log (this is expected, don't worry)={}, for version={}. This can be ignored.",
            lookupLog, lookupVersion, ex);
      }

      // instanceof is false for null, so no separate null check is needed
      if (obj instanceof List) {
        List tmpEntry = (List) obj;
        if (tmpEntry.size() >= 2) {
          Object entryVersion = tmpEntry.get(UpdateLog.VERSION_IDX);
          if (entryVersion instanceof Long && ((Long) entryVersion).equals(lookupVersion)) {
            return tmpEntry;
          }
        }
      }
    } finally {
      lookupLog.decref();
    }
  }
  return null;
}
public Object lookup(BytesRef indexedId) {
LogPtr entry;
TransactionLog lookupLog;
@ -967,6 +1150,7 @@ public class UpdateLog implements PluginInfoInitialized, SolrMetricProducer {
// Plain holder describing one update entry read from a transaction log.
static class Update {
// NOTE(review): presumably the transaction log this entry was read from -- confirm against the code that populates it
TransactionLog log;
// version stored in the log entry
long version;
long previousVersion; // for in-place updates
// position associated with the entry in the log, as reported by the log reader
long pointer;
}
@ -1070,15 +1254,16 @@ public class UpdateLog implements PluginInfoInitialized, SolrMetricProducer {
List entry = (List)o;
// TODO: refactor this out so we get common error handling
int opAndFlags = (Integer)entry.get(0);
int opAndFlags = (Integer)entry.get(UpdateLog.FLAGS_IDX);
if (latestOperation == 0) {
latestOperation = opAndFlags;
}
int oper = opAndFlags & UpdateLog.OPERATION_MASK;
long version = (Long) entry.get(1);
long version = (Long) entry.get(UpdateLog.VERSION_IDX);
switch (oper) {
case UpdateLog.ADD:
case UpdateLog.UPDATE_INPLACE:
case UpdateLog.DELETE:
case UpdateLog.DELETE_BY_QUERY:
Update update = new Update();
@ -1086,13 +1271,16 @@ public class UpdateLog implements PluginInfoInitialized, SolrMetricProducer {
update.pointer = reader.position();
update.version = version;
if (oper == UpdateLog.UPDATE_INPLACE && entry.size() == 5) {
update.previousVersion = (Long) entry.get(UpdateLog.PREV_VERSION_IDX);
}
updatesForLog.add(update);
updates.put(version, update);
if (oper == UpdateLog.DELETE_BY_QUERY) {
deleteByQueryList.add(update);
} else if (oper == UpdateLog.DELETE) {
deleteList.add(new DeleteUpdate(version, (byte[])entry.get(2)));
deleteList.add(new DeleteUpdate(version, (byte[])entry.get(entry.size()-1)));
}
break;
@ -1429,23 +1617,17 @@ public class UpdateLog implements PluginInfoInitialized, SolrMetricProducer {
// should currently be a List<Oper,Ver,Doc/Id>
List entry = (List) o;
operationAndFlags = (Integer) entry.get(0);
operationAndFlags = (Integer) entry.get(UpdateLog.FLAGS_IDX);
int oper = operationAndFlags & OPERATION_MASK;
long version = (Long) entry.get(1);
long version = (Long) entry.get(UpdateLog.VERSION_IDX);
switch (oper) {
case UpdateLog.UPDATE_INPLACE: // fall through to ADD
case UpdateLog.ADD: {
recoveryInfo.adds++;
// byte[] idBytes = (byte[]) entry.get(2);
SolrInputDocument sdoc = (SolrInputDocument) entry.get(entry.size() - 1);
AddUpdateCommand cmd = new AddUpdateCommand(req);
// cmd.setIndexedId(new BytesRef(idBytes));
cmd.solrDoc = sdoc;
cmd.setVersion(version);
AddUpdateCommand cmd = convertTlogEntryToAddUpdateCommand(req, entry, oper, version);
cmd.setFlags(UpdateCommand.REPLAY | UpdateCommand.IGNORE_AUTOCOMMIT);
if (debug) log.debug("add " + cmd);
log.debug("{} {}", oper == ADD ? "add" : "update", cmd);
proc.processAdd(cmd);
break;
}
@ -1472,7 +1654,6 @@ public class UpdateLog implements PluginInfoInitialized, SolrMetricProducer {
proc.processDelete(cmd);
break;
}
case UpdateLog.COMMIT: {
commitVersion = version;
break;
@ -1552,6 +1733,31 @@ public class UpdateLog implements PluginInfoInitialized, SolrMetricProducer {
}
}
/**
 * Builds a fresh AddUpdateCommand out of a transaction log entry that carries a document, suitable for
 * either ADDing the document or applying an UPDATE_INPLACE.
 *
 * @param req The request to use as the owner of the new AddUpdateCommand
 * @param entry Entry from the transaction log that contains the document to be added; the document is
 *        taken from the last position of the entry
 * @param operation The value of the operation flag; this must be either ADD or UPDATE_INPLACE --
 *        if it is UPDATE_INPLACE then the previous version will also be read from the entry
 * @param version Version already obtained from the entry.
 * @return the newly created command, carrying the document, the version and (for in-place updates) the
 *         previous version
 */
public static AddUpdateCommand convertTlogEntryToAddUpdateCommand(SolrQueryRequest req, List entry,
    int operation, long version) {
  assert operation == UpdateLog.ADD || operation == UpdateLog.UPDATE_INPLACE;

  final int docPosition = entry.size() - 1;
  final SolrInputDocument document = (SolrInputDocument) entry.get(docPosition);

  final AddUpdateCommand addCommand = new AddUpdateCommand(req);
  addCommand.solrDoc = document;
  addCommand.setVersion(version);

  // only in-place updates record the version they were based on
  if (operation == UPDATE_INPLACE) {
    addCommand.prevVersion = (Long) entry.get(UpdateLog.PREV_VERSION_IDX);
  }
  return addCommand;
}
public void cancelApplyBufferedUpdates() {
this.cancelApplyBufferUpdate = true;
}

View File

@ -193,6 +193,10 @@ public class VersionInfo {
return ulog.lookupVersion(idBytes);
}
/**
* Returns the latest version from the index, searched by the given id (bytes) as seen from the realtime searcher.
* Returns null if no document can be found in the index for the given id.
*/
public Long getVersionFromIndex(BytesRef idBytes) {
// TODO: we could cache much of this and invalidate during a commit.
// TODO: most DocValues classes are threadsafe - expose which.
@ -219,6 +223,9 @@ public class VersionInfo {
}
}
/**
* Returns the highest version from the index, or 0L if no versions can be found in the index.
*/
public Long getMaxVersionFromIndex(IndexSearcher searcher) throws IOException {
String versionFieldName = versionField.getName();

View File

@ -16,25 +16,34 @@
*/
package org.apache.solr.update.processor;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.component.RealTimeGetComponent;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.CopyField;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.NumericValueFieldType;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.update.AddUpdateCommand;
import org.apache.solr.util.RefCounted;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -124,6 +133,177 @@ public class AtomicUpdateDocumentMerger {
return toDoc;
}
/**
 * Tells whether the given schema field qualifies for an in-place update: it must be neither indexed nor
 * stored, must have docValues, must be single valued and must be of a numeric value type.
 * Note: If an update command has updates to only supported fields (and _version_ is also supported),
 * only then is such an update command executed as an in-place update.
 */
private static boolean isSupportedFieldForInPlaceUpdate(SchemaField schemaField) {
  return !schemaField.indexed()
      && !schemaField.stored()
      && schemaField.hasDocValues()
      && !schemaField.multiValued()
      && schemaField.getType() instanceof NumericValueFieldType;
}
/**
 * Works out which fields of the given add command can be updated in-place. The answer is all-or-nothing:
 * as soon as a single field of the update cannot be handled in-place, the whole update is disqualified and
 * an empty set is returned.
 *
 * @param cmd the add command whose input document is inspected
 * @return the names of the fields that can be updated in-place, or an empty set if the command cannot be
 *         executed as an in-place update
 */
public static Set<String> computeInPlaceUpdatableFields(AddUpdateCommand cmd) throws IOException {
  SolrInputDocument inputDoc = cmd.getSolrInputDocument();
  IndexSchema indexSchema = cmd.getReq().getSchema();

  final SchemaField keyField = indexSchema.getUniqueKeyField();
  final String keyFieldName = (keyField == null) ? null : keyField.getName();

  final Set<String> candidates = new HashSet<>();

  // nothing can be done in-place unless _version_ itself is supported for in-place updates
  SchemaField versionSchemaField = indexSchema.getFieldOrNull(DistributedUpdateProcessor.VERSION_FIELD);
  if (versionSchemaField == null || !isSupportedFieldForInPlaceUpdate(versionSchemaField)) {
    return Collections.emptySet();
  }

  // Pass 1: cheap structural checks on the input document itself.
  for (String name : inputDoc.getFieldNames()) {
    boolean keyOrVersion = name.equals(keyFieldName)
        || name.equals(DistributedUpdateProcessor.VERSION_FIELD);
    if (keyOrVersion) {
      continue;
    }

    Object value = inputDoc.getField(name).getValue();
    if (!(value instanceof Map)) {
      // a plain (non-map) value means this is not an atomic update of that field
      return Collections.emptySet();
    }

    // the value is an atomic update map; only "set" and "inc" can be done in-place
    @SuppressWarnings("unchecked")
    Map<String, Object> operations = (Map<String, Object>) value;
    for (String op : operations.keySet()) {
      if (!(op.equals("set") || op.equals("inc"))) {
        return Collections.emptySet();
      }
    }
    candidates.add(name);
  }

  if (candidates.isEmpty()) {
    return Collections.emptySet();
  }

  // Pass 2: more expensive checks against the schema, including copy field destinations.
  for (String name : candidates) {
    if (!isSupportedFieldForInPlaceUpdate(indexSchema.getField(name))) {
      return Collections.emptySet();
    }
    for (CopyField copy : indexSchema.getCopyFieldsList(name)) {
      if (!isSupportedFieldForInPlaceUpdate(copy.getDestination())) {
        return Collections.emptySet();
      }
    }
  }

  // Pass 3: checks against the actual IndexWriter due to internal DV update limitations.
  SolrCore solrCore = cmd.getReq().getCore();
  RefCounted<IndexWriter> writerRef = solrCore.getSolrCoreState().getIndexWriter(solrCore);
  Set<String> writerFieldNames = null;
  Set<String> sortFieldNames = null;
  try {
    IndexWriter writer = writerRef.get();
    writerFieldNames = writer.getFieldNames();
    sortFieldNames = writer.getConfig().getIndexSortFields();
  } finally {
    writerRef.decref();
  }

  for (String name : candidates) {
    // the field must already exist for a DV update to work, and it must not be used for segment sorting
    if (!writerFieldNames.contains(name) || sortFieldNames.contains(name)) {
      return Collections.emptySet();
    }
  }

  return candidates;
}
/**
 * Given an AddUpdateCommand containing update operations (e.g. set, inc), merge and resolve the operations into
 * a partial document that can be used for indexing the in-place updates. The AddUpdateCommand is modified to contain
 * the partial document (instead of the original document which contained the update operations) and also
 * the prevVersion that this in-place update depends on.
 * Note: updatedFields passed into the method can be changed, i.e. the version field can be added to the set.
 *
 * @param cmd the add command to resolve; its solrDoc and prevVersion are overwritten on success
 * @param updatedFields names of the fields being updated; must be non-null and modifiable
 * @return If in-place update cannot succeed, e.g. if the old document is deleted recently, then false is returned. A false
 *         return indicates that this update can be re-tried as a full atomic update. Returns true if the in-place update
 *         succeeds.
 * @throws SolrException with INVALID_STATE if the previous document carries no _version_
 */
public boolean doInPlaceUpdateMerge(AddUpdateCommand cmd, Set<String> updatedFields) throws IOException {
  SolrInputDocument inputDoc = cmd.getSolrInputDocument();
  BytesRef idBytes = cmd.getIndexedId();

  updatedFields.add(DistributedUpdateProcessor.VERSION_FIELD); // add the version field so that it is fetched too
  SolrInputDocument oldDocument = RealTimeGetComponent.getInputDocument
      (cmd.getReq().getCore(), idBytes,
       null, // don't want the version to be returned
       true, // avoid stored fields from index
       updatedFields,
       true); // resolve the full document

  if (oldDocument == RealTimeGetComponent.DELETED || oldDocument == null) {
    // This doc was deleted recently. In-place update cannot work, hence a full atomic update should be tried.
    return false;
  }

  if (!oldDocument.containsKey(DistributedUpdateProcessor.VERSION_FIELD)) {
    throw new SolrException (ErrorCode.INVALID_STATE, "There is no _version_ in previous document. id=" +
        cmd.getPrintableId());
  }
  Long oldVersion = (Long) oldDocument.remove(DistributedUpdateProcessor.VERSION_FIELD).getValue();

  String uniqueKeyField = schema.getUniqueKeyField().getName();

  // If the oldDocument contains any other field apart from updatedFields (or the unique key field), then remove them.
  // This can happen, despite requesting for these fields in the call to RTGC.getInputDocument, if the document was
  // fetched from the tlog and had all these fields (possibly because it was a full document ADD operation).
  // The unique key is identified through the schema rather than by a hard-coded field name. The version field
  // needs no special case here: it was added to updatedFields above.
  // Iterate over a copy of the names, since fields are removed from oldDocument inside the loop.
  for (String fieldName : new HashSet<String>(oldDocument.getFieldNames())) {
    if (!fieldName.equals(uniqueKeyField) && !updatedFields.contains(fieldName)) {
      oldDocument.remove(fieldName);
    }
  }

  // Copy over all supported DVs from oldDocument to partialDoc
  //
  // Assuming multiple updates to the same doc: field 'dv1' in one update, then field 'dv2' in a second
  // update, and then again 'dv1' in a third update (without commits in between), the last update would
  // fetch from the tlog the partial doc for the 2nd (dv2) update. If that doc doesn't copy over the
  // previous updates to dv1 as well, then a full resolution (by following previous pointers) would
  // need to be done to calculate the dv1 value -- so instead copy all the potentially affected DV fields.
  SolrInputDocument partialDoc = new SolrInputDocument();
  for (String fieldName : oldDocument.getFieldNames()) {
    SchemaField schemaField = schema.getField(fieldName);
    if (fieldName.equals(uniqueKeyField) || isSupportedFieldForInPlaceUpdate(schemaField)) {
      partialDoc.addField(fieldName, oldDocument.getFieldValue(fieldName));
    }
  }

  merge(inputDoc, partialDoc);

  // Populate the id field if not already populated (this can happen since stored fields were avoided during fetch from RTGC)
  if (!partialDoc.containsKey(uniqueKeyField)) {
    partialDoc.addField(idField.getName(),
        inputDoc.getField(uniqueKeyField).getFirstValue());
  }

  cmd.prevVersion = oldVersion;
  cmd.solrDoc = partialDoc;
  return true;
}
protected void doSet(SolrInputDocument toDoc, SolrInputField sif, Object fieldVal) {
SchemaField sf = schema.getField(sif.getName());
toDoc.setField(sif.getName(), sf.getType().toNativeType(fieldVal), sif.getBoost());

View File

@ -36,7 +36,13 @@ import java.util.concurrent.locks.ReentrantLock;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.SolrRequest.METHOD;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.request.GenericSolrRequest;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.client.solrj.response.SimpleSolrResponse;
import org.apache.solr.cloud.CloudDescriptor;
import org.apache.solr.cloud.DistributedQueue;
import org.apache.solr.cloud.Overseer;
@ -82,9 +88,11 @@ import org.apache.solr.update.SolrIndexSplitter;
import org.apache.solr.update.UpdateCommand;
import org.apache.solr.update.UpdateHandler;
import org.apache.solr.update.UpdateLog;
import org.apache.solr.update.UpdateShardHandler;
import org.apache.solr.update.VersionBucket;
import org.apache.solr.update.VersionInfo;
import org.apache.solr.util.TestInjection;
import org.apache.solr.util.TimeOut;
import org.apache.zookeeper.KeeperException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -98,6 +106,7 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor {
public static final String DISTRIB_FROM_COLLECTION = "distrib.from.collection";
public static final String DISTRIB_FROM_PARENT = "distrib.from.parent";
public static final String DISTRIB_FROM = "distrib.from";
public static final String DISTRIB_INPLACE_PREVVERSION = "distrib.inplace.prevversion";
private static final String TEST_DISTRIB_SKIP_SERVERS = "test.distrib.skip.servers";
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
@ -727,6 +736,10 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor {
}
}
// If we were sent a previous version, set this to the AddUpdateCommand (if not already set)
if (!cmd.isInPlaceUpdate()) {
cmd.prevVersion = cmd.getReq().getParams().getLong(DistributedUpdateProcessor.DISTRIB_INPLACE_PREVVERSION, -1);
}
// TODO: if minRf > 1 and we know the leader is the only active replica, we could fail
// the request right here but for now I think it is better to just return the status
// to the client that the minRf wasn't reached and let them handle it
@ -784,6 +797,9 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor {
if (replicationTracker != null && minRf > 1)
params.set(UpdateRequest.MIN_REPFACT, String.valueOf(minRf));
if (cmd.isInPlaceUpdate()) {
params.set(DISTRIB_INPLACE_PREVVERSION, String.valueOf(cmd.prevVersion));
}
cmdDistrib.distribAdd(cmd, nodes, params, false, replicationTracker);
}
@ -1011,9 +1027,21 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor {
VersionBucket bucket = vinfo.bucket(bucketHash);
long dependentVersionFound = -1; // Last found version for a dependent update; applicable only for in-place updates; useful for logging later
// if this is an inplace update, check and wait if we should be waiting for a dependent update, before
// entering the synchronized block
if (!leaderLogic && cmd.isInPlaceUpdate()) {
dependentVersionFound = waitForDependentUpdates(cmd, versionOnUpdate, isReplayOrPeersync, bucket);
if (dependentVersionFound == -1) {
// it means in leader, the document has been deleted by now. drop this update
return true;
}
}
vinfo.lockForUpdate();
try {
synchronized (bucket) {
bucket.notifyAll(); //just in case anyone is waiting let them know that we have a new update
// we obtain the version when synchronized and then do the add so we can ensure that
// if version1 < version2 then version1 is actually added before version2.
@ -1078,23 +1106,69 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor {
return true;
}
// if we aren't the leader, then we need to check that updates were not re-ordered
if (bucketVersion != 0 && bucketVersion < versionOnUpdate) {
// we're OK... this update has a version higher than anything we've seen
// in this bucket so far, so we know that no reordering has yet occurred.
bucket.updateHighest(versionOnUpdate);
} else {
// there have been updates higher than the current update. we need to check
// the specific version for this id.
if (cmd.isInPlaceUpdate()) {
long prev = cmd.prevVersion;
Long lastVersion = vinfo.lookupVersion(cmd.getIndexedId());
if (lastVersion != null && Math.abs(lastVersion) >= versionOnUpdate) {
// This update is a repeat, or was reordered. We need to drop this update.
log.debug("Dropping add update due to version {}", idBytes.utf8ToString());
return true;
}
if (lastVersion == null || Math.abs(lastVersion) < prev) {
// this was checked for (in waitForDependentUpdates()) before entering the synchronized block.
// So we shouldn't be here, unless what must've happened is:
// by the time synchronization block was entered, the prev update was deleted by DBQ. Since
// now that update is not in index, the vinfo.lookupVersion() is possibly giving us a version
// from the deleted list (which might be older than the prev update!)
UpdateCommand fetchedFromLeader = fetchFullUpdateFromLeader(cmd, versionOnUpdate);
// also need to re-apply newer deleteByQuery commands
checkDeleteByQueries = true;
if (fetchedFromLeader instanceof DeleteUpdateCommand) {
log.info("In-place update of {} failed to find valid lastVersion to apply to, and the document"
+ " was deleted at the leader subsequently.", idBytes.utf8ToString());
versionDelete((DeleteUpdateCommand)fetchedFromLeader);
return true;
} else {
assert fetchedFromLeader instanceof AddUpdateCommand;
// Newer document was fetched from the leader. Apply that document instead of this current in-place update.
log.info("In-place update of {} failed to find valid lastVersion to apply to, forced to fetch full doc from leader: {}",
idBytes.utf8ToString(), (fetchedFromLeader == null? null: ((AddUpdateCommand)fetchedFromLeader).solrDoc));
// Make this update to become a non-inplace update containing the full document obtained from the leader
cmd.solrDoc = ((AddUpdateCommand)fetchedFromLeader).solrDoc;
cmd.prevVersion = -1;
cmd.setVersion((long)cmd.solrDoc.getFieldValue(VERSION_FIELD));
assert cmd.isInPlaceUpdate() == false;
}
} else {
if (lastVersion != null && Math.abs(lastVersion) > prev) {
// this means we got a newer full doc update and in that case it makes no sense to apply the older
// inplace update. Drop this update
log.info("Update was applied on version: " + prev + ", but last version I have is: " + lastVersion
+ ". Dropping current update.");
return true;
} else {
// We're good, we should apply this update. First, update the bucket's highest.
if (bucketVersion != 0 && bucketVersion < versionOnUpdate) {
bucket.updateHighest(versionOnUpdate);
}
}
}
}
if (!cmd.isInPlaceUpdate()) {
// if we aren't the leader, then we need to check that updates were not re-ordered
if (bucketVersion != 0 && bucketVersion < versionOnUpdate) {
// we're OK... this update has a version higher than anything we've seen
// in this bucket so far, so we know that no reordering has yet occurred.
bucket.updateHighest(versionOnUpdate);
} else {
// there have been updates higher than the current update. we need to check
// the specific version for this id.
Long lastVersion = vinfo.lookupVersion(cmd.getIndexedId());
if (lastVersion != null && Math.abs(lastVersion) >= versionOnUpdate) {
// This update is a repeat, or was reordered. We need to drop this update.
log.debug("Dropping add update due to version {}", idBytes.utf8ToString());
return true;
}
// also need to re-apply newer deleteByQuery commands
checkDeleteByQueries = true;
}
}
}
}
@ -1120,11 +1194,161 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor {
return false;
}
/**
 * This method checks the update/transaction logs and index to find out if the update ("previous update") that the current update
 * depends on (in the case that this current update is an in-place update) has already been completed. If not,
 * this method will wait for the missing update until it has arrived. If it doesn't arrive within a timeout threshold,
 * then this actively fetches from the leader.
 *
 * @param cmd the in-place update whose dependency (cmd.prevVersion) is awaited
 * @param versionOnUpdate version of the current update, used for logging and when fetching from the leader
 * @param isReplayOrPeersync only used for logging
 * @param bucket the version bucket to wait on; other updates notify it when they are applied
 * @return -1 if the current in-place should be dropped, or last found version if previous update has been indexed.
 */
private long waitForDependentUpdates(AddUpdateCommand cmd, long versionOnUpdate,
    boolean isReplayOrPeersync, VersionBucket bucket) throws IOException {
  long lastFoundVersion = 0;
  TimeOut waitTimeout = new TimeOut(5, TimeUnit.SECONDS);

  vinfo.lockForUpdate();
  try {
    synchronized (bucket) {
      Long lookedUpVersion = vinfo.lookupVersion(cmd.getIndexedId());
      lastFoundVersion = lookedUpVersion == null ? 0L : lookedUpVersion;

      if (Math.abs(lastFoundVersion) < cmd.prevVersion) {
        log.debug("Re-ordered inplace update. version={}, prevVersion={}, lastVersion={}, replayOrPeerSync={}, id={}",
            (cmd.getVersion() == 0 ? versionOnUpdate : cmd.getVersion()), cmd.prevVersion, lastFoundVersion, isReplayOrPeersync, cmd.getPrintableId());
      }

      while (Math.abs(lastFoundVersion) < cmd.prevVersion && !waitTimeout.hasTimedOut()) {
        try {
          long timeLeft = waitTimeout.timeLeft(TimeUnit.MILLISECONDS);
          if (timeLeft > 0) { // wait(0) waits forever until notified, but we don't want that.
            bucket.wait(timeLeft);
          }
        } catch (InterruptedException ie) {
          // Restore the interrupt flag before propagating, so that code further up the stack can still
          // observe that this thread was interrupted.
          Thread.currentThread().interrupt();
          throw new RuntimeException(ie);
        }
        lookedUpVersion = vinfo.lookupVersion(cmd.getIndexedId());
        lastFoundVersion = lookedUpVersion == null ? 0L : lookedUpVersion;
      }
    }
  } finally {
    vinfo.unlockForUpdate();
  }

  if (Math.abs(lastFoundVersion) > cmd.prevVersion) {
    // This must've been the case due to a higher version full update succeeding concurrently, while we were waiting or
    // trying to index this partial update. Since a full update more recent than this partial update has succeeded,
    // we can drop the current update.
    if (log.isDebugEnabled()) {
      log.debug("Update was applied on version: {}, but last version I have is: {}"
          + ". Current update should be dropped. id={}", cmd.prevVersion, lastFoundVersion, cmd.getPrintableId());
    }
    return -1;
  } else if (Math.abs(lastFoundVersion) == cmd.prevVersion) {
    assert 0 < lastFoundVersion : "prevVersion " + cmd.prevVersion + " found but is a delete!";
    if (log.isDebugEnabled()) {
      log.debug("Dependent update found. id={}", cmd.getPrintableId());
    }
    return lastFoundVersion;
  }

  // We have waited enough, but dependent update didn't arrive. Its time to actively fetch it from leader
  log.info("Missing update, on which current in-place update depends on, hasn't arrived. id={}, looking for version={}, last found version={}",
      cmd.getPrintableId(), cmd.prevVersion, lastFoundVersion);

  UpdateCommand missingUpdate = fetchFullUpdateFromLeader(cmd, versionOnUpdate);
  if (missingUpdate instanceof DeleteUpdateCommand) {
    log.info("Tried to fetch document {} from the leader, but the leader says document has been deleted. "
        + "Deleting the document here and skipping this update: Last found version: {}, was looking for: {}", cmd.getPrintableId(), lastFoundVersion, cmd.prevVersion);
    versionDelete((DeleteUpdateCommand) missingUpdate);
    return -1;
  } else {
    assert missingUpdate instanceof AddUpdateCommand;
    log.info("Fetched the document: {}", ((AddUpdateCommand) missingUpdate).getSolrInputDocument());
    versionAdd((AddUpdateCommand) missingUpdate);
    log.info("Added the fetched document, id={}, version={}",
        ((AddUpdateCommand) missingUpdate).getPrintableId(), missingUpdate.getVersion());
  }
  return missingUpdate.getVersion();
}
/**
 * Used when the update that a particular in-place update depends on has been lost for some reason. Sends a
 * non-distributed GET request to the {@code /get} handler of the shard leader, asking for the latest full
 * input document as seen on the leader.
 *
 * <p>The leader URL is taken from the {@code DISTRIB_FROM} request parameter when present; otherwise the
 * leader is looked up through ZooKeeper, which requires cloud mode.
 *
 * @param inplaceAdd the in-place update whose dependency is missing; supplies the document id
 * @param versionOnUpdate version of the in-place update; its negation is used as the version of the returned
 *        delete command when the leader reports no (or a zero) version
 * @return an AddUpdateCommand containing the latest full doc at the shard leader for the given id, or a
 *         DeleteUpdateCommand for that id if the leader did not return a document. Never null.
 * @throws SolrException if not in cloud mode and no leader URL was supplied, if the leader lookup is
 *         interrupted, or if the request to the leader fails
 * @throws IOException on I/O failure while talking to the leader
 */
private UpdateCommand fetchFullUpdateFromLeader(AddUpdateCommand inplaceAdd, long versionOnUpdate) throws IOException {
  String id = inplaceAdd.getPrintableId();
  UpdateShardHandler updateShardHandler = inplaceAdd.getReq().getCore().getCoreDescriptor().getCoreContainer().getUpdateShardHandler();
  ModifiableSolrParams params = new ModifiableSolrParams();
  params.set("distrib", false);
  params.set("getInputDocument", id);
  params.set("onlyIfActive", true);
  SolrRequest<SimpleSolrResponse> ur = new GenericSolrRequest(METHOD.GET, "/get", params);

  String leaderUrl = req.getParams().get(DISTRIB_FROM);

  if (leaderUrl == null) {
    // An update we're dependent upon didn't arrive! This is unexpected. Most likely our leader is
    // down or partitioned from us for some reason. Let's force refresh cluster state, and request the
    // leader for the update.
    if (zkController == null) { // we should be in cloud mode; this could be a unit test
      throw new SolrException(ErrorCode.SERVER_ERROR, "Can't find document with id=" + id + ", but fetching from leader "
          + "failed since we're not in cloud mode.");
    }
    Replica leader;
    try {
      leader = zkController.getZkStateReader().getLeaderRetry(cloudDesc.getCollectionName(), cloudDesc.getShardId());
    } catch (InterruptedException e) {
      // Restore the interrupt status so code further up the stack can still observe the interruption.
      Thread.currentThread().interrupt();
      throw new SolrException(ErrorCode.SERVER_ERROR, "Exception during fetching from leader.", e);
    }
    leaderUrl = leader.getCoreUrl();
  }

  NamedList<Object> rsp;
  // The client is closed automatically, whether or not the request succeeds.
  try (HttpSolrClient hsc = new HttpSolrClient.Builder(leaderUrl)
      .withHttpClient(updateShardHandler.getHttpClient()).build()) {
    rsp = hsc.request(ur);
  } catch (SolrServerException e) {
    throw new SolrException(ErrorCode.SERVER_ERROR, "Error during fetching [" + id +
        "] from leader (" + leaderUrl + "): ", e);
  }

  Object inputDocObj = rsp.get("inputDocument");
  Long version = (Long) rsp.get("version");
  SolrInputDocument leaderDoc = (SolrInputDocument) inputDocObj;

  if (leaderDoc == null) {
    // this doc was not found (deleted) on the leader. Let's delete it here as well.
    DeleteUpdateCommand del = new DeleteUpdateCommand(inplaceAdd.getReq());
    del.setIndexedId(inplaceAdd.getIndexedId());
    del.setId(inplaceAdd.getIndexedId().utf8ToString());
    // fall back to the negated version of this update when the leader gave us no usable version
    del.setVersion((version == null || version == 0) ? -versionOnUpdate : version);
    return del;
  }

  AddUpdateCommand cmd = new AddUpdateCommand(req);
  cmd.solrDoc = leaderDoc;
  cmd.setVersion((long) leaderDoc.getFieldValue(VERSION_FIELD));
  return cmd;
}
// TODO: may want to switch to using optimistic locking in the future for better concurrency
// that's why this code is here... need to retry in a loop closely around/in versionAdd
boolean getUpdatedDocument(AddUpdateCommand cmd, long versionOnUpdate) throws IOException {
if (!AtomicUpdateDocumentMerger.isAtomicUpdate(cmd)) return false;
Set<String> inPlaceUpdatedFields = AtomicUpdateDocumentMerger.computeInPlaceUpdatableFields(cmd);
if (inPlaceUpdatedFields.size() > 0) { // non-empty means this is suitable for in-place updates
if (docMerger.doInPlaceUpdateMerge(cmd, inPlaceUpdatedFields)) {
return true;
} else {
// in-place update failed, so fall through and re-try the same with a full atomic update
}
}
// full (non-inplace) atomic update
SolrInputDocument sdoc = cmd.getSolrInputDocument();
BytesRef id = cmd.getIndexedId();
SolrInputDocument oldDoc = RealTimeGetComponent.getInputDocument(cmd.getReq().getCore(), id);

View File

@ -261,7 +261,7 @@ public class DocBasedVersionConstraintsProcessorFactory extends UpdateRequestPro
SolrInputDocument oldDoc = null;
if (useFieldCache) {
oldDoc = RealTimeGetComponent.getInputDocumentFromTlog(core, indexedDocId);
oldDoc = RealTimeGetComponent.getInputDocumentFromTlog(core, indexedDocId, null, null, true);
if (oldDoc == RealTimeGetComponent.DELETED) {
return true;
}

View File

@ -34,6 +34,7 @@ import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.Collections;
import static org.apache.solr.common.SolrException.ErrorCode.SERVER_ERROR;
import static org.apache.solr.update.processor.DistributingUpdateProcessorFactory.DISTRIB_UPDATE_PARAM;
@ -183,7 +184,9 @@ public class SkipExistingDocumentsProcessorFactory extends UpdateRequestProcesso
boolean doesDocumentExist(BytesRef indexedDocId) throws IOException {
assert null != indexedDocId;
SolrInputDocument oldDoc = RealTimeGetComponent.getInputDocumentFromTlog(core, indexedDocId);
// we don't need any fields populated, we just need to know if the doc is in the tlog...
SolrInputDocument oldDoc = RealTimeGetComponent.getInputDocumentFromTlog(core, indexedDocId, null,
Collections.<String>emptySet(), false);
if (oldDoc == RealTimeGetComponent.DELETED) {
return false;
}

View File

@ -0,0 +1,67 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<schema name="inplace-updates" version="1.6">
<uniqueKey>id</uniqueKey>
<field name="id" type="string" indexed="true" stored="true" docValues="true"/>
<field name="_version_" type="long" indexed="false" stored="false" docValues="true" />
<!-- specific schema fields for dv in-place updates -->
<field name="inplace_updatable_float" type="float" indexed="false" stored="false" docValues="true" />
<field name="inplace_updatable_int" type="int" indexed="false" stored="false" docValues="true" />
<field name="inplace_updatable_float_with_default"
type="float" indexed="false" stored="false" docValues="true" default="42.0"/>
<field name="inplace_updatable_int_with_default"
type="int" indexed="false" stored="false" docValues="true" default="666"/>
<!-- dynamic fields which *ONLY* use docValues so they can be updated in place -->
<dynamicField name="*_i_dvo" multiValued="false" type="int" docValues="true" indexed="false" stored="false"/>
<dynamicField name="*_f_dvo" multiValued="false" type="float" docValues="true" indexed="false" stored="false"/>
<dynamicField name="*_l_dvo" multiValued="false" type="long" docValues="true" indexed="false" stored="false"/>
<!-- dynamic fields that must *NOT* support in place updates -->
<dynamicField name="*_s" type="string" indexed="true" stored="true"/>
<dynamicField name="*_i" type="int" indexed="true" stored="true" docValues="true"/>
<dynamicField name="*_l" type="long" indexed="true" stored="true" docValues="true"/>
<!-- Copy fields -->
<!-- The id field has a non in-place updatable copy target, but in-place updates should still work. -->
<copyField source="id" dest="id_field_copy_that_does_not_support_in_place_update_s"/>
<!-- copyfield1: src and dest are both updatable -->
<field name="copyfield1_src__both_updatable" type="int" indexed="false" stored="false" docValues="true" />
<copyField source="copyfield1_src__both_updatable" dest="copyfield1_dest__both_updatable_i_dvo"/>
<!-- copyfield2: src is updatable but dest is not -->
<field name="copyfield2_src__only_src_updatable" type="int" indexed="false" stored="false" docValues="true" />
<copyField source="copyfield2_src__only_src_updatable" dest="copyfield2_dest__only_src_updatable_i"/>
<!-- cruft needed by the solrconfig used in our tests for startup, but not used in the tests -->
<field name="signatureField" type="string" indexed="true" stored="false"/>
<dynamicField name="*_sS" type="string" indexed="true" stored="true"/>
<fieldType name="string" class="solr.StrField" multiValued="false" indexed="false" stored="false" docValues="false" />
<fieldType name="long" class="solr.${solr.tests.longClassName}" multiValued="false" indexed="false" stored="false" docValues="false"/>
<fieldType name="float" class="solr.${solr.tests.floatClassName}" multiValued="false" indexed="false" stored="false" docValues="false"/>
<fieldType name="int" class="solr.${solr.tests.intClassName}" multiValued="false" indexed="false" stored="false" docValues="false"/>
</schema>

View File

@ -572,6 +572,8 @@
<field name="timestamp" type="date" indexed="true" stored="true" docValues="true" default="NOW" multiValued="false"/>
<field name="multiDefault" type="string" indexed="true" stored="true" default="muLti-Default" multiValued="true"/>
<field name="intDefault" type="int" indexed="true" stored="true" default="42" multiValued="false"/>
<field name="intDvoDefault" type="int" indexed="false" stored="false" multiValued="false"
useDocValuesAsStored="true" docValues="true" default="42" />
<field name="intRemove" type="int" indexed="true" stored="true" multiValued="true"/>
<field name="dateRemove" type="date" indexed="true" stored="true" multiValued="true"/>
<field name="floatRemove" type="float" indexed="true" stored="true" multiValued="true"/>
@ -580,7 +582,7 @@
<field name="tlong" type="tlong" indexed="true" stored="true"/>
<field name="_version_" type="long" indexed="true" stored="true" multiValued="false"/>
<field name="_version_" type="long" indexed="false" stored="false" docValues="true" multiValued="false" useDocValuesAsStored="true"/>
<field name="title_stringNoNorms" type="string" omitNorms="true" indexed="true" stored="true"/>
@ -685,15 +687,15 @@
<dynamicField name="*_f1_dv" type="${solr.tests.floatClass:pfloat}" indexed="true" stored="true" docValues="true" multiValued="false"/>
<!-- Non-stored, DocValues=true -->
<dynamicField name="*_i_dvo" multiValued="false" type="${solr.tests.intClass:pint}" docValues="true" indexed="true" stored="false"
<dynamicField name="*_i_dvo" multiValued="false" type="${solr.tests.intClass:pint}" docValues="true" indexed="false" stored="false"
useDocValuesAsStored="true"/>
<dynamicField name="*_d_dvo" multiValued="false" type="${solr.tests.doubleClass:pdouble}" docValues="true" indexed="true" stored="false"
<dynamicField name="*_d_dvo" multiValued="false" type="${solr.tests.doubleClass:pdouble}" docValues="true" indexed="false" stored="false"
useDocValuesAsStored="true"/>
<dynamicField name="*_s_dvo" multiValued="false" type="string" docValues="true" indexed="true" stored="false"
<dynamicField name="*_s_dvo" multiValued="false" type="string" docValues="true" indexed="false" stored="false"
useDocValuesAsStored="true"/>
<dynamicField name="*_ii_dvo" multiValued="true" type="int" docValues="true" indexed="true" stored="false"
<dynamicField name="*_ii_dvo" multiValued="true" type="int" docValues="true" indexed="false" stored="false"
useDocValuesAsStored="true"/>
<dynamicField name="*_dd_dvo" multiValued="true" type="double" docValues="true" indexed="true" stored="false"
<dynamicField name="*_dd_dvo" multiValued="true" type="double" docValues="true" indexed="false" stored="false"
useDocValuesAsStored="true"/>
<!-- Non-stored, DocValues=true, useDocValuesAsStored=false -->

View File

@ -529,7 +529,7 @@
<field name="copyfield_source" type="string" indexed="true" stored="true" multiValued="true"/>
<!-- for versioning -->
<field name="_version_" type="long" indexed="true" stored="true"/>
<field name="_version_" type="long" indexed="false" stored="false" docValues="true"/>
<!-- points to the root document of a block of nested documents -->
<field name="_root_" type="string" indexed="true" stored="true"/>
@ -545,6 +545,11 @@
<dynamicField name="tv_mv_*" type="text" indexed="true" stored="true" multiValued="true"
termVectors="true" termPositions="true" termOffsets="true"/>
<!-- for in-place updates -->
<dynamicField name="*_i_dvo" multiValued="false" type="int" docValues="true" indexed="false" stored="false"/>
<dynamicField name="*_f_dvo" multiValued="false" type="float" docValues="true" indexed="false" stored="false"/>
<dynamicField name="*_l_dvo" multiValued="false" type="long" docValues="true" indexed="false" stored="false"/>
<dynamicField name="*_mfacet" type="string" indexed="true" stored="false" multiValued="true"/>
<dynamicField name="*_sw" type="text_sw" indexed="true" stored="true" multiValued="true"/>

View File

@ -26,8 +26,9 @@
<mergePolicyFactory class="org.apache.solr.index.SortingMergePolicyFactory">
<str name="wrapped.prefix">in</str>
<str name="in.class">org.apache.solr.util.RandomForceMergePolicyFactory</str>
<str name="sort">timestamp desc</str>
<str name="sort">timestamp_i_dvo desc</str>
</mergePolicyFactory>
<lockType>${solr.tests.lockType:single}</lockType>
</indexConfig>
<requestHandler name="standard" class="solr.StandardRequestHandler"></requestHandler>

View File

@ -17,8 +17,6 @@
package org.apache.solr.cloud;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
@ -37,9 +35,13 @@ import org.apache.solr.response.SolrQueryResponse;
class SegmentTerminateEarlyTestState {
final String keyField = "id";
final String timestampField = "timestamp";
final String oddField = "odd_l1"; // <dynamicField name="*_l1" type="long" indexed="true" stored="true" multiValued="false"/>
final String quadField = "quad_l1"; // <dynamicField name="*_l1" type="long" indexed="true" stored="true" multiValued="false"/>
// for historic reasons, this is referred to as a "timestamp" field, but in actuality is just an int
// value representing a number of "minutes" between 0-60.
// aka: I decided not to rename a million things while refactoring this test
public static final String timestampField = "timestamp_i_dvo";
public static final String oddField = "odd_l1"; // <dynamicField name="*_l1" type="long" indexed="true" stored="true" multiValued="false"/>
public static final String quadField = "quad_l1"; // <dynamicField name="*_l1" type="long" indexed="true" stored="true" multiValued="false"/>
final Set<Integer> minTimestampDocKeys = new HashSet<>();
final Set<Integer> maxTimestampDocKeys = new HashSet<>();
@ -77,7 +79,7 @@ class SegmentTerminateEarlyTestState {
maxTimestampMM = new Integer(MM);
maxTimestampDocKeys.add(docKey);
}
doc.setField(timestampField, ZonedDateTime.of(2016, 1, 1, 0, MM, 0, 0, ZoneOffset.UTC).toInstant().toString());
doc.setField(timestampField, (Integer)MM);
doc.setField(oddField, ""+(numDocs % 2));
doc.setField(quadField, ""+(numDocs % 4)+1);
cloudSolrClient.add(doc);

View File

@ -17,19 +17,26 @@
package org.apache.solr.cloud;
import java.lang.invoke.MethodHandles;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.request.schema.SchemaRequest.Field;
import org.apache.solr.client.solrj.response.RequestStatusState;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.core.CoreDescriptor;
import org.apache.solr.index.TieredMergePolicyFactory;
import org.junit.After;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.rules.TestName;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -40,77 +47,54 @@ public class TestSegmentSorting extends SolrCloudTestCase {
private static final int NUM_SERVERS = 5;
private static final int NUM_SHARDS = 2;
private static final int REPLICATION_FACTOR = 2;
private static final String configName = MethodHandles.lookup().lookupClass() + "_configSet";
@BeforeClass
public static void setupCluster() throws Exception {
configureCluster(NUM_SERVERS).configure();
configureCluster(NUM_SERVERS)
.addConfig(configName, Paths.get(TEST_HOME(), "collection1", "conf"))
.configure();
}
@Rule public TestName testName = new TestName();
/**
 * Runs after each test: deletes every collection in the cluster and resets the default collection
 * of the cluster's Solr client to {@code null}.
 */
@After
public void ensureClusterEmpty() throws Exception {
cluster.deleteAllCollections();
cluster.getSolrClient().setDefaultCollection(null);
}
private void createCollection(MiniSolrCloudCluster miniCluster, String collectionName, String createNodeSet, String asyncId,
Boolean indexToPersist, Map<String,String> collectionProperties) throws Exception {
String configName = "solrCloudCollectionConfig";
miniCluster.uploadConfigSet(SolrTestCaseJ4.TEST_PATH().resolve("collection1").resolve("conf"), configName);
@Before
public void createCollection() throws Exception {
final String collectionName = testName.getMethodName();
final CloudSolrClient cloudSolrClient = cluster.getSolrClient();
final Map<String, String> collectionProperties = new HashMap<>();
collectionProperties.put(CoreDescriptor.CORE_CONFIG, "solrconfig-sortingmergepolicyfactory.xml");
CollectionAdminRequest.Create cmd =
CollectionAdminRequest.createCollection(collectionName, configName,
NUM_SHARDS, REPLICATION_FACTOR)
.setProperties(collectionProperties);
final boolean persistIndex = (indexToPersist != null ? indexToPersist.booleanValue() : random().nextBoolean());
if (collectionProperties == null) {
collectionProperties = new HashMap<>();
}
collectionProperties.putIfAbsent(CoreDescriptor.CORE_CONFIG, "solrconfig-tlog.xml");
collectionProperties.putIfAbsent("solr.tests.maxBufferedDocs", "100000");
collectionProperties.putIfAbsent("solr.tests.ramBufferSizeMB", "100");
// use non-test classes so RandomizedRunner isn't necessary
if (random().nextBoolean()) {
collectionProperties.putIfAbsent(SolrTestCaseJ4.SYSTEM_PROPERTY_SOLR_TESTS_MERGEPOLICY, TieredMergePolicy.class.getName());
collectionProperties.putIfAbsent(SolrTestCaseJ4.SYSTEM_PROPERTY_SOLR_TESTS_USEMERGEPOLICY, "true");
collectionProperties.putIfAbsent(SolrTestCaseJ4.SYSTEM_PROPERTY_SOLR_TESTS_USEMERGEPOLICYFACTORY, "false");
} else {
collectionProperties.putIfAbsent(SolrTestCaseJ4.SYSTEM_PROPERTY_SOLR_TESTS_MERGEPOLICYFACTORY, TieredMergePolicyFactory.class.getName());
collectionProperties.putIfAbsent(SolrTestCaseJ4.SYSTEM_PROPERTY_SOLR_TESTS_USEMERGEPOLICYFACTORY, "true");
collectionProperties.putIfAbsent(SolrTestCaseJ4.SYSTEM_PROPERTY_SOLR_TESTS_USEMERGEPOLICY, "false");
assertTrue( cmd.process(cloudSolrClient).isSuccess() );
} else { // async
assertEquals(RequestStatusState.COMPLETED, cmd.processAndWait(cloudSolrClient, 30));
}
collectionProperties.putIfAbsent("solr.tests.mergeScheduler", "org.apache.lucene.index.ConcurrentMergeScheduler");
collectionProperties.putIfAbsent("solr.directoryFactory", (persistIndex ? "solr.StandardDirectoryFactory" : "solr.RAMDirectoryFactory"));
if (asyncId == null) {
CollectionAdminRequest.createCollection(collectionName, configName, NUM_SHARDS, REPLICATION_FACTOR)
.setCreateNodeSet(createNodeSet)
.setProperties(collectionProperties)
.process(miniCluster.getSolrClient());
}
else {
CollectionAdminRequest.createCollection(collectionName, configName, NUM_SHARDS, REPLICATION_FACTOR)
.setCreateNodeSet(createNodeSet)
.setProperties(collectionProperties)
.processAndWait(miniCluster.getSolrClient(), 30);
}
ZkStateReader zkStateReader = cloudSolrClient.getZkStateReader();
AbstractDistribZkTestBase.waitForRecoveriesToFinish(collectionName, zkStateReader, true, true, 330);
cloudSolrClient.setDefaultCollection(collectionName);
}
public void testSegmentTerminateEarly() throws Exception {
final String collectionName = "testSegmentTerminateEarlyCollection";
final SegmentTerminateEarlyTestState tstes = new SegmentTerminateEarlyTestState(random());
final CloudSolrClient cloudSolrClient = cluster.getSolrClient();
cloudSolrClient.setDefaultCollection(collectionName);
// create collection
{
final String asyncId = (random().nextBoolean() ? null : "asyncId("+collectionName+".create)="+random().nextInt());
final Map<String, String> collectionProperties = new HashMap<>();
collectionProperties.put(CoreDescriptor.CORE_CONFIG, "solrconfig-sortingmergepolicyfactory.xml");
createCollection(cluster, collectionName, null, asyncId, Boolean.TRUE, collectionProperties);
}
ZkStateReader zkStateReader = cloudSolrClient.getZkStateReader();
AbstractDistribZkTestBase.waitForRecoveriesToFinish(collectionName, zkStateReader, true, true, 330);
// add some documents, then optimize to get merged-sorted segments
tstes.addDocuments(cloudSolrClient, 10, 10, true);
@ -130,4 +114,71 @@ public class TestSegmentSorting extends SolrCloudTestCase {
tstes.queryTimestampAscendingSegmentTerminateEarlyYes(cloudSolrClient); // uses a sort order that is _not_ compatible with the merge sort order
}
/**
 * Verify that atomic updates against our (DVO) segment sort field don't cause errors.
 * In this situation, the updates should *NOT* be done in-place, because that would
 * break the index sorting.
 */
public void testAtomicUpdateOfSegmentSortField() throws Exception {

  final CloudSolrClient cloudSolrClient = cluster.getSolrClient();
  final String updateField = SegmentTerminateEarlyTestState.timestampField;

  // sanity check that updateField is in fact a DocValues only field, meaning it
  // would normally be eligible for in-place updates -- if it weren't also used for merge sorting
  final Map<String,Object> schemaOpts
      = new Field(updateField, params("includeDynamic", "true",
                                      "showDefaults","true")).process(cloudSolrClient).getField();
  assertEquals(true, schemaOpts.get("docValues"));
  assertEquals(false, schemaOpts.get("indexed"));
  assertEquals(false, schemaOpts.get("stored"));

  // add some documents
  final int numDocs = atLeast(1000);
  for (int id = 1; id <= numDocs; id++) {
    cloudSolrClient.add(sdoc("id", id, updateField, random().nextInt(60)));
  }
  cloudSolrClient.commit();

  // do some random iterations of replacing docs, atomic updates against segment sort field, and commits
  // (at this point we're just sanity checking no serious failures)
  for (int iter = 0; iter < 20; iter++) {
    final int iterSize = atLeast(20);
    for (int i = 0; i < iterSize; i++) {
      // replace
      cloudSolrClient.add(sdoc("id", TestUtil.nextInt(random(), 1, numDocs),
                               updateField, random().nextInt(60)));
      // atomic update
      cloudSolrClient.add(sdoc("id", TestUtil.nextInt(random(), 1, numDocs),
                               updateField, map("set", random().nextInt(60))));
    }
    cloudSolrClient.commit();
  }

  // pick a random doc, and verify that doing an atomic update causes the docid to change
  // ie: not an in-place update
  final int id = TestUtil.nextInt(random(), 1, numDocs);
  final int oldDocId = (Integer) cloudSolrClient.getById(""+id, params("fl","[docid]")).get("[docid]");

  cloudSolrClient.add(sdoc("id", id, updateField, map("inc","666")));
  cloudSolrClient.commit();

  // loop in case we're waiting for a newSearcher to be opened
  int newDocId = -1;
  int attempts = 10;
  while ((newDocId < 0) && (0 < attempts--)) {
    // NOTE: the ':' between field name and range is required, otherwise this is not a range query
    // against updateField and the updated doc is never matched.
    SolrDocumentList docs = cloudSolrClient.query(params("q", "id:"+id,
                                                         "fl","[docid]",
                                                         "fq", updateField + ":[666 TO *]")).getResults();
    if (0 < docs.size()) {
      newDocId = (Integer)docs.get(0).get("[docid]");
    } else {
      Thread.sleep(50);
    }
  }
  // without this check, a doc that is never found (newDocId == -1) would trivially "differ" from oldDocId
  assertTrue("updated doc id=" + id + " was never found with " + updateField + " >= 666", 0 <= newDocId);
  assertTrue("docid did not change, update of " + updateField + " appears to have been done in-place: " + oldDocId,
             oldDocId != newDocId);
}
}

View File

@ -0,0 +1,612 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.cloud;
import java.lang.invoke.MethodHandles;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.commons.math3.primes.Primes;
import org.apache.lucene.util.LuceneTestCase.Slow;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.UpdateResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.zookeeper.KeeperException;
import org.junit.After;
import org.junit.BeforeClass;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@Slow
public class TestStressInPlaceUpdates extends AbstractFullDistribZkTestBase {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
/**
 * Randomly selects a Trie or Point field implementation for each numeric type used by the schema,
 * publishes the choices as system properties, then initializes a core and checks that no form of
 * autocommit is configured.
 */
@BeforeClass
public static void beforeSuperClass() throws Exception {
  // The random draws are made in the order int, long, float, double.
  final String intClassName = random().nextBoolean() ? "TrieIntField" : "IntPointField";
  final String longClassName = random().nextBoolean() ? "TrieLongField" : "LongPointField";
  final String floatClassName = random().nextBoolean() ? "TrieFloatField" : "FloatPointField";
  final String doubleClassName = random().nextBoolean() ? "TrieDoubleField" : "DoublePointField";

  System.setProperty("solr.tests.intClassName", intClassName);
  System.setProperty("solr.tests.longClassName", longClassName);
  System.setProperty("solr.tests.floatClassName", floatClassName);
  System.setProperty("solr.tests.doubleClassName", doubleClassName);

  schemaString = "schema-inplace-updates.xml";
  configString = "solrconfig-tlog.xml";
  initCore(configString, schemaString);

  // sanity check that autocommits are disabled: every time/doc threshold must be -1
  assertEquals(-1, h.getCore().getSolrConfig().getUpdateHandlerInfo().autoCommmitMaxTime);
  assertEquals(-1, h.getCore().getSolrConfig().getUpdateHandlerInfo().autoSoftCommmitMaxTime);
  assertEquals(-1, h.getCore().getSolrConfig().getUpdateHandlerInfo().autoCommmitMaxDocs);
  assertEquals(-1, h.getCore().getSolrConfig().getUpdateHandlerInfo().autoSoftCommmitMaxDocs);
}
/**
 * Clears the numeric field class system properties once a test has finished.
 */
@After
public void after() {
  final String[] numericClassProps = {
      "solr.tests.intClassName",
      "solr.tests.longClassName",
      "solr.tests.floatClassName",
      "solr.tests.doubleClassName"
  };
  for (String prop : numericClassProps) {
    System.clearProperty(prop);
  }
}
/**
 * Sets up the test with a single slice ({@code sliceCount = 1}) and calls {@code fixShardCount(3)}.
 * NOTE(review): presumably this pins the number of shard replicas at 3, matching the
 * {@code @ShardsFixed(num = 3)} annotation on the test method -- confirm against the base class.
 */
public TestStressInPlaceUpdates() {
super();
sliceCount = 1;
fixShardCount(3);
}
protected final ConcurrentHashMap<Integer, DocInfo> model = new ConcurrentHashMap<>();
protected Map<Integer, DocInfo> committedModel = new HashMap<>();
protected long snapshotCount;
protected long committedModelClock;
protected int clientIndexUsedForCommit;
protected volatile int lastId;
protected final String field = "val_l";
/**
 * Fills {@code model} with one placeholder entry for each id in {@code [0, ndocs)} and then copies
 * all of those entries into {@code committedModel}.
 *
 * @param ndocs number of document ids to seed
 */
private void initModel(int ndocs) {
  int docId = 0;
  while (docId < ndocs) {
    // versions are seeded with -1 so that "from scratch" adds/updates fail optimistic concurrency
    // checks if some other thread beats us to adding the id
    model.put(docId, new DocInfo(-1L, 0, 0));
    docId++;
  }
  committedModel.putAll(model);
}
SolrClient leaderClient = null;
@Test
@ShardsFixed(num = 3)
public void stressTest() throws Exception {
waitForRecoveriesToFinish(true);
this.leaderClient = getClientForLeader();
assertNotNull("Couldn't obtain client for the leader of the shard", this.leaderClient);
final int commitPercent = 5 + random().nextInt(20);
final int softCommitPercent = 30 + random().nextInt(75); // what percent of the commits are soft
final int deletePercent = 4 + random().nextInt(25);
final int deleteByQueryPercent = random().nextInt(8);
final int ndocs = atLeast(5);
int nWriteThreads = 5 + random().nextInt(25);
int fullUpdatePercent = 5 + random().nextInt(50);
// query variables
final int percentRealtimeQuery = 75;
// number of cumulative read/write operations by all threads
final AtomicLong operations = new AtomicLong(25000);
int nReadThreads = 5 + random().nextInt(25);
/** // testing
final int commitPercent = 5;
final int softCommitPercent = 100; // what percent of the commits are soft
final int deletePercent = 0;
final int deleteByQueryPercent = 50;
final int ndocs = 10;
int nWriteThreads = 10;
final int maxConcurrentCommits = nWriteThreads; // number of committers at a time... it should be <= maxWarmingSearchers
// query variables
final int percentRealtimeQuery = 101;
final AtomicLong operations = new AtomicLong(50000); // number of query operations to perform in total
int nReadThreads = 10;
int fullUpdatePercent = 20;
**/
log.info("{}", Arrays.asList
("commitPercent", commitPercent, "softCommitPercent", softCommitPercent,
"deletePercent", deletePercent, "deleteByQueryPercent", deleteByQueryPercent,
"ndocs", ndocs, "nWriteThreads", nWriteThreads, "percentRealtimeQuery", percentRealtimeQuery,
"operations", operations, "nReadThreads", nReadThreads));
initModel(ndocs);
List<Thread> threads = new ArrayList<>();
for (int i = 0; i < nWriteThreads; i++) {
Thread thread = new Thread("WRITER" + i) {
Random rand = new Random(random().nextInt());
/**
 * Writer loop. Until the shared {@code operations} budget is used up, each iteration randomly
 * does one of the following:
 * <ul>
 *   <li>commit (soft or hard) through a randomly chosen client and, if the snapshot is not stale,
 *       install a copy of the model as the committed model;</li>
 *   <li>delete a document, either by id (DBI) or by query (DBQ);</li>
 *   <li>send a full update, or a partial ("inc") update of {@code val2_l_dvo}.</li>
 * </ul>
 * After a write, the model entry is replaced only when a version was returned and its absolute
 * value is greater than the absolute value of the version currently recorded in the model.
 * Any unexpected failure stops all threads by setting {@code operations} to -1 and is rethrown.
 */
@Override
public void run() {
  try {
    while (operations.decrementAndGet() > 0) {
      int oper = rand.nextInt(100);

      if (oper < commitPercent) {
        Map<Integer, DocInfo> newCommittedModel;
        long version;

        synchronized (TestStressInPlaceUpdates.this) {
          // take a snapshot of the model
          // this is safe to do w/o synchronizing on the model because it's a ConcurrentHashMap
          newCommittedModel = new HashMap<>(model);
          version = snapshotCount++;

          int chosenClientIndex = rand.nextInt(clients.size());

          if (rand.nextInt(100) < softCommitPercent) {
            log.info("softCommit start");
            clients.get(chosenClientIndex).commit(true, true, true);
            log.info("softCommit end");
          } else {
            log.info("hardCommit start");
            clients.get(chosenClientIndex).commit();
            log.info("hardCommit end");
          }

          // install this model snapshot only if it's newer than the current one
          if (version >= committedModelClock) {
            if (VERBOSE) {
              log.info("installing new committedModel version={}", committedModelClock);
            }
            clientIndexUsedForCommit = chosenClientIndex;
            committedModel = newCommittedModel;
            committedModelClock = version;
          }
        }
        continue;
      }

      int id;
      if (rand.nextBoolean()) {
        id = rand.nextInt(ndocs);
      } else {
        id = lastId; // reuse the last ID half of the time to force more race conditions
      }

      // set the lastId before we actually change it sometimes to try and
      // uncover more race conditions between writing and reading
      boolean before = rand.nextBoolean();
      if (before) {
        lastId = id;
      }

      DocInfo info = model.get(id);

      // yield after getting the next version to increase the odds of updates happening out of order
      if (rand.nextBoolean()) Thread.yield();

      if (oper < commitPercent + deletePercent + deleteByQueryPercent) {
        // the upper part of the delete range is delete-by-query, the lower part delete-by-id
        final boolean dbq = (oper >= commitPercent + deletePercent);
        // label must agree with the flag handed to deleteDocAndGetVersion below
        final String delType = dbq ? "DBQ" : "DBI";
        log.info("{} id {}: {}", delType, id, info);

        Long returnedVersion = null;

        try {
          returnedVersion = deleteDocAndGetVersion(Integer.toString(id), params("_version_", Long.toString(info.version)), dbq);
          log.info(delType + ": Deleting id=" + id + ", version=" + info.version
              + ". Returned version=" + returnedVersion);
        } catch (RuntimeException e) {
          if (isVersionConflict(e)) {
            // It's okay for a leader to reject a concurrent request
            log.warn("Conflict during {}, rejected id={}, {}", delType, id, e);
            returnedVersion = null;
          } else {
            throw e;
          }
        }

        // only update model if update had no conflict & the version is newer
        synchronized (model) {
          DocInfo currInfo = model.get(id);
          if (null != returnedVersion &&
              (Math.abs(returnedVersion.longValue()) > Math.abs(currInfo.version))) {
            // a deleted doc is modelled with zeroed field values
            model.put(id, new DocInfo(returnedVersion.longValue(), 0, 0));
          }
        }
      } else {
        int val1 = info.intFieldValue;
        long val2 = info.longFieldValue;
        int nextVal1 = val1;
        long nextVal2 = val2;

        int addOper = rand.nextInt(100);
        Long returnedVersion;
        if (addOper < fullUpdatePercent || info.version <= 0) { // if document was never indexed or was deleted
          // FULL UPDATE
          nextVal1 = Primes.nextPrime(val1 + 1);
          // keeps the long value an exact multiple of the int value
          nextVal2 = nextVal1 * 1000000000L;
          try {
            returnedVersion = addDocAndGetVersion("id", id, "title_s", "title" + id, "val1_i_dvo", nextVal1, "val2_l_dvo", nextVal2, "_version_", info.version);
            log.info("FULL: Writing id=" + id + ", val=[" + nextVal1 + "," + nextVal2 + "], version=" + info.version + ", Prev was=[" + val1 + "," + val2 + "].  Returned version=" + returnedVersion);
          } catch (RuntimeException e) {
            if (isVersionConflict(e)) {
              // It's okay for a leader to reject a concurrent request
              log.warn("Conflict during full update, rejected id={}, {}", id, e);
              returnedVersion = null;
            } else {
              throw e;
            }
          }
        } else {
          // PARTIAL
          nextVal2 = val2 + val1;
          try {
            returnedVersion = addDocAndGetVersion("id", id, "val2_l_dvo", map("inc", String.valueOf(val1)), "_version_", info.version);
            log.info("PARTIAL: Writing id=" + id + ", val=[" + nextVal1 + "," + nextVal2 + "], version=" + info.version + ", Prev was=[" + val1 + "," + val2 + "].  Returned version=" + returnedVersion);
          } catch (RuntimeException e) {
            final String message = e.getMessage();
            if (isVersionConflict(e)) {
              // It's okay for a leader to reject a concurrent request
              log.warn("Conflict during partial update, rejected id={}, {}", id, e);
            } else if (message != null && message.contains("Document not found for update.")
                && message.contains("id="+id)) {
              log.warn("Attempted a partial update for a recently deleted document, rejected id={}, {}", id, e);
            } else {
              throw e;
            }
            returnedVersion = null;
          }
        }

        // only update model if update had no conflict & the version is newer
        synchronized (model) {
          DocInfo currInfo = model.get(id);
          if (null != returnedVersion &&
              (Math.abs(returnedVersion.longValue()) > Math.abs(currInfo.version))) {
            model.put(id, new DocInfo(returnedVersion.longValue(), nextVal1, nextVal2));
          }
        }
      }

      if (!before) {
        lastId = id;
      }
    }
  } catch (Throwable e) {
    // a negative budget makes every other reader/writer loop terminate
    operations.set(-1L);
    log.error("", e);
    throw new RuntimeException(e);
  }
}

/**
 * Tells whether the message of {@code e} marks the failure as a rejected concurrent request,
 * i.e. it contains "version conflict" or "Conflict". A {@code null} message never matches.
 */
private boolean isVersionConflict(RuntimeException e) {
  final String message = e.getMessage();
  return message != null && (message.contains("version conflict") || message.contains("Conflict"));
}
};
threads.add(thread);
}
// Read threads
for (int i = 0; i < nReadThreads; i++) {
Thread thread = new Thread("READER" + i) {
Random rand = new Random(random().nextInt());
/**
 * Reader loop. Until the shared {@code operations} budget is used up, picks a document id
 * (biased toward {@code lastId}), reads the expected state from the model (realtime) or from the
 * committed model, queries Solr for that id and checks the returned document against the
 * expectation. An empty result is not checked; more than one result fails the test.
 * Any failure stops all threads by setting {@code operations} to -1 and is rethrown.
 */
@SuppressWarnings("unchecked")
@Override
public void run() {
  try {
    while (operations.decrementAndGet() >= 0) {
      // bias toward a recently changed doc
      int id = rand.nextInt(100) < 25 ? lastId : rand.nextInt(ndocs);

      // when indexing, we update the index, then the model
      // so when querying, we should first check the model, and then the index

      boolean realTime = rand.nextInt(100) < percentRealtimeQuery;
      DocInfo expected;

      if (realTime) {
        expected = model.get(id);
      } else {
        synchronized (TestStressInPlaceUpdates.this) {
          expected = committedModel.get(id);
        }
      }

      if (VERBOSE) {
        log.info("querying id {}", id);
      }
      ModifiableSolrParams params = new ModifiableSolrParams();
      if (realTime) {
        params.set("wt", "json");
        params.set("qt", "/get");
        params.set("ids", Integer.toString(id));
      } else {
        params.set("wt", "json");
        params.set("q", "id:" + Integer.toString(id));
        params.set("omitHeader", "true");
      }

      int clientId = rand.nextInt(clients.size());
      // non-realtime searches go to the client that issued the last installed commit
      if (!realTime) clientId = clientIndexUsedForCommit;

      QueryResponse response = clients.get(clientId).query(params);
      if (response.getResults().size() == 0) {
        // there's no info we can get back with a delete, so not much we can check without further synchronization
      } else if (response.getResults().size() == 1) {
        final SolrDocument actual = response.getResults().get(0);
        final String msg = "Realtime=" + realTime + ", expected=" + expected + ", actual=" + actual;
        assertNotNull(msg, actual);

        final Long foundVersion = (Long) actual.getFieldValue("_version_");
        assertNotNull(msg, foundVersion);
        assertTrue(msg + "... solr doc has non-positive version???",
            0 < foundVersion.longValue());
        final Integer intVal = (Integer) actual.getFieldValue("val1_i_dvo");
        assertNotNull(msg, intVal);

        final Long longVal = (Long) actual.getFieldValue("val2_l_dvo");
        assertNotNull(msg, longVal);

        assertTrue(msg + " ...solr returned older version then model. " +
                "should not be possible given the order of operations in writer threads",
            Math.abs(expected.version) <= foundVersion.longValue());

        if (foundVersion.longValue() == expected.version) {
          assertEquals(msg, expected.intFieldValue, intVal.intValue());
          assertEquals(msg, expected.longFieldValue, longVal.longValue());
        }

        // Some things we can assert about any Doc returned from solr,
        // even if it's newer than our (expected) model information...

        assertTrue(msg + " ...how did a doc in solr get a non positive intVal?",
            0 < intVal);
        assertTrue(msg + " ...how did a doc in solr get a non positive longVal?",
            0 < longVal);
        assertEquals(msg + " ...intVal and longVal in solr doc are internally (modulo) inconsistent w/eachother",
            0, (longVal % intVal));

        // NOTE: when foundVersion is greater than the version read from the model,
        // it's not possible to make any assertions about the field values in solr relative to the
        // field values in the model -- ie: we can *NOT* assert expected.longFieldVal <= doc.longVal
        //
        // it's tempting to think that this would be possible if we changed our model to preserve the
        // "old" values when doing a delete, but that's still no guarantee because of how optimistic
        // concurrency works with negative versions:  When adding a doc, we can assert that it must not
        // exist with version<0, but we can't assert that the *reason* it doesn't exist was because of
        // a delete with the specific version of "-42".
        // So a writer thread might (1) prep to add a doc for the first time with "intValue=1,_version_=-1",
        // and that add may succeed and (2) return some version X which is put in the model.  but
        // in between #1 and #2 other threads may have added & deleted the doc repeatedly, updating
        // the model with intValue=7,_version_=-42, and a reader thread might meanwhile read from the
        // model before #2 and expect intValue=5, but get intValue=1 from solr (with a greater version)

      } else {
        // String.format needs %s; an SLF4J-style "{}" would be printed literally and drop the response
        fail(String.format(Locale.ENGLISH, "There were more than one result: %s", response));
      }
    }
  } catch (Throwable e) {
    // a negative budget makes every other reader/writer loop terminate
    operations.set(-1L);
    log.error("", e);
    throw new RuntimeException(e);
  }
}
};
threads.add(thread);
}
// Start all threads
for (Thread thread : threads) {
thread.start();
}
for (Thread thread : threads) {
thread.join();
}
{ // final pass over uncommitted model with RTG
for (SolrClient client : clients) {
for (Map.Entry<Integer,DocInfo> entry : model.entrySet()) {
final Integer id = entry.getKey();
final DocInfo expected = entry.getValue();
final SolrDocument actual = client.getById(id.toString());
String msg = "RTG: " + id + "=" + expected;
if (null == actual) {
// a deleted or non-existent document
// sanity check of the model agrees...
assertTrue(msg + " is deleted/non-existent in Solr, but model has non-neg version",
expected.version < 0);
assertEquals(msg + " is deleted/non-existent in Solr", expected.intFieldValue, 0);
assertEquals(msg + " is deleted/non-existent in Solr", expected.longFieldValue, 0);
} else {
msg = msg + " <==VS==> " + actual;
assertEquals(msg, expected.intFieldValue, actual.getFieldValue("val1_i_dvo"));
assertEquals(msg, expected.longFieldValue, actual.getFieldValue("val2_l_dvo"));
assertEquals(msg, expected.version, actual.getFieldValue("_version_"));
assertTrue(msg + " doc exists in solr, but version is negative???",
0 < expected.version);
}
}
}
}
{ // do a final search and compare every result with the model
// because commits don't provide any sort of concrete versioning (or optimistic concurrency constraints)
// there's no way to guarantee that our committedModel matches what was in Solr at the time of the last commit.
// It's possible other threads made additional writes to solr before the commit was processed, but after
// the committedModel variable was assigned its new value.
//
// what we can do however, is commit all completed updates, and *then* compare solr search results
// against the (new) committed model....
waitForThingsToLevelOut(30); // NOTE: this does an automatic commit for us & ensures replicas are up to date
committedModel = new HashMap<>(model);
// first, prune the model of any docs that have negative versions
// ie: were never actually added, or were ultimately deleted.
for (int i = 0; i < ndocs; i++) {
DocInfo info = committedModel.get(i);
if (info.version < 0) {
// first, a quick sanity check of the model itself...
assertEquals("Inconsistent int value in model for deleted doc" + i + "=" + info,
0, info.intFieldValue);
assertEquals("Inconsistent long value in model for deleted doc" + i + "=" + info,
0L, info.longFieldValue);
committedModel.remove(i);
}
}
for (SolrClient client : clients) {
QueryResponse rsp = client.query(params("q","*:*", "sort", "id asc", "rows", ndocs+""));
for (SolrDocument actual : rsp.getResults()) {
final Integer id = Integer.parseInt(actual.getFieldValue("id").toString());
final DocInfo expected = committedModel.get(id);
assertNotNull("Doc found but missing/deleted from model: " + actual, expected);
final String msg = "Search: " + id + "=" + expected + " <==VS==> " + actual;
assertEquals(msg, expected.intFieldValue, actual.getFieldValue("val1_i_dvo"));
assertEquals(msg, expected.longFieldValue, actual.getFieldValue("val2_l_dvo"));
assertEquals(msg, expected.version, actual.getFieldValue("_version_"));
assertTrue(msg + " doc exists in solr, but version is negative???",
0 < expected.version);
// also sanity check the model (which we already know matches the doc)
assertEquals("Inconsistent (modulo) values in model for id " + id + "=" + expected,
0, (expected.longFieldValue % expected.intFieldValue));
}
assertEquals(committedModel.size(), rsp.getResults().getNumFound());
}
}
}
/**
 * One entry of the in-memory model: the version recorded for a document together with the
 * values of its int and long fields.
 */
private static class DocInfo {
  long version;
  int intFieldValue;
  long longFieldValue;

  public DocInfo(long version, int val1, long val2) {
    // zero is never legal: a version is either a real positive version,
    // or a negative deleted version/indicator
    assert version != 0;
    this.version = version;
    this.intFieldValue = val1;
    this.longFieldValue = val2;
  }

  /** Renders the entry as {@code [version=V, intValue=I,longValue=L]}. */
  @Override
  public String toString() {
    final StringBuilder text = new StringBuilder("[version=");
    text.append(version);
    text.append(", intValue=").append(intFieldValue);
    text.append(",longValue=").append(longFieldValue);
    return text.append("]").toString();
  }
}
/**
 * Builds one document from {@code fields} (handed to {@code addFields}; callers in this test pass
 * alternating field names and values), sends it to the leader with {@code versions=true} and
 * returns the version reported for the add.
 *
 * @param fields the field data for the document
 * @return the version of the added document, asserted to be positive
 */
protected long addDocAndGetVersion(Object... fields) throws Exception {
  final SolrInputDocument inputDoc = new SolrInputDocument();
  addFields(inputDoc, fields);

  final ModifiableSolrParams requestParams = new ModifiableSolrParams();
  requestParams.add("versions", "true");

  final UpdateRequest request = new UpdateRequest();
  request.setParams(requestParams);
  request.add(inputDoc);

  // send updates to leader, to avoid SOLR-8733
  final UpdateResponse response = request.process(leaderClient);

  // the first value of the "adds" section is the version assigned to our single document
  final NamedList<?> adds = (NamedList<?>) response.getResponse().get("adds");
  final long returnedVersion = Long.parseLong(adds.getVal(0).toString());
  assertTrue("Due to SOLR-8733, sometimes returned version is 0. Let us assert that we have successfully"
      + " worked around that problem here.", returnedVersion > 0);
  return returnedVersion;
}
/**
 * Deletes a document through the leader, either by id or with the query {@code id:<id>}, and
 * returns the version reported for the delete.
 *
 * @param id the id of the document to delete
 * @param params request parameters; {@code versions=true} is added to this instance
 * @param deleteByQuery {@code true} to issue a delete-by-query, {@code false} for delete-by-id
 * @return the version of the delete, asserted to be negative
 */
protected long deleteDocAndGetVersion(String id, ModifiableSolrParams params, boolean deleteByQuery) throws Exception {
  params.add("versions", "true");

  final UpdateRequest request = new UpdateRequest();
  request.setParams(params);

  // the response section holding the version depends on the kind of delete
  final String responseKey;
  if (deleteByQuery) {
    request.deleteByQuery("id:"+id);
    responseKey = "deleteByQuery";
  } else {
    request.deleteById(id);
    responseKey = "deletes";
  }

  // send updates to leader, to avoid SOLR-8733
  final UpdateResponse response = request.process(leaderClient);

  final NamedList<?> deleteVersions = (NamedList<?>) response.getResponse().get(responseKey);
  final long returnedVersion = Long.parseLong(deleteVersions.getVal(0).toString());
  assertTrue("Due to SOLR-8733, sometimes returned version is 0. Let us assert that we have successfully"
      + " worked around that problem here.", returnedVersion < 0);
  return returnedVersion;
}
/**
 * Method gets the SolrClient for the leader replica of {@code SHARD1} in the default collection.
 * This is needed for a workaround for SOLR-8733.
 * <p>
 * The collection state is force-refreshed first. The leader's base URL is resolved once and then
 * compared against the base URL of every entry in {@code clients}.
 *
 * @return the first client whose base URL starts with the leader's base URL,
 *         or {@code null} if no client matches
 */
public SolrClient getClientForLeader() throws KeeperException, InterruptedException {
  final ZkStateReader zkStateReader = cloudClient.getZkStateReader();
  zkStateReader.forceUpdateCollection(DEFAULT_COLLECTION);
  final ClusterState clusterState = zkStateReader.getClusterState();

  final Slice shard1 = clusterState.getCollection(DEFAULT_COLLECTION).getSlice(SHARD1);
  final Replica leader = shard1.getLeader();

  // the leader does not change while we scan the clients, so resolve its URL only once
  final String leaderBaseUrl = zkStateReader.getBaseUrlForNodeName(leader.getNodeName());

  for (SolrClient client : clients) {
    if (((HttpSolrClient) client).getBaseURL().startsWith(leaderBaseUrl)) {
      return client;
    }
  }

  return null;
}
}

View File

@ -82,77 +82,62 @@ public class SpellCheckComponentTest extends SolrTestCaseJ4 {
public void testMaximumResultsForSuggest() throws Exception {
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD, "true", "q","lowerfilt:(this OR brwn)",
SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false", SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, "7")
,"/spellcheck/suggestions/[0]=='brwn'"
,"/spellcheck/suggestions/[1]/numFound==1"
,"/spellcheck/suggestions/brwn/numFound==1"
);
try {
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD, "true", "q","lowerfilt:(this OR brwn)",
SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false", SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, "6")
,"/spellcheck/suggestions/[1]/numFound==1"
);
fail("there should have been no suggestions (6<7)");
} catch(Exception e) {
//correctly threw exception
}
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD, "true", "q","lowerfilt:(this OR brwn)",
SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false", SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, "6")
,"/spellcheck/suggestions=={}");
// there should have been no suggestions (6<7)
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD, "true", "q","lowerfilt:(this OR brwn)",
"fq", "id:[0 TO 9]", /*returns 10, less selective */ "fq", "lowerfilt:th*", /* returns 8, most selective */
SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false", SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, ".90")
,"/spellcheck/suggestions/[0]=='brwn'"
,"/spellcheck/suggestions/[1]/numFound==1"
,"/spellcheck/suggestions/brwn/numFound==1"
);
try {
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD, "true", "q","lowerfilt:(this OR brwn)",
"fq", "id:[0 TO 9]", /*returns 10, less selective */ "fq", "lowerfilt:th*", /* returns 8, most selective */
SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false", SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, ".80")
,"/spellcheck/suggestions/[1]/numFound==1"
);
fail("there should have been no suggestions ((.8 * 8)<7)");
} catch(Exception e) {
//correctly threw exception
}
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD, "true", "q","lowerfilt:(this OR brwn)",
"fq", "id:[0 TO 9]", /*returns 10, less selective */ "fq", "lowerfilt:th*", /* returns 8, most selective */
SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false", SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, ".80")
,"/spellcheck/suggestions=={}");
// there should have been no suggestions ((.8 * 8)<7)
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD, "true", "q","lowerfilt:(this OR brwn)",
"fq", "id:[0 TO 9]", SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST_FQ, "id:[0 TO 9]",
SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false", SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, ".70")
,"/spellcheck/suggestions/[0]=='brwn'"
,"/spellcheck/suggestions/[1]/numFound==1"
,"/spellcheck/suggestions/brwn/numFound==1"
);
try {
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD, "true", "q","lowerfilt:(this OR brwn)",
"fq", "id:[0 TO 9]", SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST_FQ, "lowerfilt:th*",
SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false", SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, ".64")
,"/spellcheck/suggestions/[1]/numFound==1"
);
fail("there should have been no suggestions ((.64 * 10)<7)");
} catch(Exception e) {
//correctly threw exception
}
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD, "true", "q","lowerfilt:(this OR brwn)",
"fq", "id:[0 TO 9]", SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST_FQ, "lowerfilt:th*",
SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false", SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, ".64")
,"/spellcheck/suggestions=={}");
// there should have been no suggestions ((.64 * 10)<7)
}
@Test
public void testExtendedResultsCount() throws Exception {
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD, "true", "q","bluo", SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false")
,"/spellcheck/suggestions/[0]=='bluo'"
,"/spellcheck/suggestions/[1]/numFound==5"
,"/spellcheck/suggestions/bluo/numFound==5"
);
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", "q","bluo", SpellingParams.SPELLCHECK_COUNT,"3", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"true")
,"/spellcheck/suggestions/[1]/suggestion==[{'word':'blud','freq':1}, {'word':'blue','freq':1}, {'word':'blee','freq':1}]"
,"/spellcheck/suggestions/bluo/suggestion==[{'word':'blud','freq':1}, {'word':'blue','freq':1}, {'word':'blee','freq':1}]"
);
}
@Test
public void test() throws Exception {
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", "q","documemt")
,"/spellcheck=={'suggestions':['documemt',{'numFound':1,'startOffset':0,'endOffset':8,'suggestion':['document']}]}"
,"/spellcheck=={'suggestions':{'documemt':{'numFound':1,'startOffset':0,'endOffset':8,'suggestion':['document']}}}"
);
}
@Test
public void testNumericQuery() throws Exception {
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", "q","12346")
,"/spellcheck=={'suggestions':['12346',{'numFound':1,'startOffset':0,'endOffset':5,'suggestion':['12345']}]}"
,"/spellcheck=={'suggestions':{'12346':{'numFound':1,'startOffset':0,'endOffset':5,'suggestion':['12345']}}}"
);
}
@ -186,13 +171,21 @@ public class SpellCheckComponentTest extends SolrTestCaseJ4 {
@Test
public void testCollateExtendedResultsWithJsonNl() throws Exception {
final String q = "documemtsss broens";
final String jsonNl = "map";
final String jsonNl = (random().nextBoolean() ? "map" : "arrntv");
final boolean collateExtendedResults = random().nextBoolean();
final List<String> testsList = new ArrayList<String>();
if (collateExtendedResults) {
testsList.add("/spellcheck/collations/collation/collationQuery=='document brown'");
testsList.add("/spellcheck/collations/collation/hits==0");
switch (jsonNl) {
case "arrntv":
testsList.add("/spellcheck/collations/collation/misspellingsAndCorrections/[0]/name=='documemtsss'");
testsList.add("/spellcheck/collations/collation/misspellingsAndCorrections/[0]/type=='str'");
testsList.add("/spellcheck/collations/collation/misspellingsAndCorrections/[0]/value=='document'");
testsList.add("/spellcheck/collations/collation/misspellingsAndCorrections/[1]/name=='broens'");
testsList.add("/spellcheck/collations/collation/misspellingsAndCorrections/[1]/type=='str'");
testsList.add("/spellcheck/collations/collation/misspellingsAndCorrections/[1]/value=='brown'");
break;
case "map":
testsList.add("/spellcheck/collations/collation/misspellingsAndCorrections/documemtsss=='document'");
testsList.add("/spellcheck/collations/collation/misspellingsAndCorrections/broens=='brown'");
@ -311,11 +304,11 @@ public class SpellCheckComponentTest extends SolrTestCaseJ4 {
//while "document" is present.
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", "q","documenq", SpellingParams.SPELLCHECK_DICT, "threshold", SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"true")
,"/spellcheck/suggestions/[1]/suggestion==[{'word':'document','freq':2}]"
,"/spellcheck/suggestions/documenq/suggestion==[{'word':'document','freq':2}]"
);
assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", "q","documenq", SpellingParams.SPELLCHECK_DICT, "threshold_direct", SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"true")
,"/spellcheck/suggestions/[1]/suggestion==[{'word':'document','freq':2}]"
,"/spellcheck/suggestions/documenq/suggestion==[{'word':'document','freq':2}]"
);
//TODO: how do we make this into a 1-liner using "assertQ()" ???

View File

@ -25,9 +25,14 @@ import com.codahale.metrics.Metric;
import com.codahale.metrics.MetricRegistry;
import org.apache.solr.metrics.SolrMetricManager;
import org.noggit.ObjectBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.update.DirectUpdateHandler2;
import org.apache.solr.update.UpdateLog;
import org.apache.solr.update.UpdateHandler;
@ -37,6 +42,7 @@ import org.junit.Test;
import java.io.File;
import java.io.RandomAccessFile;
import java.lang.invoke.MethodHandles;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.ArrayDeque;
@ -53,6 +59,7 @@ import java.util.concurrent.TimeUnit;
import org.apache.solr.update.processor.DistributedUpdateProcessor.DistribPhase;
public class TestRecovery extends SolrTestCaseJ4 {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
// means that we've seen the leader and have version info (i.e. we are a non-leader replica)
private static String FROM_LEADER = DistribPhase.FROMLEADER.toString();
@ -67,6 +74,12 @@ public class TestRecovery extends SolrTestCaseJ4 {
savedFactory = System.getProperty("solr.DirectoryFactory");
System.setProperty("solr.directoryFactory", "org.apache.solr.core.MockFSDirectoryFactory");
initCore("solrconfig-tlog.xml","schema15.xml");
// validate that the schema was not changed to an unexpected state
IndexSchema schema = h.getCore().getLatestSchema();
assertTrue(schema.getFieldOrNull("_version_").hasDocValues() && !schema.getFieldOrNull("_version_").indexed()
&& !schema.getFieldOrNull("_version_").stored());
}
@AfterClass
@ -86,6 +99,7 @@ public class TestRecovery extends SolrTestCaseJ4 {
@Test
public void testLogReplay() throws Exception {
try {
DirectUpdateHandler2.commitOnClose = false;
@ -112,7 +126,8 @@ public class TestRecovery extends SolrTestCaseJ4 {
versions.addFirst(addAndGetVersion(sdoc("id", "A12"), null));
versions.addFirst(deleteByQueryAndGetVersion("id:A11", null));
versions.addFirst(addAndGetVersion(sdoc("id", "A13"), null));
versions.addFirst(addAndGetVersion(sdoc("id", "A12", "val_i_dvo", map("set", 1)), null)); // atomic update
versions.addFirst(addAndGetVersion(sdoc("id", "A12", "val_i_dvo", map("set", 2)), null)); // in-place update
assertJQ(req("q","*:*"),"/response/numFound==0");
assertJQ(req("qt","/get", "getVersions",""+versions.size()) ,"/versions==" + versions);
@ -151,10 +166,11 @@ public class TestRecovery extends SolrTestCaseJ4 {
// wait until recovery has finished
assertTrue(logReplayFinish.tryAcquire(timeout, TimeUnit.SECONDS));
assertJQ(req("q","val_i_dvo:2") ,"/response/numFound==1"); // assert that in-place update is retained
assertJQ(req("q","*:*") ,"/response/numFound==3");
assertEquals(5L, replayDocs.getCount() - initialOps);
assertEquals(7L, replayDocs.getCount() - initialOps);
assertEquals(UpdateLog.State.ACTIVE.ordinal(), state.getValue().intValue());
// make sure we can still access versions after recovery
@ -166,6 +182,7 @@ public class TestRecovery extends SolrTestCaseJ4 {
assertU(adoc("id","A4"));
assertJQ(req("q","*:*") ,"/response/numFound==3");
assertJQ(req("q","val_i_dvo:2") ,"/response/numFound==1"); // assert that in-place update is retained
h.close();
createCore();
@ -185,6 +202,7 @@ public class TestRecovery extends SolrTestCaseJ4 {
// h.getCore().getUpdateHandler().getUpdateLog().recoverFromLog();
assertJQ(req("q","*:*") ,"/response/numFound==5");
assertJQ(req("q","val_i_dvo:2") ,"/response/numFound==1"); // assert that in-place update is retained
Thread.sleep(100);
assertEquals(permits, logReplay.availablePermits()); // no updates, so insure that recovery didn't run
@ -1258,6 +1276,133 @@ public class TestRecovery extends SolrTestCaseJ4 {
}
}
/**
 * Checks that replaying the update log after a restart without commit yields the right index
 * when the log mixes adds, in-place updates of {@code val_i_dvo}, deletes by query and a delete by id.
 * <p>
 * Outline: disable commit-on-close and install hooks that hold log replay until a semaphore is
 * released; write the operations; restart the core; verify nothing is searchable while replay is
 * blocked; release replay and verify the surviving documents; then restart twice more to check a
 * second replay and, finally, that no replay happens when there is nothing to replay.
 */
@Test
public void testLogReplayWithInPlaceUpdatesAndDeletes() throws Exception {
try {
// without a commit on close, the updates below survive the restart only in the update log
DirectUpdateHandler2.commitOnClose = false;
final Semaphore logReplay = new Semaphore(0);
final Semaphore logReplayFinish = new Semaphore(0);
// replay blocks in this hook until the test releases permits on logReplay
UpdateLog.testing_logReplayHook = () -> {
try {
assertTrue(logReplay.tryAcquire(timeout, TimeUnit.SECONDS));
} catch (Exception e) {
throw new RuntimeException(e);
}
};
// signals the test that a replay has completed
UpdateLog.testing_logReplayFinishHook = () -> logReplayFinish.release();
clearIndex();
assertU(commit());
// versions are collected newest-first, the order they are compared in against /get getVersions
Deque<Long> versions = new ArrayDeque<>();
versions.addFirst(addAndGetVersion(sdoc("id", "A1"), null));
// DBQ of updated document using id
versions.addFirst(addAndGetVersion(sdoc("id", "A2", "val_i_dvo", "1"), null));
versions.addFirst(addAndGetVersion(sdoc("id", "A2", "val_i_dvo", map("set", 2)), null)); // in-place update
versions.addFirst(deleteByQueryAndGetVersion("id:A2", null));
// DBQ of updated document using updated value
versions.addFirst(addAndGetVersion(sdoc("id", "A3", "val_i_dvo", "101"), null));
versions.addFirst(addAndGetVersion(sdoc("id", "A3", "val_i_dvo", map("set", 102)), null)); // in-place update
versions.addFirst(deleteByQueryAndGetVersion("val_i_dvo:102", null));
// DBQ using an intermediate update value (shouldn't delete anything)
versions.addFirst(addAndGetVersion(sdoc("id", "A4", "val_i_dvo", "200"), null));
versions.addFirst(addAndGetVersion(sdoc("id", "A4", "val_i_dvo", map("inc", "1")), null)); // in-place update
versions.addFirst(addAndGetVersion(sdoc("id", "A4", "val_i_dvo", map("inc", "1")), null)); // in-place update
versions.addFirst(deleteByQueryAndGetVersion("val_i_dvo:201", null));
// DBI of updated document
versions.addFirst(addAndGetVersion(sdoc("id", "A5", "val_i_dvo", "300"), null));
versions.addFirst(addAndGetVersion(sdoc("id", "A5", "val_i_dvo", map("inc", "1")), null)); // in-place update
versions.addFirst(addAndGetVersion(sdoc("id", "A5", "val_i_dvo", map("inc", "1")), null)); // in-place update
versions.addFirst(deleteAndGetVersion("A5", null));
// nothing has been committed, so nothing is searchable yet
assertJQ(req("q","*:*"),"/response/numFound==0");
assertJQ(req("qt","/get", "getVersions",""+versions.size()) ,"/versions==" + versions);
// first restart: replay of the uncommitted operations is expected, but held by the hook
h.close();
createCore();
// Solr should kick this off now
// h.getCore().getUpdateHandler().getUpdateLog().recoverFromLog();
// verify that previous close didn't do a commit
// recovery should be blocked by our hook
assertJQ(req("q","*:*") ,"/response/numFound==0");
// make sure we can still access versions after a restart
assertJQ(req("qt","/get", "getVersions",""+versions.size()),"/versions==" + versions);
// unblock recovery
logReplay.release(1000);
// make sure we can still access versions during recovery
assertJQ(req("qt","/get", "getVersions",""+versions.size()),"/versions==" + versions);
// wait until recovery has finished
assertTrue(logReplayFinish.tryAcquire(timeout, TimeUnit.SECONDS));
// A4 went 200 -> 201 -> 202 and must not have been hit by the DBQ on the intermediate value 201
assertJQ(req("q","val_i_dvo:202") ,"/response/numFound==1"); // assert that in-place update is retained
// two documents remain: A4 and, since A2/A3/A5 are asserted gone below, A1
assertJQ(req("q","*:*") ,"/response/numFound==2");
assertJQ(req("q","id:A2") ,"/response/numFound==0");
assertJQ(req("q","id:A3") ,"/response/numFound==0");
assertJQ(req("q","id:A4") ,"/response/numFound==1");
assertJQ(req("q","id:A5") ,"/response/numFound==0");
// make sure we can still access versions after recovery
assertJQ(req("qt","/get", "getVersions",""+versions.size()) ,"/versions==" + versions);
// second restart: one more uncommitted add (A10) has to be replayed
assertU(adoc("id","A10"));
h.close();
createCore();
// Solr should kick this off now
// h.getCore().getUpdateHandler().getUpdateLog().recoverFromLog();
// wait until recovery has finished
assertTrue(logReplayFinish.tryAcquire(timeout, TimeUnit.SECONDS));
assertJQ(req("q","*:*") ,"/response/numFound==3");
assertJQ(req("q","id:A2") ,"/response/numFound==0");
assertJQ(req("q","id:A3") ,"/response/numFound==0");
assertJQ(req("q","id:A4") ,"/response/numFound==1");
assertJQ(req("q","id:A5") ,"/response/numFound==0");
assertJQ(req("q","id:A10"),"/response/numFound==1");
// no updates, so ensure that recovery does not run
h.close();
// remember the permit count: a replay would consume a permit through the replay hook
int permits = logReplay.availablePermits();
createCore();
// Solr should kick this off now
// h.getCore().getUpdateHandler().getUpdateLog().recoverFromLog();
assertJQ(req("q","*:*") ,"/response/numFound==3");
assertJQ(req("q","val_i_dvo:202") ,"/response/numFound==1"); // assert that in-place update is retained
assertJQ(req("q","id:A2") ,"/response/numFound==0");
assertJQ(req("q","id:A3") ,"/response/numFound==0");
assertJQ(req("q","id:A4") ,"/response/numFound==1");
assertJQ(req("q","id:A5") ,"/response/numFound==0");
assertJQ(req("q","id:A10"),"/response/numFound==1");
// give an unexpected replay a moment to start before checking the permits
Thread.sleep(100);
assertEquals(permits, logReplay.availablePermits()); // no updates, so ensure that recovery didn't run
assertEquals(UpdateLog.State.ACTIVE, h.getCore().getUpdateHandler().getUpdateLog().getState());
} finally {
// restore the static test knobs so later tests are not affected
DirectUpdateHandler2.commitOnClose = true;
UpdateLog.testing_logReplayHook = null;
UpdateLog.testing_logReplayFinishHook = null;
}
}
// NOTE: replacement must currently be same size
private static void findReplace(byte[] from, byte[] to, byte[] data) {

View File

@ -16,21 +16,29 @@
*/
package org.apache.solr.update;
import static org.apache.solr.update.processor.DistributingUpdateProcessorFactory.DISTRIB_UPDATE_PARAM;
import java.io.IOException;
import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.Set;
import org.apache.solr.BaseDistributedSearchTestCase;
import org.apache.solr.SolrTestCaseJ4.SuppressSSL;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.request.QueryRequest;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.update.processor.DistributedUpdateProcessor;
import org.apache.solr.update.processor.DistributedUpdateProcessor.DistribPhase;
import org.junit.Test;
import java.io.IOException;
import java.util.Arrays;
import static org.apache.solr.update.processor.DistributedUpdateProcessor.DistribPhase;
import static org.apache.solr.update.processor.DistributingUpdateProcessorFactory.DISTRIB_UPDATE_PARAM;
import static org.junit.internal.matchers.StringContains.containsString;
@SuppressSSL(bugUrl = "https://issues.apache.org/jira/browse/SOLR-5776")
public class PeerSyncTest extends BaseDistributedSearchTestCase {
@ -46,11 +54,24 @@ public class PeerSyncTest extends BaseDistributedSearchTestCase {
// TODO: a better way to do this?
configString = "solrconfig-tlog.xml";
schemaString = "schema.xml";
// validate that the schema was not changed to an unexpected state
try {
initCore(configString, schemaString);
} catch (Exception e) {
throw new RuntimeException(e);
}
IndexSchema schema = h.getCore().getLatestSchema();
assertTrue(schema.getFieldOrNull("_version_").hasDocValues() && !schema.getFieldOrNull("_version_").indexed()
&& !schema.getFieldOrNull("_version_").stored());
assertTrue(!schema.getFieldOrNull("val_i_dvo").indexed() && !schema.getFieldOrNull("val_i_dvo").stored() &&
schema.getFieldOrNull("val_i_dvo").hasDocValues());
}
@Test
@ShardsFixed(num = 3)
public void test() throws Exception {
Set<Integer> docsAdded = new LinkedHashSet<>();
handle.clear();
handle.put("timestamp", SKIPVAL);
handle.put("score", SKIPVAL);
@ -91,14 +112,17 @@ public class PeerSyncTest extends BaseDistributedSearchTestCase {
add(client0, seenLeader, addRandFields(sdoc("id","8","_version_",++v)));
add(client0, seenLeader, addRandFields(sdoc("id","9","_version_",++v)));
add(client0, seenLeader, addRandFields(sdoc("id","10","_version_",++v)));
for (int i=0; i<10; i++) docsAdded.add(i+1);
assertSync(client1, numVersions, true, shardsArr[0]);
client0.commit(); client1.commit(); queryAndCompare(params("q", "*:*"), client0, client1);
client0.commit(); client1.commit();
QueryResponse qacResponse = queryAndCompare(params("q", "*:*", "rows", "10000"), client0, client1);
validateQACResponse(docsAdded, qacResponse);
int toAdd = (int)(numVersions *.95);
for (int i=0; i<toAdd; i++) {
add(client0, seenLeader, sdoc("id",Integer.toString(i+11),"_version_",v+i+1));
docsAdded.add(i+11);
}
// sync should fail since there's not enough overlap to give us confidence
@ -111,19 +135,24 @@ public class PeerSyncTest extends BaseDistributedSearchTestCase {
}
assertSync(client1, numVersions, true, shardsArr[0]);
client0.commit(); client1.commit(); queryAndCompare(params("q", "*:*", "sort","_version_ desc"), client0, client1);
client0.commit(); client1.commit();
qacResponse = queryAndCompare(params("q", "*:*", "rows", "10000", "sort","_version_ desc"), client0, client1);
validateQACResponse(docsAdded, qacResponse);
// test delete and deleteByQuery
v=1000;
add(client0, seenLeader, sdoc("id","1000","_version_",++v));
SolrInputDocument doc = sdoc("id","1000","_version_",++v);
add(client0, seenLeader, doc);
add(client0, seenLeader, sdoc("id","1001","_version_",++v));
delQ(client0, params(DISTRIB_UPDATE_PARAM,FROM_LEADER,"_version_",Long.toString(-++v)), "id:1001 OR id:1002");
add(client0, seenLeader, sdoc("id","1002","_version_",++v));
del(client0, params(DISTRIB_UPDATE_PARAM,FROM_LEADER,"_version_",Long.toString(-++v)), "1000");
docsAdded.add(1002); // 1002 added
assertSync(client1, numVersions, true, shardsArr[0]);
client0.commit(); client1.commit();
queryAndCompare(params("q", "*:*", "sort","_version_ desc"), client0, client1);
qacResponse = queryAndCompare(params("q", "*:*", "rows", "10000", "sort","_version_ desc"), client0, client1);
validateQACResponse(docsAdded, qacResponse);
// test that delete by query is returned even if not requested, and that it doesn't delete newer stuff than it should
v=2000;
@ -133,6 +162,7 @@ public class PeerSyncTest extends BaseDistributedSearchTestCase {
delQ(client, params(DISTRIB_UPDATE_PARAM,FROM_LEADER,"_version_",Long.toString(-++v)), "id:2001 OR id:2002");
add(client, seenLeader, sdoc("id","2002","_version_",++v));
del(client, params(DISTRIB_UPDATE_PARAM,FROM_LEADER,"_version_",Long.toString(-++v)), "2000");
docsAdded.add(2002); // 2002 added
v=2000;
client = client1;
@ -144,7 +174,9 @@ public class PeerSyncTest extends BaseDistributedSearchTestCase {
del(client, params(DISTRIB_UPDATE_PARAM,FROM_LEADER,"_version_",Long.toString(-++v)), "2000");
assertSync(client1, numVersions, true, shardsArr[0]);
client0.commit(); client1.commit(); queryAndCompare(params("q", "*:*", "sort","_version_ desc"), client0, client1);
client0.commit(); client1.commit();
qacResponse = queryAndCompare(params("q", "*:*", "rows", "10000", "sort","_version_ desc"), client0, client1);
validateQACResponse(docsAdded, qacResponse);
//
// Test that handling reorders work when applying docs retrieved from peer
@ -155,6 +187,7 @@ public class PeerSyncTest extends BaseDistributedSearchTestCase {
add(client0, seenLeader, sdoc("id","3000","_version_",3001));
add(client1, seenLeader, sdoc("id","3000","_version_",3001));
del(client0, params(DISTRIB_UPDATE_PARAM,FROM_LEADER,"_version_","3000"), "3000");
docsAdded.add(3000);
// this should cause us to retrieve an add that was previously deleted
add(client0, seenLeader, sdoc("id","3001","_version_",3003));
@ -165,17 +198,23 @@ public class PeerSyncTest extends BaseDistributedSearchTestCase {
add(client0, seenLeader, sdoc("id","3002","_version_",3004));
add(client0, seenLeader, sdoc("id","3002","_version_",3005));
add(client1, seenLeader, sdoc("id","3002","_version_",3005));
docsAdded.add(3001); // 3001 added
docsAdded.add(3002); // 3002 added
assertSync(client1, numVersions, true, shardsArr[0]);
client0.commit(); client1.commit(); queryAndCompare(params("q", "*:*", "sort","_version_ desc"), client0, client1);
client0.commit(); client1.commit();
qacResponse = queryAndCompare(params("q", "*:*", "rows", "10000", "sort","_version_ desc"), client0, client1);
validateQACResponse(docsAdded, qacResponse);
// now lets check fingerprinting causes appropriate fails
v = 4000;
add(client0, seenLeader, sdoc("id",Integer.toString((int)v),"_version_",v));
docsAdded.add(4000);
toAdd = numVersions+10;
for (int i=0; i<toAdd; i++) {
add(client0, seenLeader, sdoc("id",Integer.toString((int)v+i+1),"_version_",v+i+1));
add(client1, seenLeader, sdoc("id",Integer.toString((int)v+i+1),"_version_",v+i+1));
docsAdded.add((int)v+i+1);
}
// client0 now has an additional add beyond our window and the fingerprint should cause this to fail
@ -199,8 +238,80 @@ public class PeerSyncTest extends BaseDistributedSearchTestCase {
}
assertSync(client1, numVersions, true, shardsArr[0]);
}
client0.commit(); client1.commit();
qacResponse = queryAndCompare(params("q", "*:*", "rows", "10000", "sort","_version_ desc"), client0, client1);
validateQACResponse(docsAdded, qacResponse);
// lets add some in-place updates
add(client0, seenLeader, sdoc("id", "5000", "val_i_dvo", 0, "title", "mytitle", "_version_", 5000)); // full update
docsAdded.add(5000);
assertSync(client1, numVersions, true, shardsArr[0]);
// verify the in-place updated document (id=5000) has correct fields
assertEquals(0, client1.getById("5000").get("val_i_dvo"));
assertEquals(client0.getById("5000")+" and "+client1.getById("5000"),
"mytitle", client1.getById("5000").getFirstValue("title"));
ModifiableSolrParams inPlaceParams = new ModifiableSolrParams(seenLeader);
inPlaceParams.set(DistributedUpdateProcessor.DISTRIB_INPLACE_PREVVERSION, "5000");
add(client0, inPlaceParams, sdoc("id", "5000", "val_i_dvo", 1, "_version_", 5001)); // in-place update
assertSync(client1, numVersions, true, shardsArr[0]);
// verify the in-place updated document (id=5000) has correct fields
assertEquals(1, client1.getById("5000").get("val_i_dvo"));
assertEquals(client0.getById("5000")+" and "+client1.getById("5000"),
"mytitle", client1.getById("5000").getFirstValue("title"));
// interleave the in-place updates with a few deletes to other documents
del(client0, params(DISTRIB_UPDATE_PARAM,FROM_LEADER,"_version_","5002"), 4001);
delQ(client0, params(DISTRIB_UPDATE_PARAM,FROM_LEADER,"_version_","5003"), "id:4002");
docsAdded.remove(4001);
docsAdded.remove(4002);
inPlaceParams.set(DistributedUpdateProcessor.DISTRIB_INPLACE_PREVVERSION, "5001");
add(client0, inPlaceParams, sdoc("id", 5000, "val_i_dvo", 2, "_version_", 5004)); // in-place update
assertSync(client1, numVersions, true, shardsArr[0]);
// verify the in-place updated document (id=5000) has correct fields
assertEquals(2, client1.getById("5000").get("val_i_dvo"));
assertEquals(client0.getById("5000")+" and "+client1.getById("5000"),
"mytitle", client1.getById("5000").getFirstValue("title"));
// a DBQ with value
delQ(client0, params(DISTRIB_UPDATE_PARAM,FROM_LEADER,"_version_","5005"), "val_i_dvo:1"); // current val is 2, so this should not delete anything
assertSync(client1, numVersions, true, shardsArr[0]);
boolean deleteTheUpdatedDocument = random().nextBoolean();
if (deleteTheUpdatedDocument) { // if doc with id=5000 is deleted, further in-place-updates should fail
delQ(client0, params(DISTRIB_UPDATE_PARAM,FROM_LEADER,"_version_","5006"), "val_i_dvo:2"); // current val is 2, this will delete id=5000
assertSync(client1, numVersions, true, shardsArr[0]);
SolrException ex = expectThrows(SolrException.class, () -> {
inPlaceParams.set(DistributedUpdateProcessor.DISTRIB_INPLACE_PREVVERSION, "5004");
add(client0, inPlaceParams, sdoc("id", 5000, "val_i_dvo", 3, "_version_", 5007));
});
assertEquals(ex.toString(), SolrException.ErrorCode.SERVER_ERROR.code, ex.code());
assertThat(ex.getMessage(), containsString("Can't find document with id=5000"));
} else {
inPlaceParams.set(DistributedUpdateProcessor.DISTRIB_INPLACE_PREVVERSION, "5004");
add(client0, inPlaceParams, sdoc("id", 5000, "val_i_dvo", 3, "_version_", 5006));
assertSync(client1, numVersions, true, shardsArr[0]);
// verify the in-place updated document (id=5000) has correct fields
assertEquals(3, client1.getById("5000").get("val_i_dvo"));
assertEquals(client0.getById("5000")+" and "+client1.getById("5000"),
"mytitle", client1.getById("5000").getFirstValue("title"));
if (random().nextBoolean()) {
client0.commit(); client1.commit();
qacResponse = queryAndCompare(params("q", "*:*", "rows", "10000", "sort","_version_ desc"), client0, client1);
validateQACResponse(docsAdded, qacResponse);
}
del(client0, params(DISTRIB_UPDATE_PARAM,FROM_LEADER,"_version_","5007"), 5000);
docsAdded.remove(5000);
assertSync(client1, numVersions, true, shardsArr[0]);
client0.commit(); client1.commit();
qacResponse = queryAndCompare(params("q", "*:*", "rows", "10000", "sort","_version_ desc"), client0, client1);
validateQACResponse(docsAdded, qacResponse);
}
}
void assertSync(SolrClient client, int numVersions, boolean expectedResult, String... syncWith) throws IOException, SolrServerException {
QueryRequest qr = new QueryRequest(params("qt","/get", "getVersions",Integer.toString(numVersions), "sync", StrUtils.join(Arrays.asList(syncWith), ',')));
@ -208,4 +319,13 @@ public class PeerSyncTest extends BaseDistributedSearchTestCase {
assertEquals(expectedResult, (Boolean) rsp.get("sync"));
}
/**
 * Checks that the documents returned by a query-and-compare response are exactly the documents
 * the test believes are live.
 *
 * @param docsAdded   ids the test expects to be present
 * @param qacResponse response whose results are inspected; every result must carry an "id" field
 *                    whose string form parses as an integer
 */
void validateQACResponse(Set<Integer> docsAdded, QueryResponse qacResponse) {
  // Gather the id of every returned document.
  Set<Integer> returnedIds = new LinkedHashSet<>();
  qacResponse.getResults().forEach(
      doc -> returnedIds.add(Integer.parseInt(doc.getFieldValue("id").toString())));

  // Same ids must have come back ...
  assertEquals(docsAdded, returnedIds);
  // ... and the reported hit count must match as well.
  assertEquals(docsAdded.size(), qacResponse.getResults().getNumFound());
}
}

View File

@ -99,8 +99,8 @@ public class SolrIndexConfigTest extends SolrTestCaseJ4 {
}
public void testSortingMPSolrIndexConfigCreation() throws Exception {
final String expectedFieldName = "timestamp";
final SortField.Type expectedFieldType = SortField.Type.LONG;
final String expectedFieldName = "timestamp_i_dvo";
final SortField.Type expectedFieldType = SortField.Type.INT;
final boolean expectedFieldSortDescending = true;
SolrConfig solrConfig = new SolrConfig(instanceDir, solrConfigFileNameSortingMergePolicyFactory, null);

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -32,7 +32,7 @@ public class TestUpdate extends SolrTestCaseJ4 {
}
@Test
public void testUpdateableDocs() throws Exception {
public void testUpdatableDocs() throws Exception {
// The document may be retrieved from the index or from the transaction log.
// Test both by running the same test with and without commits

View File

@ -0,0 +1,271 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update;
import java.util.List;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.handler.component.RealTimeGetComponent;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.update.processor.DistributedUpdateProcessor;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import static org.junit.internal.matchers.StringContains.containsString;
/**
 * Tests that drive an {@link UpdateLog} directly (calling its add/delete/commit hooks rather than
 * going through an update request) and check how in-place updates are recorded and how
 * <code>applyPartialUpdates</code> resolves them against the entries they depend on.
 */
public class UpdateLogTest extends SolrTestCaseJ4 {

  /** BytesRef that can be re-used to lookup doc with id "1" */
  private static final BytesRef DOC_1_INDEXED_ID = new BytesRef("1");

  /** Update log of the test core; assigned in {@link #beforeClass()}, released in {@link #afterClass()}. */
  static UpdateLog ulog = null;

  /**
   * Picks a random numeric field implementation (Trie vs. Point) through system properties,
   * starts the core and remembers its update log.
   */
  @BeforeClass
  public static void beforeClass() throws Exception {
    System.setProperty("solr.tests.intClassName", random().nextBoolean()? "TrieIntField": "IntPointField");
    System.setProperty("solr.tests.longClassName", random().nextBoolean()? "TrieLongField": "LongPointField");
    System.setProperty("solr.tests.floatClassName", random().nextBoolean()? "TrieFloatField": "FloatPointField");
    System.setProperty("solr.tests.doubleClassName", random().nextBoolean()? "TrieDoubleField": "DoublePointField");

    initCore("solrconfig-tlog.xml", "schema-inplace-updates.xml");

    try (SolrQueryRequest req = req()) {
      UpdateHandler uhandler = req.getCore().getUpdateHandler();
      ((DirectUpdateHandler2) uhandler).getCommitTracker().setTimeUpperBound(100);
      ((DirectUpdateHandler2) uhandler).getCommitTracker().setOpenSearcher(false);
      ulog = uhandler.getUpdateLog();
    }
  }

  /**
   * Undoes the class-level setup. The properties are set once per class in
   * {@link #beforeClass()}, so they are cleared once per class as well (not after every test),
   * and the static reference to the update log is dropped.
   */
  @AfterClass
  public static void afterClass() {
    System.clearProperty("solr.tests.intClassName");
    System.clearProperty("solr.tests.longClassName");
    System.clearProperty("solr.tests.floatClassName");
    System.clearProperty("solr.tests.doubleClassName");
    ulog = null;
  }

  /**
   * A full add followed by a chain of in-place updates must be resolvable into the complete
   * document, both before and after one or two commits.
   *
   * @see org.apache.solr.update.UpdateLog#applyPartialUpdates
   */
  @Test
  public void testApplyPartialUpdatesOnMultipleInPlaceUpdatesInSequence() {
    // Add a full update, two in-place updates and verify applying partial updates is working
    ulogAdd(ulog, null, sdoc("id", "1", "title_s", "title1", "val1_i_dvo", "1", "_version_", "100"));
    ulogAdd(ulog, 100L, sdoc("id", "1", "price", "1000", "val1_i_dvo", "2", "_version_", "101"));
    ulogAdd(ulog, 101L, sdoc("id", "1", "val1_i_dvo", "3", "_version_", "102"));

    // NOTE(review): slots 2/3/4 are read as prev-pointer / prev-version / document, as the
    // variable names below indicate -- confirm against the UpdateLog entry layout.
    List<?> partialUpdate = (List<?>) ulog.lookup(DOC_1_INDEXED_ID);
    SolrDocument partialDoc = RealTimeGetComponent.toSolrDoc((SolrInputDocument) partialUpdate.get(4),
        h.getCore().getLatestSchema());
    long prevVersion = (Long) partialUpdate.get(3);
    long prevPointer = (Long) partialUpdate.get(2);

    // the latest entry only carries the in-place updated field
    assertEquals(3L, ((NumericDocValuesField)partialDoc.getFieldValue("val1_i_dvo")).numericValue());
    assertFalse(partialDoc.containsKey("title_s"));

    long returnVal = ulog.applyPartialUpdates(DOC_1_INDEXED_ID, prevPointer, prevVersion, null, partialDoc);

    assertEquals(0, returnVal);
    assertEquals(1000, Integer.parseInt(partialDoc.getFieldValue("price").toString()));
    assertEquals(3L, ((NumericDocValuesField)partialDoc.getFieldValue("val1_i_dvo")).numericValue());
    assertEquals("title1", partialDoc.getFieldValue("title_s"));

    // Commit, then add two more in-place updates, and verify that applying partial updates is
    // still working (since the prevTlog and prevTlog2 are retained after a commit)
    ulogCommit(ulog);
    if (random().nextBoolean()) { // sometimes also try a second commit
      ulogCommit(ulog);
    }
    ulogAdd(ulog, 102L, sdoc("id", "1", "price", "2000", "val1_i_dvo", "4", "_version_", "200"));
    ulogAdd(ulog, 200L, sdoc("id", "1", "val1_i_dvo", "5", "_version_", "201"));

    partialUpdate = (List<?>) ulog.lookup(DOC_1_INDEXED_ID);
    partialDoc = RealTimeGetComponent.toSolrDoc((SolrInputDocument) partialUpdate.get(4),
        h.getCore().getLatestSchema());
    prevVersion = (Long) partialUpdate.get(3);
    prevPointer = (Long) partialUpdate.get(2);

    assertEquals(5L, ((NumericDocValuesField)partialDoc.getFieldValue("val1_i_dvo")).numericValue());
    assertFalse(partialDoc.containsKey("title_s"));

    returnVal = ulog.applyPartialUpdates(DOC_1_INDEXED_ID, prevPointer, prevVersion, null, partialDoc);

    assertEquals(0, returnVal);
    assertEquals(2000, Integer.parseInt(partialDoc.getFieldValue("price").toString()));
    assertEquals(5L, ((NumericDocValuesField)partialDoc.getFieldValue("val1_i_dvo")).numericValue());
    assertEquals("title1", partialDoc.getFieldValue("title_s"));
  }

  /**
   * After three commits the entries an in-place update depends on can no longer be found, and
   * <code>applyPartialUpdates</code> is expected to report that with a return value of -1.
   */
  @Test
  public void testApplyPartialUpdatesAfterMultipleCommits() {
    ulogAdd(ulog, null, sdoc("id", "1", "title_s", "title1", "val1_i_dvo", "1", "_version_", "100"));
    ulogAdd(ulog, 100L, sdoc("id", "1", "price", "1000", "val1_i_dvo", "2", "_version_", "101"));
    ulogAdd(ulog, 101L, sdoc("id", "1", "val1_i_dvo", "3", "_version_", "102"));

    // Do 3 commits, then in-place update, and verify that applying partial updates can't find full doc
    for (int i = 0; i < 3; i++) {
      ulogCommit(ulog);
    }
    ulogAdd(ulog, 101L, sdoc("id", "1", "val1_i_dvo", "6", "_version_", "300"));

    List<?> partialUpdate = (List<?>) ulog.lookup(DOC_1_INDEXED_ID);
    SolrDocument partialDoc = RealTimeGetComponent.toSolrDoc((SolrInputDocument) partialUpdate.get(4),
        h.getCore().getLatestSchema());
    long prevVersion = (Long) partialUpdate.get(3);
    long prevPointer = (Long) partialUpdate.get(2);

    assertEquals(6L, ((NumericDocValuesField)partialDoc.getFieldValue("val1_i_dvo")).numericValue());
    assertFalse(partialDoc.containsKey("title_s"));

    long returnVal = ulog.applyPartialUpdates(DOC_1_INDEXED_ID, prevPointer, prevVersion, null, partialDoc);

    assertEquals(-1, returnVal);
  }

  /**
   * An in-place update chain that leads back to a delete-by-id entry (instead of an add) must be
   * rejected with an INVALID_STATE {@link SolrException}.
   */
  @Test
  public void testApplyPartialUpdatesDependingOnNonAddShouldThrowException() {
    ulogAdd(ulog, null, sdoc("id", "1", "title_s", "title1", "val1_i_dvo", "1", "_version_", "100"));
    ulogDelete(ulog, "1", 500L, false); // delete by id (dbi)
    ulogAdd(ulog, 500L, sdoc("id", "1", "val1_i_dvo", "2", "_version_", "501"));
    ulogAdd(ulog, 501L, sdoc("id", "1", "val1_i_dvo", "3", "_version_", "502"));

    // These locals are captured by the lambda below, so each is assigned exactly once.
    List<?> partialUpdate = (List<?>) ulog.lookup(DOC_1_INDEXED_ID);
    SolrDocument partialDoc = RealTimeGetComponent.toSolrDoc((SolrInputDocument) partialUpdate.get(4),
        h.getCore().getLatestSchema());
    long prevVersion = (Long) partialUpdate.get(3);
    long prevPointer = (Long) partialUpdate.get(2);

    assertEquals(3L, ((NumericDocValuesField)partialDoc.getFieldValue("val1_i_dvo")).numericValue());
    assertEquals(502L, ((NumericDocValuesField)partialDoc.getFieldValue("_version_")).numericValue());
    assertFalse(partialDoc.containsKey("title_s"));

    // If an in-place update depends on a non-add (i.e. DBI), assert that an exception is thrown.
    SolrException ex = expectThrows(SolrException.class, () -> {
      long returnVal = ulog.applyPartialUpdates(DOC_1_INDEXED_ID, prevPointer, prevVersion, null, partialDoc);
      fail("502 depends on 501, 501 depends on 500, but 500 is a"
          + " DELETE. This should've generated an exception. returnVal is: "+returnVal);
    });
    assertEquals(ex.toString(), SolrException.ErrorCode.INVALID_STATE.code, ex.code());
    assertThat(ex.getMessage(), containsString("should've been either ADD or UPDATE_INPLACE"));
    assertThat(ex.getMessage(), containsString("looking for id=1"));
  }

  /**
   * Deleting a document that has pending in-place updates: a delete-by-query must clear the
   * update log maps, a delete-by-id must leave a DELETE entry for the document.
   */
  @Test
  public void testApplyPartialUpdatesWithDelete() throws Exception {
    ulogAdd(ulog, null, sdoc("id", "1", "title_s", "title1", "val1_i_dvo", "1", "_version_", "100"));
    ulogAdd(ulog, 100L, sdoc("id", "1", "val1_i_dvo", "2", "_version_", "101")); // in-place update
    ulogAdd(ulog, 101L, sdoc("id", "1", "val1_i_dvo", "3", "_version_", "102")); // in-place update

    // sanity check that the update log has one document, and RTG returns the document
    assertEquals(1, ulog.map.size());
    assertJQ(req("qt","/get", "id","1")
        , "=={'doc':{ 'id':'1', 'val1_i_dvo':3, '_version_':102, 'title_s':'title1', "
        // fields with default values
        + "'inplace_updatable_int_with_default':666, 'inplace_updatable_float_with_default':42.0}}");

    boolean dbq = random().nextBoolean();
    ulogDelete(ulog, "1", 200L, dbq); // delete id:1 document
    if (dbq) {
      assertNull(ulog.lookup(DOC_1_INDEXED_ID)); // any DBQ clears out the ulog, so this document shouldn't exist
      assertEquals(0, ulog.map.size());
      assertTrue(String.valueOf(ulog.prevMap), ulog.prevMap == null || ulog.prevMap.size() == 0);
      assertTrue(String.valueOf(ulog.prevMap2), ulog.prevMap2 == null || ulog.prevMap2.size() == 0);
      // verify that the document is deleted, by doing an RTG call
      assertJQ(req("qt","/get", "id","1"), "=={'doc':null}");
    } else { // delete by id: the log keeps an entry whose operation bits say DELETE
      List<?> entry = (List<?>) ulog.lookup(DOC_1_INDEXED_ID);
      assertEquals(UpdateLog.DELETE, ((int) entry.get(UpdateLog.FLAGS_IDX)) & UpdateLog.OPERATION_MASK);
    }
  }

  /**
   * Simulate a commit on a given updateLog
   *
   * @param ulog The UpdateLog whose pre- and post-commit hooks are invoked
   */
  private static void ulogCommit(UpdateLog ulog) {
    try (SolrQueryRequest req = req()) {
      CommitUpdateCommand commitCmd = new CommitUpdateCommand(req, false);
      ulog.preCommit(commitCmd);
      ulog.postCommit(commitCmd);
    }
  }

  /**
   * Simulate a delete on a given updateLog
   *
   * @param ulog  The UpdateLog to apply a delete against
   * @param id of document to be deleted
   * @param version Version to use on the DeleteUpdateCommand
   * @param dbq if true, an <code>id:$id</code> DBQ will be used, instead of delete by id
   */
  private static void ulogDelete(UpdateLog ulog, String id, long version, boolean dbq) {
    try (SolrQueryRequest req = req()) {
      DeleteUpdateCommand cmd = new DeleteUpdateCommand(req);
      cmd.setVersion(version);
      if (dbq) {
        cmd.query = ("id:"+id);
        ulog.deleteByQuery(cmd);
      } else {
        cmd.id = id;
        ulog.delete(cmd);
      }
    }
  }

  /**
   * Simulate an add on a given updateLog.
   * <p>
   *   This method, when prevVersion is passed in (i.e. for in-place update), represents an
   *   AddUpdateCommand that has undergone the merge process and inc/set operations have now been
   *   converted into actual values that just need to be written.
   * </p>
   * <p>
   * NOTE: For test simplicity, the Solr input document must include the <code>_version_</code> field.
   * </p>
   *
   * @param ulog The UpdateLog to apply an add against
   * @param prevVersion If non-null, then this AddUpdateCommand represents an in-place update.
   * @param sdoc The document to use for the add.
   * @see #buildAddUpdateCommand
   */
  private static void ulogAdd(UpdateLog ulog, Long prevVersion, SolrInputDocument sdoc) {
    try (SolrQueryRequest req = req()) {
      AddUpdateCommand cmd = buildAddUpdateCommand(req, sdoc);
      if (prevVersion != null) {
        cmd.prevVersion = prevVersion;
      }
      ulog.add(cmd);
    }
  }

  /**
   * Helper method to construct an <code>AddUpdateCommand</code> for a <code>SolrInputDocument</code>
   * in the context of the specified <code>SolrQueryRequest</code>.
   *
   * NOTE: For test simplicity, the Solr input document must include the <code>_version_</code> field.
   *
   * @param req  request the command is created for
   * @param sdoc document to add; its version field is parsed and set as the command version
   * @return the populated command
   */
  public static AddUpdateCommand buildAddUpdateCommand(final SolrQueryRequest req, final SolrInputDocument sdoc) {
    AddUpdateCommand cmd = new AddUpdateCommand(req);
    cmd.solrDoc = sdoc;
    assertTrue("input document must include the " + DistributedUpdateProcessor.VERSION_FIELD + " field",
        cmd.solrDoc.containsKey(DistributedUpdateProcessor.VERSION_FIELD));
    cmd.setVersion(Long.parseLong(cmd.solrDoc.getFieldValue(DistributedUpdateProcessor.VERSION_FIELD).toString()));
    return cmd;
  }
}

View File

@ -17,6 +17,7 @@
package org.apache.solr.update.processor;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
@ -1135,4 +1136,150 @@ public class AtomicUpdatesTest extends SolrTestCaseJ4 {
assertQ(req("q", "cat:ccc", "indent", "true"), "//result[@numFound = '1']");
}
/**
 * For a stored field and a docValues-only field in turn: index doc 7, atomically decrement the
 * field, and check via real-time get (before and after a commit) that the updated value is
 * returned and that the fields populated from schema defaults are still present.
 */
public void testFieldsWithDefaultValuesWhenAtomicUpdatesAgainstTlog() {
  for (String target : Arrays.asList("field_to_update_i1", "field_to_update_i_dvo")) {
    final String initialValueXpath = "//doc/int[@name='"+target+"'][.='666']";
    final String updatedValueXpath = "//doc/int[@name='"+target+"'][.='111']";

    clearIndex();

    // plain add, served from the transaction log
    assertU(adoc(sdoc("id", "7", target, "666")));
    assertQ(target + ": initial RTG",
        req("qt", "/get", "id", "7"),
        doc7RealTimeGetXpaths(initialValueXpath));

    // do atomic update
    assertU(adoc(sdoc("id", "7", target, ImmutableMap.of("inc", -555))));
    assertQ(target + ": RTG after atomic update",
        req("qt", "/get", "id", "7"),
        doc7RealTimeGetXpaths(updatedValueXpath));

    // same expectations once the update has been committed
    assertU(commit());
    assertQ(target + ": post commit RTG",
        req("qt", "/get", "id", "7"),
        doc7RealTimeGetXpaths(updatedValueXpath));
  }
}

/**
 * XPath checks for a real-time get of doc 7: exactly one doc with seven fields, the given check
 * on the updated field, and the default-populated fields.
 */
private static String[] doc7RealTimeGetXpaths(String updatedFieldXpath) {
  return new String[] {
      "count(//doc)=1",
      "//doc/int[@name='id'][.='7']",
      updatedFieldXpath,
      "//doc/int[@name='intDefault'][.='42']",
      "//doc/int[@name='intDvoDefault'][.='42']",
      "//doc/long[@name='_version_']",
      "//doc/date[@name='timestamp']",
      "//doc/arr[@name='multiDefault']/str[.='muLti-Default']",
      "count(//doc/*)=7"
  };
}
/**
 * Atomic updates applied directly to fields that have a schema default (42): doc 7 starts with
 * an "inc" on the default, doc 8 starts with a "set" that overwrites it; both are then
 * decremented by 555 and checked via real-time get before and after a commit.
 * <p>
 * Expected values: doc 7 goes 42 + 666 = 708, then 708 - 555 = 153; doc 8 goes 666, then
 * 666 - 555 = 111.
 * </p>
 */
@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-9838")
public void testAtomicUpdateOfFieldsWithDefaultValue() {
  // both fields have the same default value (42)
  for (String fieldToUpdate : Arrays.asList("intDefault", "intDvoDefault")) {
    clearIndex();

    // doc where we immediately attempt to inc the default value
    assertU(adoc(sdoc("id", "7", fieldToUpdate, ImmutableMap.of("inc", "666"))));
    assertQ(fieldToUpdate + ": initial RTG#7"
        , req("qt", "/get", "id", "7")
        , "count(//doc)=1"
        , "//doc/int[@name='id'][.='7']"
        , "//doc/int[@name='"+fieldToUpdate+"'][.='708']"
        // whichever field we did *NOT* update
        , "//doc/int[@name!='"+fieldToUpdate+"'][.='42']"
        , "//doc/long[@name='_version_']"
        , "//doc/date[@name='timestamp']"
        , "//doc/arr[@name='multiDefault']/str[.='muLti-Default']"
        , "count(//doc/*)=6"
        );

    // do atomic update
    assertU(adoc(sdoc("id", "7", fieldToUpdate, ImmutableMap.of("inc", -555))));
    assertQ(fieldToUpdate + ": RTG#7 after atomic update"
        , req("qt", "/get", "id", "7")
        , "count(//doc)=1"
        , "//doc/int[@name='id'][.='7']"
        , "//doc/int[@name='"+fieldToUpdate+"'][.='153']"
        // whichever field we did *NOT* update
        , "//doc/int[@name!='"+fieldToUpdate+"'][.='42']"
        , "//doc/long[@name='_version_']"
        , "//doc/date[@name='timestamp']"
        , "//doc/arr[@name='multiDefault']/str[.='muLti-Default']"
        , "count(//doc/*)=6"
        );

    // diff doc where we check that we can overwrite the default value
    assertU(adoc(sdoc("id", "8", fieldToUpdate, ImmutableMap.of("set", "666"))));
    assertQ(fieldToUpdate + ": initial RTG#8"
        , req("qt", "/get", "id", "8")
        , "count(//doc)=1"
        , "//doc/int[@name='id'][.='8']"
        , "//doc/int[@name='"+fieldToUpdate+"'][.='666']"
        // whichever field we did *NOT* update
        , "//doc/int[@name!='"+fieldToUpdate+"'][.='42']"
        , "//doc/long[@name='_version_']"
        , "//doc/date[@name='timestamp']"
        , "//doc/arr[@name='multiDefault']/str[.='muLti-Default']"
        , "count(//doc/*)=6"
        );

    // do atomic update -- against doc 8: the checks below expect doc 8 to drop from 666 to 111
    // while doc 7 stays at 153, so the decrement must not be sent to doc 7 a second time
    assertU(adoc(sdoc("id", "8", fieldToUpdate, ImmutableMap.of("inc", -555))));
    assertQ(fieldToUpdate + ": RTG after atomic update"
        , req("qt", "/get", "id", "8")
        , "count(//doc)=1"
        , "//doc/int[@name='id'][.='8']"
        , "//doc/int[@name='"+fieldToUpdate+"'][.='111']"
        // whichever field we did *NOT* update
        , "//doc/int[@name!='"+fieldToUpdate+"'][.='42']"
        , "//doc/long[@name='_version_']"
        , "//doc/date[@name='timestamp']"
        , "//doc/arr[@name='multiDefault']/str[.='muLti-Default']"
        , "count(//doc/*)=6"
        );

    // after the commit both docs must still report the same values
    assertU(commit());

    assertQ(fieldToUpdate + ": doc7 post commit RTG"
        , req("qt", "/get", "id", "7")
        , "count(//doc)=1"
        , "//doc/int[@name='id'][.='7']"
        , "//doc/int[@name='"+fieldToUpdate+"'][.='153']"
        // whichever field we did *NOT* update
        , "//doc/int[@name!='"+fieldToUpdate+"'][.='42']"
        , "//doc/long[@name='_version_']"
        , "//doc/date[@name='timestamp']"
        , "//doc/arr[@name='multiDefault']/str[.='muLti-Default']"
        , "count(//doc/*)=6"
        );
    assertQ(fieldToUpdate + ": doc8 post commit RTG"
        , req("qt", "/get", "id", "8")
        , "count(//doc)=1"
        , "//doc/int[@name='id'][.='8']"
        , "//doc/int[@name='"+fieldToUpdate+"'][.='111']"
        // whichever field we did *NOT* update
        , "//doc/int[@name!='"+fieldToUpdate+"'][.='42']"
        , "//doc/long[@name='_version_']"
        , "//doc/date[@name='timestamp']"
        , "//doc/arr[@name='multiDefault']/str[.='muLti-Default']"
        , "count(//doc/*)=6"
        );
  }
}
}

View File

@ -189,7 +189,7 @@ public class JavaBinCodec implements PushWriter {
public SimpleOrderedMap<Object> readOrderedMap(DataInputInputStream dis) throws IOException {
int sz = readSize(dis);
SimpleOrderedMap<Object> nl = new SimpleOrderedMap<>();
SimpleOrderedMap<Object> nl = new SimpleOrderedMap<>(sz);
for (int i = 0; i < sz; i++) {
String name = (String) readVal(dis);
Object val = readVal(dis);
@ -200,7 +200,7 @@ public class JavaBinCodec implements PushWriter {
public NamedList<Object> readNamedList(DataInputInputStream dis) throws IOException {
int sz = readSize(dis);
NamedList<Object> nl = new NamedList<>();
NamedList<Object> nl = new NamedList<>(sz);
for (int i = 0; i < sz; i++) {
String name = (String) readVal(dis);
Object val = readVal(dis);
@ -512,7 +512,7 @@ public class JavaBinCodec implements PushWriter {
public SolrDocument readSolrDocument(DataInputInputStream dis) throws IOException {
tagByte = dis.readByte();
int size = readSize(dis);
SolrDocument doc = new SolrDocument();
SolrDocument doc = new SolrDocument(new LinkedHashMap<>(size));
for (int i = 0; i < size; i++) {
String fieldName;
Object obj = readVal(dis); // could be a field name, or a child document
@ -555,7 +555,7 @@ public class JavaBinCodec implements PushWriter {
public SolrInputDocument readSolrInputDocument(DataInputInputStream dis) throws IOException {
int sz = readVInt(dis);
float docBoost = (Float)readVal(dis);
SolrInputDocument sdoc = new SolrInputDocument();
SolrInputDocument sdoc = new SolrInputDocument(new LinkedHashMap<>(sz));
sdoc.setDocumentBoost(docBoost);
for (int i = 0; i < sz; i++) {
float boost = 1.0f;
@ -610,7 +610,7 @@ public class JavaBinCodec implements PushWriter {
public Map<Object,Object> readMap(DataInputInputStream dis)
throws IOException {
int sz = readVInt(dis);
Map<Object,Object> m = new LinkedHashMap<>();
Map<Object,Object> m = new LinkedHashMap<>(sz);
for (int i = 0; i < sz; i++) {
Object key = readVal(dis);
Object val = readVal(dis);

View File

@ -88,6 +88,7 @@ var getPluginTypes = function(data, selected) {
var key = mbeans[i];
var lower = key.toLowerCase();
var plugins = getPlugins(mbeans[i+1]);
if (plugins.length == 0) continue;
keys.push({name: key,
selected: lower == selected,
changes: 0,