SOLR-10832: Fixed VersionInfo.getMaxVersionFromIndex when using PointsField with indexed="true"

This commit is contained in:
Chris Hostetter 2017-06-15 15:53:48 -07:00
parent 5c498b411d
commit 274971c3e3
6 changed files with 156 additions and 82 deletions

View File

@ -386,6 +386,8 @@ Bug Fixes
Trie numeric fields should throw SolrException(BAD_REQUEST) for malformed docValues range queries.
(hossman, Tomás Fernández Löbbe)
* SOLR-10832: Fixed VersionInfo.getMaxVersionFromIndex when using PointsField with indexed="true" (hossman)
Optimizations
----------------------
* SOLR-10634: JSON Facet API: When a field/terms facet will retrieve all buckets (i.e. limit:-1)

View File

@ -1999,19 +1999,17 @@ public class UpdateLog implements PluginInfoInitialized, SolrMetricProducer {
// this method is primarily used for unit testing and is not part of the public API for this class
Long getMaxVersionFromIndex() {
if (maxVersionFromIndex == null && versionInfo != null) {
RefCounted<SolrIndexSearcher> newestSearcher = (uhandler != null && uhandler.core != null)
? uhandler.core.getRealtimeSearcher() : null;
if (newestSearcher == null)
throw new IllegalStateException("No searcher available to lookup max version from index!");
try {
maxVersionFromIndex = seedBucketsWithHighestVersion(newestSearcher.get(), versionInfo);
} finally {
newestSearcher.decref();
}
RefCounted<SolrIndexSearcher> newestSearcher = (uhandler != null && uhandler.core != null)
? uhandler.core.getRealtimeSearcher() : null;
if (newestSearcher == null)
throw new IllegalStateException("No searcher available to lookup max version from index!");
try {
seedBucketsWithHighestVersion(newestSearcher.get());
return getCurrentMaxVersion();
} finally {
newestSearcher.decref();
}
return maxVersionFromIndex;
}
/**

View File

@ -24,6 +24,7 @@ import java.util.concurrent.locks.ReentrantReadWriteLock;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.Terms;
import org.apache.solr.legacy.LegacyNumericUtils;
import org.apache.lucene.queries.function.FunctionValues;
@ -228,37 +229,32 @@ public class VersionInfo {
*/
public Long getMaxVersionFromIndex(IndexSearcher searcher) throws IOException {
String versionFieldName = versionField.getName();
final String versionFieldName = versionField.getName();
log.debug("Refreshing highest value of {} for {} version buckets from index", versionFieldName, buckets.length);
long maxVersionInIndex = 0L;
// if indexed, then we have terms to get the max from
if (versionField.indexed()) {
LeafReader leafReader = SlowCompositeReaderWrapper.wrap(searcher.getIndexReader());
Terms versionTerms = leafReader.terms(versionFieldName);
Long max = (versionTerms != null) ? LegacyNumericUtils.getMaxLong(versionTerms) : null;
if (max != null) {
maxVersionInIndex = max.longValue();
log.debug("Found MAX value {} from Terms for {} in index", maxVersionInIndex, versionFieldName);
if (versionField.getType().isPointField()) {
return getMaxVersionFromIndexedPoints(searcher);
} else {
log.debug("No terms found for {}, cannot seed version bucket highest value from index", versionFieldName);
}
} else {
ValueSource vs = versionField.getType().getValueSource(versionField, null);
Map funcContext = ValueSource.newContext(searcher);
vs.createWeight(funcContext, searcher);
// TODO: multi-thread this
for (LeafReaderContext ctx : searcher.getTopReaderContext().leaves()) {
int maxDoc = ctx.reader().maxDoc();
FunctionValues fv = vs.getValues(funcContext, ctx);
for (int doc = 0; doc < maxDoc; doc++) {
long v = fv.longVal(doc);
maxVersionInIndex = Math.max(v, maxVersionInIndex);
}
return getMaxVersionFromIndexedTerms(searcher);
}
}
// else: not indexed, use docvalues via value source ...
long maxVersionInIndex = 0L;
ValueSource vs = versionField.getType().getValueSource(versionField, null);
Map funcContext = ValueSource.newContext(searcher);
vs.createWeight(funcContext, searcher);
// TODO: multi-thread this
for (LeafReaderContext ctx : searcher.getTopReaderContext().leaves()) {
int maxDoc = ctx.reader().maxDoc();
FunctionValues fv = vs.getValues(funcContext, ctx);
for (int doc = 0; doc < maxDoc; doc++) {
long v = fv.longVal(doc);
maxVersionInIndex = Math.max(v, maxVersionInIndex);
}
}
return maxVersionInIndex;
}
@ -271,4 +267,38 @@ public class VersionInfo {
}
}
}
private long getMaxVersionFromIndexedTerms(IndexSearcher searcher) throws IOException {
assert ! versionField.getType().isPointField();
final String versionFieldName = versionField.getName();
final LeafReader leafReader = SlowCompositeReaderWrapper.wrap(searcher.getIndexReader());
final Terms versionTerms = leafReader.terms(versionFieldName);
final Long max = (versionTerms != null) ? LegacyNumericUtils.getMaxLong(versionTerms) : null;
if (null != max) {
log.debug("Found MAX value {} from Terms for {} in index", max, versionFieldName);
return max.longValue();
}
return 0L;
}
private long getMaxVersionFromIndexedPoints(IndexSearcher searcher) throws IOException {
assert versionField.getType().isPointField();
final String versionFieldName = versionField.getName();
final byte[] maxBytes = PointValues.getMaxPackedValue(searcher.getIndexReader(), versionFieldName);
if (null == maxBytes) {
return 0L;
}
final Object maxObj = versionField.getType().toObject(versionField, new BytesRef(maxBytes));
if (null == maxObj || ! ( maxObj instanceof Number) ) {
// HACK: aparently nothing asserts that the FieldType is numeric (let alone a Long???)
log.error("Unable to convert MAX byte[] from Points for {} in index", versionFieldName);
return 0L;
}
final long max = ((Number)maxObj).longValue();
log.debug("Found MAX value {} from Points for {} in index", max, versionFieldName);
return max;
}
}

View File

@ -16,7 +16,7 @@
limitations under the License.
-->
<schema name="example" version="1.6">
<field name="_version_" type="long" indexed="false" stored="true" docValues="true"/>
<field name="_version_" type="${solr.tests.longClass:plong}" indexed="false" docValues="true" stored="true"/>
<field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false"/>
<field name="text" type="text_general" indexed="true" stored="false" multiValued="true"/>
<field name="signatureField" type="string" indexed="true" stored="false"/>
@ -24,6 +24,7 @@
<uniqueKey>id</uniqueKey>
<fieldType name="string" class="solr.StrField" sortMissingLast="true"/>
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="plong" class="solr.LongPointField" positionIncrementGap="0"/>
<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>

View File

@ -16,7 +16,7 @@
limitations under the License.
-->
<schema name="example" version="1.6">
<field name="_version_" type="long" indexed="true" stored="true"/>
<field name="_version_" type="${solr.tests.longClass:plong}" indexed="true" docValues="false" stored="true" />
<field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false"/>
<field name="text" type="text_general" indexed="true" stored="false" multiValued="true"/>
<field name="signatureField" type="string" indexed="true" stored="false"/>
@ -24,6 +24,7 @@
<uniqueKey>id</uniqueKey>
<fieldType name="string" class="solr.StrField" sortMissingLast="true"/>
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="plong" class="solr.LongPointField" positionIncrementGap="0"/>
<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>

View File

@ -21,6 +21,7 @@ import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.util.Hash;
import org.apache.solr.core.CoreContainer;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.SchemaField;
import org.junit.Test;
public class VersionInfoTest extends SolrTestCaseJ4 {
@ -28,8 +29,13 @@ public class VersionInfoTest extends SolrTestCaseJ4 {
@Test
public void testMaxIndexedVersionFromIndex() throws Exception {
initCore("solrconfig-tlog.xml", "schema-version-indexed.xml");
try {
testMaxVersionLogic(req());
try (SolrQueryRequest r = req()) {
SchemaField v = r.getCore().getUpdateHandler().getUpdateLog().getVersionInfo().getVersionField();
assertNotNull(v);
assertTrue(v.indexed());
assertFalse(v.hasDocValues());
testMaxVersionLogic(r);
} finally {
deleteCore();
}
@ -38,8 +44,13 @@ public class VersionInfoTest extends SolrTestCaseJ4 {
@Test
public void testMaxDocValuesVersionFromIndex() throws Exception {
initCore("solrconfig-tlog.xml","schema-version-dv.xml");
try {
testMaxVersionLogic(req());
try (SolrQueryRequest r = req()) {
SchemaField v = r.getCore().getUpdateHandler().getUpdateLog().getVersionInfo().getVersionField();
assertNotNull(v);
assertFalse(v.indexed());
assertTrue(v.hasDocValues());
testMaxVersionLogic(r);
} finally {
deleteCore();
}
@ -55,76 +66,107 @@ public class VersionInfoTest extends SolrTestCaseJ4 {
// index the first doc
String docId = Integer.toString(1);
BytesRef idBytes = new BytesRef(docId);
assertU(adoc("id", docId));
assertU(commit());
// max from index should not be 0 or null
Long maxVersionFromIndex = ulog.getMaxVersionFromIndex();
assertNotNull(maxVersionFromIndex);
assertTrue(maxVersionFromIndex != 0L);
// max from the ulog should not be 0 or null
Long maxVersionFromUlog = ulog.getMaxVersionFromIndex();
assertNotNull(maxVersionFromUlog);
assertTrue(maxVersionFromUlog != 0L);
// version from index should be less than or equal the version of the first doc indexed
VersionInfo vInfo = ulog.getVersionInfo();
Long version = vInfo.getVersionFromIndex(new BytesRef(docId));
try (SolrQueryRequest newReq = req()) {
// max version direct from the index should not be null, and should match what ulog reports
// (since doc is committed)
Long vInfoMax = vInfo.getMaxVersionFromIndex(newReq.getSearcher());
assertNotNull(vInfoMax);
assertEquals(maxVersionFromUlog, vInfoMax);
}
// max version from ulog (and index) should be exactly the same as our single committed doc
Long version = vInfo.getVersionFromIndex(idBytes);
assertNotNull("version info should not be null for test doc: "+docId, version);
assertTrue("max version from index should be less than or equal to the version of first doc added, diff: "+
(version - maxVersionFromIndex), maxVersionFromIndex <= version);
assertEquals(maxVersionFromUlog, version);
BytesRef idBytes = new BytesRef(docId);
int bucketHash = Hash.murmurhash3_x86_32(idBytes.bytes, idBytes.offset, idBytes.length, 0);
VersionBucket bucket = vInfo.bucket(bucketHash);
assertTrue(bucket.highest == version.longValue());
assertEquals(bucket.highest, version.longValue());
// send 2nd doc ...
// send 2nd doc ... BUT DO NOT COMMIT
docId = Integer.toString(2);
assertU(adoc("id", docId));
assertU(commit());
maxVersionFromIndex = ulog.getMaxVersionFromIndex();
assertNotNull(maxVersionFromIndex);
assertTrue(maxVersionFromIndex != 0L);
vInfo = ulog.getVersionInfo();
version = vInfo.getVersionFromIndex(new BytesRef(docId));
assertNotNull("version info should not be null for test doc: "+docId, version);
assertTrue("max version from index should be less than version of last doc added, diff: "+
(version - maxVersionFromIndex), maxVersionFromIndex < version);
idBytes = new BytesRef(docId);
bucketHash = Hash.murmurhash3_x86_32(idBytes.bytes, idBytes.offset, idBytes.length, 0);
bucket = vInfo.bucket(bucketHash);
assertTrue(bucket.highest == version.longValue());
assertU(adoc("id", docId));
try (SolrQueryRequest newReq = req()) {
// max version direct from the index should not be null, and should still match what ulog
// previously reported (since new doc is un-committed)
Long vInfoMax = vInfo.getMaxVersionFromIndex(newReq.getSearcher());
assertNotNull(vInfoMax);
assertEquals(maxVersionFromUlog, vInfoMax);
}
maxVersionFromUlog = ulog.getMaxVersionFromIndex();
assertNotNull(maxVersionFromUlog);
assertTrue("max version in ulog should have increased since our last committed doc: " +
version + " ?< " + maxVersionFromUlog,
version < maxVersionFromUlog.longValue());
version = vInfo.getVersionFromIndex(idBytes);
assertNull("version info should be null for uncommited test doc: "+docId, version);
Long versionFromTLog = ulog.lookupVersion(idBytes);
Long versionFromIndex = vInfo.getVersionFromIndex(idBytes);
assertNotNull("version from tlog should be non-null for uncommited test doc: "+docId, versionFromTLog);
// now commit that 2nd doc
assertU(commit());
try (SolrQueryRequest newReq = req()) {
// max version direct from the index should match the new doc we just committed
Long vInfoMax = vInfo.getMaxVersionFromIndex(newReq.getSearcher());
assertEquals(versionFromTLog, vInfoMax);
}
assertEquals("committing doc should not have changed version from ulog",
versionFromTLog, ulog.lookupVersion(idBytes));
Long versionFromIndex = version = vInfo.getVersionFromIndex(idBytes);
assertNotNull("version from index should be non-null for commited test doc: "+docId, versionFromIndex);
assertEquals("version from tlog and version from index should be the same",
versionFromTLog, versionFromIndex);
versionFromTLog, versionFromIndex);
bucketHash = Hash.murmurhash3_x86_32(idBytes.bytes, idBytes.offset, idBytes.length, 0);
bucket = vInfo.bucket(bucketHash);
assertEquals(bucket.highest, version.longValue());
// reload the core, which should reset the max
CoreContainer coreContainer = req.getCore().getCoreContainer();
coreContainer.reload(req.getCore().getName());
maxVersionFromIndex = ulog.getMaxVersionFromIndex();
assertEquals("max version from index should be equal to version of last doc added after reload",
maxVersionFromIndex, version);
maxVersionFromUlog = ulog.getMaxVersionFromIndex();
assertEquals("after reload, max version from ulog should be equal to version of last doc added",
maxVersionFromUlog, versionFromIndex);
// one more doc after reload
docId = Integer.toString(3);
idBytes = new BytesRef(docId);
assertU(adoc("id", docId));
assertU(commit());
maxVersionFromIndex = ulog.getMaxVersionFromIndex();
assertNotNull(maxVersionFromIndex);
assertTrue(maxVersionFromIndex != 0L);
maxVersionFromUlog = ulog.getMaxVersionFromIndex();
assertNotNull(maxVersionFromUlog);
assertTrue(maxVersionFromUlog != 0L);
vInfo = ulog.getVersionInfo();
version = vInfo.getVersionFromIndex(new BytesRef(docId));
try (SolrQueryRequest newReq = req()) {
// max version direct from the index should not be null, and should match what ulog reports
// (since doc is committed)
Long vInfoMax = vInfo.getMaxVersionFromIndex(newReq.getSearcher());
assertNotNull(vInfoMax);
assertEquals(maxVersionFromUlog, vInfoMax);
}
version = vInfo.getVersionFromIndex(idBytes);
assertNotNull("version info should not be null for test doc: "+docId, version);
assertTrue("max version from index should be less than version of last doc added, diff: "+
(version - maxVersionFromIndex), maxVersionFromIndex < version);
assertEquals(maxVersionFromUlog, version);
idBytes = new BytesRef(docId);
bucketHash = Hash.murmurhash3_x86_32(idBytes.bytes, idBytes.offset, idBytes.length, 0);
bucket = vInfo.bucket(bucketHash);
assertTrue(bucket.highest == version.longValue());
assertEquals(bucket.highest, version.longValue());
}
}