SOLR-12368: inplace update for field that doesn't yet exist in any doc

If the field is non-stored, non-indexed and docvalue enabled numeric field
then inplace update can be done. previously, lucene didn't support
docvalue update for field that is not yet present in indexWriter but
LUCENE-8316 added support for this.
This adds support to update field which satisfies inplace conditions
but which doesn't yet exist in any docs
This commit is contained in:
Munendra S N 2019-07-17 21:25:57 +05:30
parent 96dc45b649
commit 1ecd02deb5
5 changed files with 127 additions and 22 deletions

View File

@ -75,8 +75,11 @@ Velocity 2.0 and Velocity Tools 3.0
Apache ZooKeeper 3.5.5 Apache ZooKeeper 3.5.5
Jetty 9.4.19.v20190610 Jetty 9.4.19.v20190610
Improvements
----------------------
(No Changes) * SOLR-12368: Support InPlace DV updates for a field that does not yet exist in any documents
(hossman, Simon Willnauer, Adrien Grand, Munendra S N)
================== 8.2.0 ================== ================== 8.2.0 ==================

View File

@ -236,19 +236,14 @@ public class AtomicUpdateDocumentMerger {
// third pass: requiring checks against the actual IndexWriter due to internal DV update limitations // third pass: requiring checks against the actual IndexWriter due to internal DV update limitations
SolrCore core = cmd.getReq().getCore(); SolrCore core = cmd.getReq().getCore();
RefCounted<IndexWriter> holder = core.getSolrCoreState().getIndexWriter(core); RefCounted<IndexWriter> holder = core.getSolrCoreState().getIndexWriter(core);
Set<String> fieldNamesFromIndexWriter = null;
Set<String> segmentSortingFields = null; Set<String> segmentSortingFields = null;
try { try {
IndexWriter iw = holder.get(); IndexWriter iw = holder.get();
fieldNamesFromIndexWriter = iw.getFieldNames(); // This shouldn't be needed once LUCENE-7659 is resolved
segmentSortingFields = iw.getConfig().getIndexSortFields(); segmentSortingFields = iw.getConfig().getIndexSortFields();
} finally { } finally {
holder.decref(); holder.decref();
} }
for (String fieldName: candidateFields) { for (String fieldName: candidateFields) {
if (! fieldNamesFromIndexWriter.contains(fieldName) ) {
return Collections.emptySet(); // if this field doesn't exist, DV update can't work
}
if (segmentSortingFields.contains(fieldName) ) { if (segmentSortingFields.contains(fieldName) ) {
return Collections.emptySet(); // if this is used for segment sorting, DV updates can't work return Collections.emptySet(); // if this is used for segment sorting, DV updates can't work
} }

View File

@ -19,6 +19,7 @@
<uniqueKey>id</uniqueKey> <uniqueKey>id</uniqueKey>
<field name="id" type="string" indexed="true" stored="true" docValues="true"/> <field name="id" type="string" indexed="true" stored="true" docValues="true"/>
<field name="_root_" type="string" indexed="true" stored="true" docValues="true"/>
<field name="_version_" type="long" indexed="false" stored="false" docValues="true" /> <field name="_version_" type="long" indexed="false" stored="false" docValues="true" />
<field name="shardName" type="string" multiValued="false" indexed="false" required="false" stored="true"/> <field name="shardName" type="string" multiValued="false" indexed="false" required="false" stored="true"/>

View File

@ -392,7 +392,7 @@ public class TestInPlaceUpdatesDistrib extends AbstractFullDistribZkTestBase {
buildRandomIndex(101.0F, ids); buildRandomIndex(101.0F, ids);
List<Integer> luceneDocids = new ArrayList<>(numDocs); List<Integer> luceneDocids = new ArrayList<>(numDocs);
List<Float> valuesList = new ArrayList<Float>(numDocs); List<Number> valuesList = new ArrayList<>(numDocs);
SolrParams params = params("q", "id:[0 TO *]", "fl", "*,[docid]", "rows", String.valueOf(numDocs), "sort", "id_i asc"); SolrParams params = params("q", "id:[0 TO *]", "fl", "*,[docid]", "rows", String.valueOf(numDocs), "sort", "id_i asc");
SolrDocumentList results = LEADER.query(params).getResults(); SolrDocumentList results = LEADER.query(params).getResults();
assertEquals(numDocs, results.size()); assertEquals(numDocs, results.size());
@ -403,7 +403,7 @@ public class TestInPlaceUpdatesDistrib extends AbstractFullDistribZkTestBase {
log.info("Initial results: "+results); log.info("Initial results: "+results);
// before we do any atomic operations, sanity check our results against all clients // before we do any atomic operations, sanity check our results against all clients
assertDocIdsAndValuesAgainstAllClients("sanitycheck", params, luceneDocids, valuesList); assertDocIdsAndValuesAgainstAllClients("sanitycheck", params, luceneDocids, "inplace_updatable_float", valuesList);
// now we're going to overwrite the value for all of our testing docs // now we're going to overwrite the value for all of our testing docs
// giving them a value between -5 and +5 // giving them a value between -5 and +5
@ -430,7 +430,7 @@ public class TestInPlaceUpdatesDistrib extends AbstractFullDistribZkTestBase {
"fq", "id:[0 TO *]"), "fq", "id:[0 TO *]"),
// existing sort & fl that we want... // existing sort & fl that we want...
params), params),
luceneDocids, valuesList); luceneDocids, "inplace_updatable_float", valuesList);
// update doc, w/increment // update doc, w/increment
log.info("Updating the documents..."); log.info("Updating the documents...");
@ -441,7 +441,7 @@ public class TestInPlaceUpdatesDistrib extends AbstractFullDistribZkTestBase {
// 0 test docs matching the query inplace_updatable_float:[-10 TO 10] // 0 test docs matching the query inplace_updatable_float:[-10 TO 10]
final float inc = (r.nextBoolean() ? -1.0F : 1.0F) * (r.nextFloat() + (float)atLeast(20)); final float inc = (r.nextBoolean() ? -1.0F : 1.0F) * (r.nextFloat() + (float)atLeast(20));
assert 20 < Math.abs(inc); assert 20 < Math.abs(inc);
final float value = valuesList.get(id) + inc; final float value = (float)valuesList.get(id) + inc;
assert value < -10 || 10 < value; assert value < -10 || 10 < value;
valuesList.set(id, value); valuesList.set(id, value);
@ -454,7 +454,22 @@ public class TestInPlaceUpdatesDistrib extends AbstractFullDistribZkTestBase {
"fq", "id:[0 TO *]"), "fq", "id:[0 TO *]"),
// existing sort & fl that we want... // existing sort & fl that we want...
params), params),
luceneDocids, valuesList); luceneDocids, "inplace_updatable_float", valuesList);
log.info("Updating the documents with new field...");
Collections.shuffle(ids, r);
for (int id : ids) {
final int val = random().nextInt(20);
valuesList.set(id, val);
index("id", id, "inplace_updatable_int", map((random().nextBoolean()?"inc": "set"), val));
}
commit();
assertDocIdsAndValuesAgainstAllClients
("inplace_for_first_field_update", SolrParams.wrapDefaults(params("q", "inplace_updatable_int:[* TO *]",
"fq", "id:[0 TO *]"),
params),
luceneDocids, "inplace_updatable_int", valuesList);
log.info("docValuesUpdateTest: This test passed fine..."); log.info("docValuesUpdateTest: This test passed fine...");
} }
@ -534,12 +549,14 @@ public class TestInPlaceUpdatesDistrib extends AbstractFullDistribZkTestBase {
* @param debug used in log and assertion messages * @param debug used in log and assertion messages
* @param req the query to execut, should include rows &amp; sort params such that the results can be compared to luceneDocids and valuesList * @param req the query to execut, should include rows &amp; sort params such that the results can be compared to luceneDocids and valuesList
* @param luceneDocids a list of "[docid]" values to be tested against each doc in the req results (in order) * @param luceneDocids a list of "[docid]" values to be tested against each doc in the req results (in order)
* @param valuesList a list of "inplace_updatable_float" values to be tested against each doc in the req results (in order) * @param fieldName used to get value from the doc to validate with valuesList
* @param valuesList a list of given fieldName values to be tested against each doc in results (in order)
*/ */
private void assertDocIdsAndValuesAgainstAllClients(final String debug, private void assertDocIdsAndValuesAgainstAllClients(final String debug,
final SolrParams req, final SolrParams req,
final List<Integer> luceneDocids, final List<Integer> luceneDocids,
final List<Float> valuesList) throws Exception { final String fieldName,
final List<Number> valuesList) throws Exception {
assert luceneDocids.size() == valuesList.size(); assert luceneDocids.size() == valuesList.size();
final long numFoundExpected = luceneDocids.size(); final long numFoundExpected = luceneDocids.size();
@ -564,7 +581,7 @@ public class TestInPlaceUpdatesDistrib extends AbstractFullDistribZkTestBase {
Thread.sleep(WAIT_TIME); Thread.sleep(WAIT_TIME);
} }
assertDocIdsAndValuesInResults(msg, results, luceneDocids, valuesList); assertDocIdsAndValuesInResults(msg, results, luceneDocids, fieldName, valuesList);
} }
} }
@ -575,12 +592,14 @@ public class TestInPlaceUpdatesDistrib extends AbstractFullDistribZkTestBase {
* @param msgPre used as a prefix for assertion messages * @param msgPre used as a prefix for assertion messages
* @param results the sorted results of some query, such that all matches are included (ie: rows = numFound) * @param results the sorted results of some query, such that all matches are included (ie: rows = numFound)
* @param luceneDocids a list of "[docid]" values to be tested against each doc in results (in order) * @param luceneDocids a list of "[docid]" values to be tested against each doc in results (in order)
* @param valuesList a list of "inplace_updatable_float" values to be tested against each doc in results (in order) * @param fieldName used to get value from the doc to validate with valuesList
* @param valuesList a list of given fieldName values to be tested against each doc in results (in order)
*/ */
private void assertDocIdsAndValuesInResults(final String msgPre, private void assertDocIdsAndValuesInResults(final String msgPre,
final SolrDocumentList results, final SolrDocumentList results,
final List<Integer> luceneDocids, final List<Integer> luceneDocids,
final List<Float> valuesList) { final String fieldName,
final List<Number> valuesList) {
assert luceneDocids.size() == valuesList.size(); assert luceneDocids.size() == valuesList.size();
assertEquals(msgPre + ": rows param wasn't big enough, we need to compare all results matching the query", assertEquals(msgPre + ": rows param wasn't big enough, we need to compare all results matching the query",
@ -590,7 +609,7 @@ public class TestInPlaceUpdatesDistrib extends AbstractFullDistribZkTestBase {
for (SolrDocument doc : results) { for (SolrDocument doc : results) {
final int id = Integer.parseInt(doc.get("id").toString()); final int id = Integer.parseInt(doc.get("id").toString());
final Object val = doc.get("inplace_updatable_float"); final Object val = doc.get(fieldName);
final Object docid = doc.get("[docid]"); final Object docid = doc.get("[docid]");
assertEquals(msgPre + " wrong val for " + doc.toString(), valuesList.get(id), val); assertEquals(msgPre + " wrong val for " + doc.toString(), valuesList.get(id), val);
assertEquals(msgPre + " wrong [docid] for " + doc.toString(), luceneDocids.get(id), docid); assertEquals(msgPre + " wrong [docid] for " + doc.toString(), luceneDocids.get(id), docid);

View File

@ -289,6 +289,93 @@ public class TestInPlaceUpdatesStandalone extends SolrTestCaseJ4 {
"//result/doc[2]/float[@name='inplace_updatable_float'][.='102.0']"); "//result/doc[2]/float[@name='inplace_updatable_float'][.='102.0']");
} }
@Test
public void testUpdatingFieldNotPresentInDoc() throws Exception {
long version1 = addAndGetVersion(sdoc("id", "1", "title_s", "first"), null);
long version2 = addAndGetVersion(sdoc("id", "2", "title_s", "second"), null);
long version3 = addAndGetVersion(sdoc("id", "3", "title_s", "third"), null);
assertU(commit("softCommit", "false"));
assertQ(req("q", "*:*"), "//*[@numFound='3']");
// subsequent updates shouldn't cause docid changes
int docid1 = getDocId("1");
int docid2 = getDocId("2");
int docid3 = getDocId("3");
// updating fields which are not present in the document
// tests both set and inc with different fields
version1 = addAndAssertVersion(version1, "id", "1", "inplace_updatable_float", map("set", 200));
version2 = addAndAssertVersion(version2, "id", "2", "inplace_updatable_float", map("inc", 100));
version3 = addAndAssertVersion(version3, "id", "3", "inplace_updatable_float", map("set", 300));
version1 = addAndAssertVersion(version1, "id", "1", "inplace_updatable_int", map("set", 300));
assertU(commit("softCommit", "false"));
assertQ(req("q", "*:*", "sort", "id asc", "fl", "*,[docid]"),
"//*[@numFound='3']",
"//result/doc[1]/float[@name='inplace_updatable_float'][.='200.0']",
"//result/doc[1]/int[@name='inplace_updatable_int'][.='300']",
"//result/doc[2]/float[@name='inplace_updatable_float'][.='100.0']",
"//result/doc[3]/float[@name='inplace_updatable_float'][.='300.0']",
"//result/doc[1]/long[@name='_version_'][.='"+version1+"']",
"//result/doc[2]/long[@name='_version_'][.='"+version2+"']",
"//result/doc[3]/long[@name='_version_'][.='"+version3+"']",
"//result/doc[1]/int[@name='[docid]'][.='"+docid1+"']",
"//result/doc[2]/int[@name='[docid]'][.='"+docid2+"']",
"//result/doc[3]/int[@name='[docid]'][.='"+docid3+"']"
);
// adding new field which is not present in any docs but matches dynamic field rule
// and satisfies inplace condition should be treated as inplace update
version1 = addAndAssertVersion(version1, "id", "1", "inplace_updatable_i_dvo", map("set", 200));
assertU(commit("softCommit", "false"));
assertQ(req("q", "id:1", "sort", "id asc", "fl", "*,[docid]"),
"//*[@numFound='1']",
"//result/doc[1]/float[@name='inplace_updatable_float'][.='200.0']",
"//result/doc[1]/int[@name='inplace_updatable_int'][.='300']",
"//result/doc[1]/int[@name='[docid]'][.='"+docid1+"']",
"//result/doc[1]/int[@name='inplace_updatable_i_dvo'][.='200']"
);
// delete everything
deleteAllAndCommit();
// test for document with child documents
SolrInputDocument doc = new SolrInputDocument();
doc.setField("id", "1");
doc.setField("title_s", "parent");
SolrInputDocument child1 = new SolrInputDocument();
child1.setField("id", "1_1");
child1.setField("title_s", "child1");
SolrInputDocument child2 = new SolrInputDocument();
child2.setField("id", "1_2");
child2.setField("title_s", "child2");
doc.addChildDocument(child1);
doc.addChildDocument(child2);
long version = addAndGetVersion(doc, null);
assertU(commit("softCommit", "false"));
assertQ(req("q", "*:*"), "//*[@numFound='3']");
int parentDocId = getDocId("1");
int childDocid1 = getDocId("1_1");
int childDocid2 = getDocId("1_2");
version = addAndAssertVersion(version, "id", "1", "inplace_updatable_float", map("set", 200));
version = addAndAssertVersion(version, "id", "1", "inplace_updatable_int", map("inc", 300));
assertU(commit("softCommit", "false"));
// first child docs would be returned followed by parent doc
assertQ(req("q", "*:*", "fl", "*,[docid]"),
"//*[@numFound='3']",
"//result/doc[3]/float[@name='inplace_updatable_float'][.='200.0']",
"//result/doc[3]/int[@name='inplace_updatable_int'][.='300']",
"//result/doc[3]/int[@name='[docid]'][.='"+parentDocId+"']",
"//result/doc[1]/int[@name='[docid]'][.='"+childDocid1+"']",
"//result/doc[2]/int[@name='[docid]'][.='"+childDocid2+"']"
);
}
@Test @Test
public void testUpdateTwoDifferentFields() throws Exception { public void testUpdateTwoDifferentFields() throws Exception {
long version1 = addAndGetVersion(sdoc("id", "1", "title_s", "first", "inplace_updatable_float", 42), null); long version1 = addAndGetVersion(sdoc("id", "1", "title_s", "first", "inplace_updatable_float", 42), null);
@ -423,7 +510,7 @@ public class TestInPlaceUpdatesStandalone extends SolrTestCaseJ4 {
* Helper method to search for the specified (uniqueKey field) id using <code>fl=[docid]</code> * Helper method to search for the specified (uniqueKey field) id using <code>fl=[docid]</code>
* and return the internal lucene docid. * and return the internal lucene docid.
*/ */
private int getDocId(String id) throws NumberFormatException, Exception { private int getDocId(String id) throws Exception {
SolrDocumentList results = client.query(params("q","id:" + id, "fl", "[docid]")).getResults(); SolrDocumentList results = client.query(params("q","id:" + id, "fl", "[docid]")).getResults();
assertEquals(1, results.getNumFound()); assertEquals(1, results.getNumFound());
assertEquals(1, results.size()); assertEquals(1, results.size());
@ -985,10 +1072,10 @@ public class TestInPlaceUpdatesStandalone extends SolrTestCaseJ4 {
"inplace_updatable_int_with_default"); "inplace_updatable_int_with_default");
Collections.shuffle(fieldsToCheck, random()); // ... and regardless of order checked Collections.shuffle(fieldsToCheck, random()); // ... and regardless of order checked
for (String field : fieldsToCheck) { for (String field : fieldsToCheck) {
// In-place updatable field updated before it exists SHOULD NOT BE in-place updated: // In-place updatable field updated before it exists SHOULD NOW BE in-place updated (since LUCENE-8316):
inPlaceUpdatedFields = callComputeInPlaceUpdatableFields(sdoc("id", "1", "_version_", 42L, inPlaceUpdatedFields = callComputeInPlaceUpdatableFields(sdoc("id", "1", "_version_", 42L,
field, map("set", 10))); field, map("set", 10)));
assertFalse(field, inPlaceUpdatedFields.contains(field)); assertTrue(field, inPlaceUpdatedFields.contains(field));
// In-place updatable field updated after it exists SHOULD BE in-place updated: // In-place updatable field updated after it exists SHOULD BE in-place updated:
addAndGetVersion(sdoc("id", "1", field, "0"), params()); // setting up the dv addAndGetVersion(sdoc("id", "1", field, "0"), params()); // setting up the dv
@ -1023,7 +1110,7 @@ public class TestInPlaceUpdatesStandalone extends SolrTestCaseJ4 {
callComputeInPlaceUpdatableFields(sdoc("id", "1", "_version_", 42L, callComputeInPlaceUpdatableFields(sdoc("id", "1", "_version_", 42L,
"inplace_updatable_int_with_default", "100")).isEmpty()); "inplace_updatable_int_with_default", "100")).isEmpty());
assertTrue("non existent dynamic dv field updated first time", assertFalse("non existent dynamic dv field updated first time",
callComputeInPlaceUpdatableFields(sdoc("id", "1", "_version_", 42L, callComputeInPlaceUpdatableFields(sdoc("id", "1", "_version_", 42L,
"new_updatable_int_i_dvo", map("set", 10))).isEmpty()); "new_updatable_int_i_dvo", map("set", 10))).isEmpty());
@ -1036,7 +1123,7 @@ public class TestInPlaceUpdatesStandalone extends SolrTestCaseJ4 {
"new_updatable_int_i_dvo", map("set", 10))); "new_updatable_int_i_dvo", map("set", 10)));
assertTrue(inPlaceUpdatedFields.contains("new_updatable_int_i_dvo")); assertTrue(inPlaceUpdatedFields.contains("new_updatable_int_i_dvo"));
// for copy fields, regardless of wether the source & target support inplace updates, // for copy fields, regardless of whether the source & target support inplace updates,
// it won't be inplace if the DVs don't exist yet... // it won't be inplace if the DVs don't exist yet...
assertTrue("inplace fields should be empty when doc has no copyfield src values yet", assertTrue("inplace fields should be empty when doc has no copyfield src values yet",
callComputeInPlaceUpdatableFields(sdoc("id", "1", "_version_", 42L, callComputeInPlaceUpdatableFields(sdoc("id", "1", "_version_", 42L,