SOLR-12368: inplace update for field that doesn't yet exist in any doc

If the field is non-stored, non-indexed and docvalue enabled numeric field
then inplace update can be done. previously, lucene didn't support
docvalue update for field that is not yet present in indexWriter but
LUCENE-8316 added support for this.
This adds support to update field which satisfies inplace conditions
but which doesn't yet exist in any docs
This commit is contained in:
Munendra S N 2019-07-17 21:25:57 +05:30
parent 96dc45b649
commit 1ecd02deb5
5 changed files with 127 additions and 22 deletions

View File

@ -75,8 +75,11 @@ Velocity 2.0 and Velocity Tools 3.0
Apache ZooKeeper 3.5.5
Jetty 9.4.19.v20190610
Improvements
----------------------
(No Changes)
* SOLR-12368: Support InPlace DV updates for a field that does not yet exist in any documents
(hossman, Simon Willnauer, Adrien Grand, Munendra S N)
================== 8.2.0 ==================

View File

@ -236,19 +236,14 @@ public class AtomicUpdateDocumentMerger {
// third pass: requiring checks against the actual IndexWriter due to internal DV update limitations
SolrCore core = cmd.getReq().getCore();
RefCounted<IndexWriter> holder = core.getSolrCoreState().getIndexWriter(core);
Set<String> fieldNamesFromIndexWriter = null;
Set<String> segmentSortingFields = null;
try {
IndexWriter iw = holder.get();
fieldNamesFromIndexWriter = iw.getFieldNames(); // This shouldn't be needed once LUCENE-7659 is resolved
segmentSortingFields = iw.getConfig().getIndexSortFields();
} finally {
holder.decref();
}
for (String fieldName: candidateFields) {
if (! fieldNamesFromIndexWriter.contains(fieldName) ) {
return Collections.emptySet(); // if this field doesn't exist, DV update can't work
}
if (segmentSortingFields.contains(fieldName) ) {
return Collections.emptySet(); // if this is used for segment sorting, DV updates can't work
}

View File

@ -19,6 +19,7 @@
<uniqueKey>id</uniqueKey>
<field name="id" type="string" indexed="true" stored="true" docValues="true"/>
<field name="_root_" type="string" indexed="true" stored="true" docValues="true"/>
<field name="_version_" type="long" indexed="false" stored="false" docValues="true" />
<field name="shardName" type="string" multiValued="false" indexed="false" required="false" stored="true"/>

View File

@ -392,7 +392,7 @@ public class TestInPlaceUpdatesDistrib extends AbstractFullDistribZkTestBase {
buildRandomIndex(101.0F, ids);
List<Integer> luceneDocids = new ArrayList<>(numDocs);
List<Float> valuesList = new ArrayList<Float>(numDocs);
List<Number> valuesList = new ArrayList<>(numDocs);
SolrParams params = params("q", "id:[0 TO *]", "fl", "*,[docid]", "rows", String.valueOf(numDocs), "sort", "id_i asc");
SolrDocumentList results = LEADER.query(params).getResults();
assertEquals(numDocs, results.size());
@ -403,7 +403,7 @@ public class TestInPlaceUpdatesDistrib extends AbstractFullDistribZkTestBase {
log.info("Initial results: "+results);
// before we do any atomic operations, sanity check our results against all clients
assertDocIdsAndValuesAgainstAllClients("sanitycheck", params, luceneDocids, valuesList);
assertDocIdsAndValuesAgainstAllClients("sanitycheck", params, luceneDocids, "inplace_updatable_float", valuesList);
// now we're going to overwrite the value for all of our testing docs
// giving them a value between -5 and +5
@ -430,7 +430,7 @@ public class TestInPlaceUpdatesDistrib extends AbstractFullDistribZkTestBase {
"fq", "id:[0 TO *]"),
// existing sort & fl that we want...
params),
luceneDocids, valuesList);
luceneDocids, "inplace_updatable_float", valuesList);
// update doc, w/increment
log.info("Updating the documents...");
@ -441,7 +441,7 @@ public class TestInPlaceUpdatesDistrib extends AbstractFullDistribZkTestBase {
// 0 test docs matching the query inplace_updatable_float:[-10 TO 10]
final float inc = (r.nextBoolean() ? -1.0F : 1.0F) * (r.nextFloat() + (float)atLeast(20));
assert 20 < Math.abs(inc);
final float value = valuesList.get(id) + inc;
final float value = (float)valuesList.get(id) + inc;
assert value < -10 || 10 < value;
valuesList.set(id, value);
@ -454,7 +454,22 @@ public class TestInPlaceUpdatesDistrib extends AbstractFullDistribZkTestBase {
"fq", "id:[0 TO *]"),
// existing sort & fl that we want...
params),
luceneDocids, valuesList);
luceneDocids, "inplace_updatable_float", valuesList);
log.info("Updating the documents with new field...");
Collections.shuffle(ids, r);
for (int id : ids) {
final int val = random().nextInt(20);
valuesList.set(id, val);
index("id", id, "inplace_updatable_int", map((random().nextBoolean()?"inc": "set"), val));
}
commit();
assertDocIdsAndValuesAgainstAllClients
("inplace_for_first_field_update", SolrParams.wrapDefaults(params("q", "inplace_updatable_int:[* TO *]",
"fq", "id:[0 TO *]"),
params),
luceneDocids, "inplace_updatable_int", valuesList);
log.info("docValuesUpdateTest: This test passed fine...");
}
@ -534,12 +549,14 @@ public class TestInPlaceUpdatesDistrib extends AbstractFullDistribZkTestBase {
* @param debug used in log and assertion messages
* @param req the query to execut, should include rows &amp; sort params such that the results can be compared to luceneDocids and valuesList
* @param luceneDocids a list of "[docid]" values to be tested against each doc in the req results (in order)
* @param valuesList a list of "inplace_updatable_float" values to be tested against each doc in the req results (in order)
* @param fieldName used to get value from the doc to validate with valuesList
* @param valuesList a list of given fieldName values to be tested against each doc in results (in order)
*/
private void assertDocIdsAndValuesAgainstAllClients(final String debug,
final SolrParams req,
final List<Integer> luceneDocids,
final List<Float> valuesList) throws Exception {
final String fieldName,
final List<Number> valuesList) throws Exception {
assert luceneDocids.size() == valuesList.size();
final long numFoundExpected = luceneDocids.size();
@ -564,7 +581,7 @@ public class TestInPlaceUpdatesDistrib extends AbstractFullDistribZkTestBase {
Thread.sleep(WAIT_TIME);
}
assertDocIdsAndValuesInResults(msg, results, luceneDocids, valuesList);
assertDocIdsAndValuesInResults(msg, results, luceneDocids, fieldName, valuesList);
}
}
@ -575,12 +592,14 @@ public class TestInPlaceUpdatesDistrib extends AbstractFullDistribZkTestBase {
* @param msgPre used as a prefix for assertion messages
* @param results the sorted results of some query, such that all matches are included (ie: rows = numFound)
* @param luceneDocids a list of "[docid]" values to be tested against each doc in results (in order)
* @param valuesList a list of "inplace_updatable_float" values to be tested against each doc in results (in order)
* @param fieldName used to get value from the doc to validate with valuesList
* @param valuesList a list of given fieldName values to be tested against each doc in results (in order)
*/
private void assertDocIdsAndValuesInResults(final String msgPre,
final SolrDocumentList results,
final List<Integer> luceneDocids,
final List<Float> valuesList) {
final String fieldName,
final List<Number> valuesList) {
assert luceneDocids.size() == valuesList.size();
assertEquals(msgPre + ": rows param wasn't big enough, we need to compare all results matching the query",
@ -590,7 +609,7 @@ public class TestInPlaceUpdatesDistrib extends AbstractFullDistribZkTestBase {
for (SolrDocument doc : results) {
final int id = Integer.parseInt(doc.get("id").toString());
final Object val = doc.get("inplace_updatable_float");
final Object val = doc.get(fieldName);
final Object docid = doc.get("[docid]");
assertEquals(msgPre + " wrong val for " + doc.toString(), valuesList.get(id), val);
assertEquals(msgPre + " wrong [docid] for " + doc.toString(), luceneDocids.get(id), docid);

View File

@ -289,6 +289,93 @@ public class TestInPlaceUpdatesStandalone extends SolrTestCaseJ4 {
"//result/doc[2]/float[@name='inplace_updatable_float'][.='102.0']");
}
@Test
public void testUpdatingFieldNotPresentInDoc() throws Exception {
long version1 = addAndGetVersion(sdoc("id", "1", "title_s", "first"), null);
long version2 = addAndGetVersion(sdoc("id", "2", "title_s", "second"), null);
long version3 = addAndGetVersion(sdoc("id", "3", "title_s", "third"), null);
assertU(commit("softCommit", "false"));
assertQ(req("q", "*:*"), "//*[@numFound='3']");
// subsequent updates shouldn't cause docid changes
int docid1 = getDocId("1");
int docid2 = getDocId("2");
int docid3 = getDocId("3");
// updating fields which are not present in the document
// tests both set and inc with different fields
version1 = addAndAssertVersion(version1, "id", "1", "inplace_updatable_float", map("set", 200));
version2 = addAndAssertVersion(version2, "id", "2", "inplace_updatable_float", map("inc", 100));
version3 = addAndAssertVersion(version3, "id", "3", "inplace_updatable_float", map("set", 300));
version1 = addAndAssertVersion(version1, "id", "1", "inplace_updatable_int", map("set", 300));
assertU(commit("softCommit", "false"));
assertQ(req("q", "*:*", "sort", "id asc", "fl", "*,[docid]"),
"//*[@numFound='3']",
"//result/doc[1]/float[@name='inplace_updatable_float'][.='200.0']",
"//result/doc[1]/int[@name='inplace_updatable_int'][.='300']",
"//result/doc[2]/float[@name='inplace_updatable_float'][.='100.0']",
"//result/doc[3]/float[@name='inplace_updatable_float'][.='300.0']",
"//result/doc[1]/long[@name='_version_'][.='"+version1+"']",
"//result/doc[2]/long[@name='_version_'][.='"+version2+"']",
"//result/doc[3]/long[@name='_version_'][.='"+version3+"']",
"//result/doc[1]/int[@name='[docid]'][.='"+docid1+"']",
"//result/doc[2]/int[@name='[docid]'][.='"+docid2+"']",
"//result/doc[3]/int[@name='[docid]'][.='"+docid3+"']"
);
// adding new field which is not present in any docs but matches dynamic field rule
// and satisfies inplace condition should be treated as inplace update
version1 = addAndAssertVersion(version1, "id", "1", "inplace_updatable_i_dvo", map("set", 200));
assertU(commit("softCommit", "false"));
assertQ(req("q", "id:1", "sort", "id asc", "fl", "*,[docid]"),
"//*[@numFound='1']",
"//result/doc[1]/float[@name='inplace_updatable_float'][.='200.0']",
"//result/doc[1]/int[@name='inplace_updatable_int'][.='300']",
"//result/doc[1]/int[@name='[docid]'][.='"+docid1+"']",
"//result/doc[1]/int[@name='inplace_updatable_i_dvo'][.='200']"
);
// delete everything
deleteAllAndCommit();
// test for document with child documents
SolrInputDocument doc = new SolrInputDocument();
doc.setField("id", "1");
doc.setField("title_s", "parent");
SolrInputDocument child1 = new SolrInputDocument();
child1.setField("id", "1_1");
child1.setField("title_s", "child1");
SolrInputDocument child2 = new SolrInputDocument();
child2.setField("id", "1_2");
child2.setField("title_s", "child2");
doc.addChildDocument(child1);
doc.addChildDocument(child2);
long version = addAndGetVersion(doc, null);
assertU(commit("softCommit", "false"));
assertQ(req("q", "*:*"), "//*[@numFound='3']");
int parentDocId = getDocId("1");
int childDocid1 = getDocId("1_1");
int childDocid2 = getDocId("1_2");
version = addAndAssertVersion(version, "id", "1", "inplace_updatable_float", map("set", 200));
version = addAndAssertVersion(version, "id", "1", "inplace_updatable_int", map("inc", 300));
assertU(commit("softCommit", "false"));
// first child docs would be returned followed by parent doc
assertQ(req("q", "*:*", "fl", "*,[docid]"),
"//*[@numFound='3']",
"//result/doc[3]/float[@name='inplace_updatable_float'][.='200.0']",
"//result/doc[3]/int[@name='inplace_updatable_int'][.='300']",
"//result/doc[3]/int[@name='[docid]'][.='"+parentDocId+"']",
"//result/doc[1]/int[@name='[docid]'][.='"+childDocid1+"']",
"//result/doc[2]/int[@name='[docid]'][.='"+childDocid2+"']"
);
}
@Test
public void testUpdateTwoDifferentFields() throws Exception {
long version1 = addAndGetVersion(sdoc("id", "1", "title_s", "first", "inplace_updatable_float", 42), null);
@ -423,7 +510,7 @@ public class TestInPlaceUpdatesStandalone extends SolrTestCaseJ4 {
* Helper method to search for the specified (uniqueKey field) id using <code>fl=[docid]</code>
* and return the internal lucene docid.
*/
private int getDocId(String id) throws NumberFormatException, Exception {
private int getDocId(String id) throws Exception {
SolrDocumentList results = client.query(params("q","id:" + id, "fl", "[docid]")).getResults();
assertEquals(1, results.getNumFound());
assertEquals(1, results.size());
@ -985,10 +1072,10 @@ public class TestInPlaceUpdatesStandalone extends SolrTestCaseJ4 {
"inplace_updatable_int_with_default");
Collections.shuffle(fieldsToCheck, random()); // ... and regardless of order checked
for (String field : fieldsToCheck) {
// In-place updatable field updated before it exists SHOULD NOT BE in-place updated:
// In-place updatable field updated before it exists SHOULD NOW BE in-place updated (since LUCENE-8316):
inPlaceUpdatedFields = callComputeInPlaceUpdatableFields(sdoc("id", "1", "_version_", 42L,
field, map("set", 10)));
assertFalse(field, inPlaceUpdatedFields.contains(field));
assertTrue(field, inPlaceUpdatedFields.contains(field));
// In-place updatable field updated after it exists SHOULD BE in-place updated:
addAndGetVersion(sdoc("id", "1", field, "0"), params()); // setting up the dv
@ -1023,7 +1110,7 @@ public class TestInPlaceUpdatesStandalone extends SolrTestCaseJ4 {
callComputeInPlaceUpdatableFields(sdoc("id", "1", "_version_", 42L,
"inplace_updatable_int_with_default", "100")).isEmpty());
assertTrue("non existent dynamic dv field updated first time",
assertFalse("non existent dynamic dv field updated first time",
callComputeInPlaceUpdatableFields(sdoc("id", "1", "_version_", 42L,
"new_updatable_int_i_dvo", map("set", 10))).isEmpty());
@ -1036,7 +1123,7 @@ public class TestInPlaceUpdatesStandalone extends SolrTestCaseJ4 {
"new_updatable_int_i_dvo", map("set", 10)));
assertTrue(inPlaceUpdatedFields.contains("new_updatable_int_i_dvo"));
// for copy fields, regardless of wether the source & target support inplace updates,
// for copy fields, regardless of whether the source & target support inplace updates,
// it won't be inplace if the DVs don't exist yet...
assertTrue("inplace fields should be empty when doc has no copyfield src values yet",
callComputeInPlaceUpdatableFields(sdoc("id", "1", "_version_", 42L,