SOLR-11231: Guard against unset fields when performing language detection. This closes #232

This commit is contained in:
Steve Rowe 2017-11-13 17:57:24 -05:00
parent 3045349140
commit c3513e9281
3 changed files with 29 additions and 5 deletions

View File

@@ -105,6 +105,9 @@ Bug Fixes
* SOLR-11619: V2 requests that needed to be forwarded to other nodes would get an NPE. (David Smiley)
* SOLR-11231: Guard against unset fields when performing language detection.
(Chris Beer via Steve Rowe)
Optimizations
----------------------
* SOLR-11285: Refactor autoscaling framework to avoid direct references to Zookeeper and Solr

View File

@@ -112,13 +112,21 @@ public class TikaLanguageIdentifierUpdateProcessor extends LanguageIdentifierUpd
/**
 * Estimates how many characters of the given fields will be considered, honouring the
 * per-value limit ({@code maxFieldValueChars}) and the overall limit ({@code maxTotalChars}).
 *
 * <p>Fields that are absent from the document, or whose value collection is {@code null},
 * contribute nothing. Only {@code String} values are counted.
 *
 * @param doc the input document to inspect
 * @param fields names of the fields to measure
 * @return the summed (per-value capped) length of the string values, capped at
 *     {@code maxTotalChars} once any present field has been processed
 */
private int getExpectedSize(SolrInputDocument doc, String[] fields) {
  int docSize = 0;
  for (String field : fields) {
    // Guard against fields that are not set on this document.
    if (!doc.containsKey(field)) {
      continue;
    }
    Collection<Object> contents = doc.getFieldValues(field);
    if (contents == null) {
      continue;
    }
    for (Object content : contents) {
      if (content instanceof String) {
        docSize += Math.min(((String) content).length(), maxFieldValueChars);
      }
    }
    // Clamp to the total limit and stop scanning: once the cap is reached the remaining
    // fields cannot change the clamped result (assumes maxFieldValueChars is non-negative,
    // so contributions never shrink the total - TODO confirm against configuration parsing).
    if (docSize >= maxTotalChars) {
      docSize = maxTotalChars;
      break;
    }
  }
  return docSize;
}

View File

@@ -211,6 +211,19 @@ public abstract class LanguageIdentifierUpdateProcessorFactoryTestCase extends S
assertEquals("", liProcessor.process(doc).getFieldValue("language"));
}
/**
 * Runs the language-id processor over an empty document while {@code langid.fl} names a
 * field the document does not contain, and expects the {@code language} field to be "".
 */
@Test
public void testMissingFieldEmptyString() throws Exception {
  // Configure detection against a field that is never set on the document.
  final ModifiableSolrParams params = new ModifiableSolrParams();
  params.add("langid.fl", "no_such_field");
  params.add("langid.langField", "language");
  params.add("langid.enforceSchema", "false");
  liProcessor = createLangIdProcessor(params);

  // An empty document: the configured source field is missing entirely.
  final SolrInputDocument emptyDoc = new SolrInputDocument();
  final Object detected = liProcessor.process(emptyDoc).getFieldValue("language");
  assertEquals("", detected);
}
@Test
public void testFallback() throws Exception {
SolrInputDocument doc;