diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index d151353e939..f872b969a2e 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -105,6 +105,9 @@ Bug Fixes * SOLR-11619: V2 requests that needed to be forwarded to other nodes would get an NPE. (David Smiley) +* SOLR-11231: Guard against unset fields when performing language detection. + (Chris Beer via Steve Rowe) + Optimizations ---------------------- * SOLR-11285: Refactor autoscaling framework to avoid direct references to Zookeeper and Solr diff --git a/solr/contrib/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessor.java b/solr/contrib/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessor.java index 836a3bf6795..df0e5f7fa25 100644 --- a/solr/contrib/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessor.java +++ b/solr/contrib/langid/src/java/org/apache/solr/update/processor/TikaLanguageIdentifierUpdateProcessor.java @@ -112,13 +112,21 @@ public class TikaLanguageIdentifierUpdateProcessor extends LanguageIdentifierUpd private int getExpectedSize(SolrInputDocument doc, String[] fields) { int docSize = 0; for (String field : fields) { - Collection contents = doc.getFieldValues(field); - for (Object content : contents) { - if (content instanceof String) { - docSize += Math.min(((String) content).length(), maxFieldValueChars); + if (doc.containsKey(field)) { + Collection contents = doc.getFieldValues(field); + if (contents != null) { + for (Object content : contents) { + if (content instanceof String) { + docSize += Math.min(((String) content).length(), maxFieldValueChars); + } + } + + if (docSize > maxTotalChars) { + docSize = maxTotalChars; + break; + } } } - docSize = Math.min(docSize, maxTotalChars); } return docSize; } diff --git a/solr/contrib/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java b/solr/contrib/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java index c7381a918e7..b90f54a4d3f 100644 --- a/solr/contrib/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java +++ b/solr/contrib/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java @@ -211,6 +211,19 @@ public abstract class LanguageIdentifierUpdateProcessorFactoryTestCase extends S assertEquals("", liProcessor.process(doc).getFieldValue("language")); } + @Test + public void testMissingFieldEmptyString() throws Exception { + SolrInputDocument doc; + ModifiableSolrParams parameters = new ModifiableSolrParams(); + parameters.add("langid.fl", "no_such_field"); + parameters.add("langid.langField", "language"); + parameters.add("langid.enforceSchema", "false"); + liProcessor = createLangIdProcessor(parameters); + + doc = new SolrInputDocument(); + assertEquals("", liProcessor.process(doc).getFieldValue("language")); + } + @Test public void testFallback() throws Exception { SolrInputDocument doc;