mirror of https://github.com/apache/lucene.git
SOLR-11231: Guard against unset fields when performing language detection. This closes #232
This commit is contained in:
parent
3045349140
commit
c3513e9281
|
@ -105,6 +105,9 @@ Bug Fixes
|
||||||
|
|
||||||
* SOLR-11619: V2 requests that needed to be forwarded to other nodes would get an NPE. (David Smiley)
|
* SOLR-11619: V2 requests that needed to be forwarded to other nodes would get an NPE. (David Smiley)
|
||||||
|
|
||||||
|
* SOLR-11231: Guard against unset fields when performing language detection.
|
||||||
|
(Chris Beer via Steve Rowe)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
----------------------
|
----------------------
|
||||||
* SOLR-11285: Refactor autoscaling framework to avoid direct references to Zookeeper and Solr
|
* SOLR-11285: Refactor autoscaling framework to avoid direct references to Zookeeper and Solr
|
||||||
|
|
|
@ -112,13 +112,21 @@ public class TikaLanguageIdentifierUpdateProcessor extends LanguageIdentifierUpd
|
||||||
private int getExpectedSize(SolrInputDocument doc, String[] fields) {
|
private int getExpectedSize(SolrInputDocument doc, String[] fields) {
|
||||||
int docSize = 0;
|
int docSize = 0;
|
||||||
for (String field : fields) {
|
for (String field : fields) {
|
||||||
|
if (doc.containsKey(field)) {
|
||||||
Collection<Object> contents = doc.getFieldValues(field);
|
Collection<Object> contents = doc.getFieldValues(field);
|
||||||
|
if (contents != null) {
|
||||||
for (Object content : contents) {
|
for (Object content : contents) {
|
||||||
if (content instanceof String) {
|
if (content instanceof String) {
|
||||||
docSize += Math.min(((String) content).length(), maxFieldValueChars);
|
docSize += Math.min(((String) content).length(), maxFieldValueChars);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
docSize = Math.min(docSize, maxTotalChars);
|
|
||||||
|
if (docSize > maxTotalChars) {
|
||||||
|
docSize = maxTotalChars;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return docSize;
|
return docSize;
|
||||||
}
|
}
|
||||||
|
|
|
@ -211,6 +211,19 @@ public abstract class LanguageIdentifierUpdateProcessorFactoryTestCase extends S
|
||||||
assertEquals("", liProcessor.process(doc).getFieldValue("language"));
|
assertEquals("", liProcessor.process(doc).getFieldValue("language"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMissingFieldEmptyString() throws Exception {
|
||||||
|
SolrInputDocument doc;
|
||||||
|
ModifiableSolrParams parameters = new ModifiableSolrParams();
|
||||||
|
parameters.add("langid.fl", "no_such_field");
|
||||||
|
parameters.add("langid.langField", "language");
|
||||||
|
parameters.add("langid.enforceSchema", "false");
|
||||||
|
liProcessor = createLangIdProcessor(parameters);
|
||||||
|
|
||||||
|
doc = new SolrInputDocument();
|
||||||
|
assertEquals("", liProcessor.process(doc).getFieldValue("language"));
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testFallback() throws Exception {
|
public void testFallback() throws Exception {
|
||||||
SolrInputDocument doc;
|
SolrInputDocument doc;
|
||||||
|
|
Loading…
Reference in New Issue