LUCENE-8805: Parameter changes for stringField() in StoredFieldVisitor

Signed-off-by: Namgyu Kim <kng0828@gmail.com>
Signed-off-by: Adrien Grand <jpountz@gmail.com>
This commit is contained in:
Namgyu Kim 2019-05-21 22:18:14 +09:00 committed by Adrien Grand
parent 644af43c12
commit 5a694ea26f
14 changed files with 53 additions and 43 deletions

View File

@ -18,6 +18,10 @@ API Changes
* LUCENE-3041: The deprecated Weight#extractTerms() method has been * LUCENE-3041: The deprecated Weight#extractTerms() method has been
removed (Alan Woodward, Simon Willnauer, David Smiley, Luca Cavanna) removed (Alan Woodward, Simon Willnauer, David Smiley, Luca Cavanna)
* LUCENE-8805: StoredFieldVisitor#stringField now takes a String rather than a
byte[] that stores the UTF-8 bytes of the stored string.
(Namgyu Kim via Adrien Grand)
Bug fixes Bug fixes
* LUCENE-8663: NRTCachingDirectory.slowFileExists may open a file while * LUCENE-8663: NRTCachingDirectory.slowFileExists may open a file while

View File

@ -17,9 +17,9 @@
package org.apache.lucene.benchmark.quality.utils; package org.apache.lucene.benchmark.quality.utils;
import java.io.IOException; import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Objects;
import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.StoredFieldVisitor; import org.apache.lucene.index.StoredFieldVisitor;
@ -51,9 +51,8 @@ public class DocNameExtractor {
final List<String> name = new ArrayList<>(); final List<String> name = new ArrayList<>();
searcher.getIndexReader().document(docid, new StoredFieldVisitor() { searcher.getIndexReader().document(docid, new StoredFieldVisitor() {
@Override @Override
public void stringField(FieldInfo fieldInfo, byte[] bytes) { public void stringField(FieldInfo fieldInfo, String value) {
String value = new String(bytes, StandardCharsets.UTF_8); name.add(Objects.requireNonNull(value, "String value should not be null"));
name.add(value);
} }
@Override @Override

View File

@ -18,6 +18,7 @@ package org.apache.lucene.codecs.simpletext;
import java.io.IOException; import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays; import java.util.Arrays;
import org.apache.lucene.codecs.StoredFieldsReader; import org.apache.lucene.codecs.StoredFieldsReader;
@ -155,7 +156,7 @@ public class SimpleTextStoredFieldsReader extends StoredFieldsReader {
if (type == TYPE_STRING) { if (type == TYPE_STRING) {
byte[] bytes = new byte[scratch.length() - VALUE.length]; byte[] bytes = new byte[scratch.length() - VALUE.length];
System.arraycopy(scratch.bytes(), VALUE.length, bytes, 0, bytes.length); System.arraycopy(scratch.bytes(), VALUE.length, bytes, 0, bytes.length);
visitor.stringField(fieldInfo, bytes); visitor.stringField(fieldInfo, new String(bytes, 0, bytes.length, StandardCharsets.UTF_8));
} else if (type == TYPE_BINARY) { } else if (type == TYPE_BINARY) {
byte[] copy = new byte[scratch.length()-VALUE.length]; byte[] copy = new byte[scratch.length()-VALUE.length];
System.arraycopy(scratch.bytes(), VALUE.length, copy, 0, copy.length); System.arraycopy(scratch.bytes(), VALUE.length, copy, 0, copy.length);

View File

@ -19,9 +19,9 @@ package org.apache.lucene.codecs;
import java.io.Closeable; import java.io.Closeable;
import java.io.IOException; import java.io.IOException;
import java.io.Reader; import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Objects;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
@ -180,10 +180,9 @@ public abstract class StoredFieldsWriter implements Closeable {
} }
@Override @Override
public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException { public void stringField(FieldInfo fieldInfo, String value) throws IOException {
reset(fieldInfo); reset(fieldInfo);
// TODO: can we avoid new String here? stringValue = Objects.requireNonNull(value, "String value should not be null");
stringValue = new String(value, StandardCharsets.UTF_8);
write(); write();
} }

View File

@ -211,10 +211,7 @@ public final class CompressingStoredFieldsReader extends StoredFieldsReader {
visitor.binaryField(info, data); visitor.binaryField(info, data);
break; break;
case STRING: case STRING:
length = in.readVInt(); visitor.stringField(info, in.readString());
data = new byte[length];
in.readBytes(data, 0, length);
visitor.stringField(info, data);
break; break;
case NUMERIC_INT: case NUMERIC_INT:
visitor.intField(info, in.readZInt()); visitor.intField(info, in.readZInt());

View File

@ -17,8 +17,8 @@
package org.apache.lucene.document; package org.apache.lucene.document;
import java.io.IOException; import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.HashSet; import java.util.HashSet;
import java.util.Objects;
import java.util.Set; import java.util.Set;
import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo;
@ -67,12 +67,12 @@ public class DocumentStoredFieldVisitor extends StoredFieldVisitor {
} }
@Override @Override
public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException { public void stringField(FieldInfo fieldInfo, String value) throws IOException {
final FieldType ft = new FieldType(TextField.TYPE_STORED); final FieldType ft = new FieldType(TextField.TYPE_STORED);
ft.setStoreTermVectors(fieldInfo.hasVectors()); ft.setStoreTermVectors(fieldInfo.hasVectors());
ft.setOmitNorms(fieldInfo.omitsNorms()); ft.setOmitNorms(fieldInfo.omitsNorms());
ft.setIndexOptions(fieldInfo.getIndexOptions()); ft.setIndexOptions(fieldInfo.getIndexOptions());
doc.add(new StoredField(fieldInfo.name, new String(value, StandardCharsets.UTF_8), ft)); doc.add(new StoredField(fieldInfo.name, Objects.requireNonNull(value, "String value should not be null"), ft));
} }
@Override @Override

View File

@ -19,8 +19,8 @@ package org.apache.lucene.index;
import java.io.IOException; import java.io.IOException;
import java.io.Reader; import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.util.Map; import java.util.Map;
import java.util.Objects;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
@ -114,10 +114,9 @@ final class SortingStoredFieldsConsumer extends StoredFieldsConsumer {
} }
@Override @Override
public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException { public void stringField(FieldInfo fieldInfo, String value) throws IOException {
reset(fieldInfo); reset(fieldInfo);
// TODO: can we avoid new String here? stringValue = Objects.requireNonNull(value, "String value should not be null");
stringValue = new String(value, StandardCharsets.UTF_8);
write(); write();
} }

View File

@ -53,8 +53,8 @@ public abstract class StoredFieldVisitor {
public void binaryField(FieldInfo fieldInfo, byte[] value) throws IOException { public void binaryField(FieldInfo fieldInfo, byte[] value) throws IOException {
} }
/** Process a string field; the provided byte[] value is a UTF-8 encoded string value. */ /** Process a string field. */
public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException { public void stringField(FieldInfo fieldInfo, String value) throws IOException {
} }
/** Process a int numeric field. */ /** Process a int numeric field. */

View File

@ -17,7 +17,6 @@
package org.apache.lucene.search.uhighlight; package org.apache.lucene.search.uhighlight;
import java.io.IOException; import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.text.BreakIterator; import java.text.BreakIterator;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
@ -995,9 +994,9 @@ public class UnifiedHighlighter {
} }
@Override @Override
public void stringField(FieldInfo fieldInfo, byte[] byteValue) throws IOException { public void stringField(FieldInfo fieldInfo, String value) throws IOException {
String value = new String(byteValue, StandardCharsets.UTF_8);
assert currentField >= 0; assert currentField >= 0;
Objects.requireNonNull(value, "String value should not be null");
CharSequence curValue = values[currentField]; CharSequence curValue = values[currentField];
if (curValue == null) { if (curValue == null) {
//question: if truncate due to maxLength, should we try and avoid keeping the other chars in-memory on //question: if truncate due to maxLength, should we try and avoid keeping the other chars in-memory on

View File

@ -17,7 +17,6 @@
package org.apache.lucene.search.vectorhighlight; package org.apache.lucene.search.vectorhighlight;
import java.io.IOException; import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.Comparator; import java.util.Comparator;
@ -25,6 +24,7 @@ import java.util.HashMap;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Objects;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType; import org.apache.lucene.document.FieldType;
@ -152,8 +152,8 @@ public abstract class BaseFragmentsBuilder implements FragmentsBuilder {
reader.document(docId, new StoredFieldVisitor() { reader.document(docId, new StoredFieldVisitor() {
@Override @Override
public void stringField(FieldInfo fieldInfo, byte[] bytes) { public void stringField(FieldInfo fieldInfo, String value) {
String value = new String(bytes, StandardCharsets.UTF_8); Objects.requireNonNull(value, "String value should not be null");
FieldType ft = new FieldType(TextField.TYPE_STORED); FieldType ft = new FieldType(TextField.TYPE_STORED);
ft.setStoreTermVectors(fieldInfo.hasVectors()); ft.setStoreTermVectors(fieldInfo.hasVectors());
fields.add(new Field(fieldInfo.name, value, ft)); fields.add(new Field(fieldInfo.name, value, ft));

View File

@ -17,11 +17,11 @@
package org.apache.lucene.document; package org.apache.lucene.document;
import java.io.IOException; import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays; import java.util.Arrays;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.Map; import java.util.Map;
import java.util.Objects;
import java.util.Set; import java.util.Set;
import org.apache.lucene.analysis.*; import org.apache.lucene.analysis.*;
@ -209,12 +209,12 @@ public class TestLazyDocument extends LuceneTestCase {
} }
@Override @Override
public void stringField(FieldInfo fieldInfo, byte[] bytes) throws IOException { public void stringField(FieldInfo fieldInfo, String value) throws IOException {
String value = new String(bytes, StandardCharsets.UTF_8);
final FieldType ft = new FieldType(TextField.TYPE_STORED); final FieldType ft = new FieldType(TextField.TYPE_STORED);
ft.setStoreTermVectors(fieldInfo.hasVectors()); ft.setStoreTermVectors(fieldInfo.hasVectors());
ft.setOmitNorms(fieldInfo.omitsNorms()); ft.setOmitNorms(fieldInfo.omitsNorms());
ft.setIndexOptions(fieldInfo.getIndexOptions()); ft.setIndexOptions(fieldInfo.getIndexOptions());
Objects.requireNonNull(value, "String value should not be null");
doc.add(new Field(fieldInfo.name, value, ft)); doc.add(new Field(fieldInfo.name, value, ft));
} }

View File

@ -19,6 +19,7 @@ package org.apache.lucene.index;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Iterator; import java.util.Iterator;
import java.util.Objects;
import java.util.Set; import java.util.Set;
import org.apache.lucene.util.FilterIterator; import org.apache.lucene.util.FilterIterator;
@ -76,8 +77,8 @@ public final class FieldFilterLeafReader extends FilterLeafReader {
} }
@Override @Override
public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException { public void stringField(FieldInfo fieldInfo, String value) throws IOException {
visitor.stringField(fieldInfo, value); visitor.stringField(fieldInfo, Objects.requireNonNull(value, "String value should not be null"));
} }
@Override @Override

View File

@ -20,6 +20,7 @@ import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.Objects;
import java.util.Random; import java.util.Random;
/** /**
@ -105,8 +106,8 @@ public class MismatchedLeafReader extends FilterLeafReader {
} }
@Override @Override
public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException { public void stringField(FieldInfo fieldInfo, String value) throws IOException {
in.stringField(renumber(fieldInfo), value); in.stringField(renumber(fieldInfo), Objects.requireNonNull(value, "String value should not be null"));
} }
@Override @Override

View File

@ -29,6 +29,7 @@ import java.util.Date;
import java.util.HashSet; import java.util.HashSet;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
import java.util.Objects;
import java.util.Set; import java.util.Set;
import java.util.function.Predicate; import java.util.function.Predicate;
import java.util.function.Supplier; import java.util.function.Supplier;
@ -62,7 +63,6 @@ import org.apache.lucene.util.NumericUtils;
import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentBase; import org.apache.solr.common.SolrDocumentBase;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.ByteArrayUtf8CharSequence;
import org.apache.solr.core.SolrConfig; import org.apache.solr.core.SolrConfig;
import org.apache.solr.response.DocsStreamer; import org.apache.solr.response.DocsStreamer;
import org.apache.solr.response.ResultContext; import org.apache.solr.response.ResultContext;
@ -299,14 +299,15 @@ public class SolrDocumentFetcher {
@Override @Override
public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException { public void stringField(FieldInfo fieldInfo, String value) throws IOException {
Predicate<String> readAsBytes = ResultContext.READASBYTES.get(); Predicate<String> readAsBytes = ResultContext.READASBYTES.get();
if (readAsBytes != null && readAsBytes.test(fieldInfo.name)) { if (readAsBytes != null && readAsBytes.test(fieldInfo.name)) {
final FieldType ft = new FieldType(TextField.TYPE_STORED); final FieldType ft = new FieldType(TextField.TYPE_STORED);
ft.setStoreTermVectors(fieldInfo.hasVectors()); ft.setStoreTermVectors(fieldInfo.hasVectors());
ft.setOmitNorms(fieldInfo.omitsNorms()); ft.setOmitNorms(fieldInfo.omitsNorms());
ft.setIndexOptions(fieldInfo.getIndexOptions()); ft.setIndexOptions(fieldInfo.getIndexOptions());
doc.add(new StoredField(fieldInfo.name, new ByteArrayUtf8CharSequence(value, 0, value.length), ft)); Objects.requireNonNull(value, "String value should not be null");
doc.add(new StoredField(fieldInfo.name, value, ft));
} else { } else {
super.stringField(fieldInfo, value); super.stringField(fieldInfo, value);
} }
@ -371,9 +372,9 @@ public class SolrDocumentFetcher {
} }
// must be String // must be String
if (f instanceof LargeLazyField) { // optimization to avoid premature string conversion if (f instanceof LargeLazyField) { // optimization to avoid premature string conversion
visitor.stringField(info, toByteArrayUnwrapIfPossible(((LargeLazyField) f).readBytes())); visitor.stringField(info, toStringUnwrapIfPossible(((LargeLazyField) f).readBytes()));
} else { } else {
visitor.stringField(info, f.stringValue().getBytes(StandardCharsets.UTF_8)); visitor.stringField(info, f.stringValue());
} }
} }
} }
@ -386,6 +387,14 @@ public class SolrDocumentFetcher {
} }
} }
/**
 * Decodes the UTF-8 bytes referenced by {@code bytesRef} into a {@link String}.
 *
 * <p>Exactly {@code bytesRef.length} bytes are decoded, starting at {@code bytesRef.offset}
 * within {@code bytesRef.bytes}.
 *
 * @param bytesRef the slice of UTF-8 encoded bytes to decode; must not be null
 * @return the decoded string
 */
private String toStringUnwrapIfPossible(BytesRef bytesRef) {
  // The third argument of this String constructor is the NUMBER of bytes to decode, not an
  // end index. Passing offset + length (as before) over-reads or throws whenever offset > 0.
  // When offset == 0 and length == bytes.length this decodes the whole array, so no
  // separate fast path is needed.
  return new String(bytesRef.bytes, bytesRef.offset, bytesRef.length, StandardCharsets.UTF_8);
}
/** Unlike LazyDocument.LazyField, we (a) don't cache large values, and (b) provide access to the byte[]. */ /** Unlike LazyDocument.LazyField, we (a) don't cache large values, and (b) provide access to the byte[]. */
class LargeLazyField implements IndexableField { class LargeLazyField implements IndexableField {
@ -450,9 +459,10 @@ public class SolrDocumentFetcher {
} }
@Override @Override
public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException { public void stringField(FieldInfo fieldInfo, String value) throws IOException {
bytesRef.bytes = value; Objects.requireNonNull(value, "String value should not be null");
bytesRef.length = value.length; bytesRef.bytes = value.getBytes(StandardCharsets.UTF_8);
bytesRef.length = value.length();
done = true; done = true;
} }