mirror of https://github.com/apache/lucene.git
LUCENE-8805: Parameter changes for stringField() in StoredFieldVisitor
Signed-off-by: Namgyu Kim <kng0828@gmail.com> Signed-off-by: Adrien Grand <jpountz@gmail.com>
This commit is contained in:
parent
644af43c12
commit
5a694ea26f
|
@ -18,6 +18,10 @@ API Changes
|
|||
* LUCENE-3041: The deprecated Weight#extractTerms() method has been
|
||||
removed (Alan Woodward, Simon Willnauer, David Smiley, Luca Cavanna)
|
||||
|
||||
* LUCENE-8805: StoredFieldVisitor#stringField now takes a String rather than a
|
||||
byte[] that stores the UTF-8 bytes of the stored string.
|
||||
(Namgyu Kim via Adrien Grand)
|
||||
|
||||
Bug fixes
|
||||
|
||||
* LUCENE-8663: NRTCachingDirectory.slowFileExists may open a file while
|
||||
|
|
|
@ -17,9 +17,9 @@
|
|||
package org.apache.lucene.benchmark.quality.utils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.StoredFieldVisitor;
|
||||
|
@ -51,9 +51,8 @@ public class DocNameExtractor {
|
|||
final List<String> name = new ArrayList<>();
|
||||
searcher.getIndexReader().document(docid, new StoredFieldVisitor() {
|
||||
@Override
|
||||
public void stringField(FieldInfo fieldInfo, byte[] bytes) {
|
||||
String value = new String(bytes, StandardCharsets.UTF_8);
|
||||
name.add(value);
|
||||
public void stringField(FieldInfo fieldInfo, String value) {
|
||||
name.add(Objects.requireNonNull(value, "String value should not be null"));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.codecs.simpletext;
|
|||
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.apache.lucene.codecs.StoredFieldsReader;
|
||||
|
@ -155,7 +156,7 @@ public class SimpleTextStoredFieldsReader extends StoredFieldsReader {
|
|||
if (type == TYPE_STRING) {
|
||||
byte[] bytes = new byte[scratch.length() - VALUE.length];
|
||||
System.arraycopy(scratch.bytes(), VALUE.length, bytes, 0, bytes.length);
|
||||
visitor.stringField(fieldInfo, bytes);
|
||||
visitor.stringField(fieldInfo, new String(bytes, 0, bytes.length, StandardCharsets.UTF_8));
|
||||
} else if (type == TYPE_BINARY) {
|
||||
byte[] copy = new byte[scratch.length()-VALUE.length];
|
||||
System.arraycopy(scratch.bytes(), VALUE.length, copy, 0, copy.length);
|
||||
|
|
|
@ -19,9 +19,9 @@ package org.apache.lucene.codecs;
|
|||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
@ -180,10 +180,9 @@ public abstract class StoredFieldsWriter implements Closeable {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException {
|
||||
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
|
||||
reset(fieldInfo);
|
||||
// TODO: can we avoid new String here?
|
||||
stringValue = new String(value, StandardCharsets.UTF_8);
|
||||
stringValue = Objects.requireNonNull(value, "String value should not be null");
|
||||
write();
|
||||
}
|
||||
|
||||
|
|
|
@ -211,10 +211,7 @@ public final class CompressingStoredFieldsReader extends StoredFieldsReader {
|
|||
visitor.binaryField(info, data);
|
||||
break;
|
||||
case STRING:
|
||||
length = in.readVInt();
|
||||
data = new byte[length];
|
||||
in.readBytes(data, 0, length);
|
||||
visitor.stringField(info, data);
|
||||
visitor.stringField(info, in.readString());
|
||||
break;
|
||||
case NUMERIC_INT:
|
||||
visitor.intField(info, in.readZInt());
|
||||
|
|
|
@ -17,8 +17,8 @@
|
|||
package org.apache.lucene.document;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.HashSet;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
|
@ -67,12 +67,12 @@ public class DocumentStoredFieldVisitor extends StoredFieldVisitor {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException {
|
||||
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
|
||||
final FieldType ft = new FieldType(TextField.TYPE_STORED);
|
||||
ft.setStoreTermVectors(fieldInfo.hasVectors());
|
||||
ft.setOmitNorms(fieldInfo.omitsNorms());
|
||||
ft.setIndexOptions(fieldInfo.getIndexOptions());
|
||||
doc.add(new StoredField(fieldInfo.name, new String(value, StandardCharsets.UTF_8), ft));
|
||||
doc.add(new StoredField(fieldInfo.name, Objects.requireNonNull(value, "String value should not be null"), ft));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -19,8 +19,8 @@ package org.apache.lucene.index;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
@ -114,10 +114,9 @@ final class SortingStoredFieldsConsumer extends StoredFieldsConsumer {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException {
|
||||
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
|
||||
reset(fieldInfo);
|
||||
// TODO: can we avoid new String here?
|
||||
stringValue = new String(value, StandardCharsets.UTF_8);
|
||||
stringValue = Objects.requireNonNull(value, "String value should not be null");
|
||||
write();
|
||||
}
|
||||
|
||||
|
|
|
@ -53,8 +53,8 @@ public abstract class StoredFieldVisitor {
|
|||
public void binaryField(FieldInfo fieldInfo, byte[] value) throws IOException {
|
||||
}
|
||||
|
||||
/** Process a string field; the provided byte[] value is a UTF-8 encoded string value. */
|
||||
public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException {
|
||||
/** Process a string field. */
|
||||
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
|
||||
}
|
||||
|
||||
/** Process a int numeric field. */
|
||||
|
|
|
@ -17,7 +17,6 @@
|
|||
package org.apache.lucene.search.uhighlight;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.text.BreakIterator;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
|
@ -995,9 +994,9 @@ public class UnifiedHighlighter {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void stringField(FieldInfo fieldInfo, byte[] byteValue) throws IOException {
|
||||
String value = new String(byteValue, StandardCharsets.UTF_8);
|
||||
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
|
||||
assert currentField >= 0;
|
||||
Objects.requireNonNull(value, "String value should not be null");
|
||||
CharSequence curValue = values[currentField];
|
||||
if (curValue == null) {
|
||||
//question: if truncate due to maxLength, should we try and avoid keeping the other chars in-memory on
|
||||
|
|
|
@ -17,7 +17,6 @@
|
|||
package org.apache.lucene.search.vectorhighlight;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
|
@ -25,6 +24,7 @@ import java.util.HashMap;
|
|||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
|
@ -152,8 +152,8 @@ public abstract class BaseFragmentsBuilder implements FragmentsBuilder {
|
|||
reader.document(docId, new StoredFieldVisitor() {
|
||||
|
||||
@Override
|
||||
public void stringField(FieldInfo fieldInfo, byte[] bytes) {
|
||||
String value = new String(bytes, StandardCharsets.UTF_8);
|
||||
public void stringField(FieldInfo fieldInfo, String value) {
|
||||
Objects.requireNonNull(value, "String value should not be null");
|
||||
FieldType ft = new FieldType(TextField.TYPE_STORED);
|
||||
ft.setStoreTermVectors(fieldInfo.hasVectors());
|
||||
fields.add(new Field(fieldInfo.name, value, ft));
|
||||
|
|
|
@ -17,11 +17,11 @@
|
|||
package org.apache.lucene.document;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.*;
|
||||
|
@ -209,12 +209,12 @@ public class TestLazyDocument extends LuceneTestCase {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void stringField(FieldInfo fieldInfo, byte[] bytes) throws IOException {
|
||||
String value = new String(bytes, StandardCharsets.UTF_8);
|
||||
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
|
||||
final FieldType ft = new FieldType(TextField.TYPE_STORED);
|
||||
ft.setStoreTermVectors(fieldInfo.hasVectors());
|
||||
ft.setOmitNorms(fieldInfo.omitsNorms());
|
||||
ft.setIndexOptions(fieldInfo.getIndexOptions());
|
||||
Objects.requireNonNull(value, "String value should not be null");
|
||||
doc.add(new Field(fieldInfo.name, value, ft));
|
||||
}
|
||||
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.index;
|
|||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.util.FilterIterator;
|
||||
|
@ -76,8 +77,8 @@ public final class FieldFilterLeafReader extends FilterLeafReader {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException {
|
||||
visitor.stringField(fieldInfo, value);
|
||||
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
|
||||
visitor.stringField(fieldInfo, Objects.requireNonNull(value, "String value should not be null"));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -20,6 +20,7 @@ import java.io.IOException;
|
|||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.Random;
|
||||
|
||||
/**
|
||||
|
@ -105,8 +106,8 @@ public class MismatchedLeafReader extends FilterLeafReader {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException {
|
||||
in.stringField(renumber(fieldInfo), value);
|
||||
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
|
||||
in.stringField(renumber(fieldInfo), Objects.requireNonNull(value, "String value should not be null"));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -29,6 +29,7 @@ import java.util.Date;
|
|||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.function.Supplier;
|
||||
|
@ -62,7 +63,6 @@ import org.apache.lucene.util.NumericUtils;
|
|||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.SolrDocumentBase;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.util.ByteArrayUtf8CharSequence;
|
||||
import org.apache.solr.core.SolrConfig;
|
||||
import org.apache.solr.response.DocsStreamer;
|
||||
import org.apache.solr.response.ResultContext;
|
||||
|
@ -299,14 +299,15 @@ public class SolrDocumentFetcher {
|
|||
|
||||
|
||||
@Override
|
||||
public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException {
|
||||
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
|
||||
Predicate<String> readAsBytes = ResultContext.READASBYTES.get();
|
||||
if (readAsBytes != null && readAsBytes.test(fieldInfo.name)) {
|
||||
final FieldType ft = new FieldType(TextField.TYPE_STORED);
|
||||
ft.setStoreTermVectors(fieldInfo.hasVectors());
|
||||
ft.setOmitNorms(fieldInfo.omitsNorms());
|
||||
ft.setIndexOptions(fieldInfo.getIndexOptions());
|
||||
doc.add(new StoredField(fieldInfo.name, new ByteArrayUtf8CharSequence(value, 0, value.length), ft));
|
||||
Objects.requireNonNull(value, "String value should not be null");
|
||||
doc.add(new StoredField(fieldInfo.name, value, ft));
|
||||
} else {
|
||||
super.stringField(fieldInfo, value);
|
||||
}
|
||||
|
@ -371,9 +372,9 @@ public class SolrDocumentFetcher {
|
|||
}
|
||||
// must be String
|
||||
if (f instanceof LargeLazyField) { // optimization to avoid premature string conversion
|
||||
visitor.stringField(info, toByteArrayUnwrapIfPossible(((LargeLazyField) f).readBytes()));
|
||||
visitor.stringField(info, toStringUnwrapIfPossible(((LargeLazyField) f).readBytes()));
|
||||
} else {
|
||||
visitor.stringField(info, f.stringValue().getBytes(StandardCharsets.UTF_8));
|
||||
visitor.stringField(info, f.stringValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -386,6 +387,14 @@ public class SolrDocumentFetcher {
|
|||
}
|
||||
}
|
||||
|
||||
private String toStringUnwrapIfPossible(BytesRef bytesRef) {
|
||||
if (bytesRef.offset == 0 && bytesRef.bytes.length == bytesRef.length) {
|
||||
return new String(bytesRef.bytes, StandardCharsets.UTF_8);
|
||||
} else {
|
||||
return new String(bytesRef.bytes, bytesRef.offset, bytesRef.offset + bytesRef.length, StandardCharsets.UTF_8);
|
||||
}
|
||||
}
|
||||
|
||||
/** Unlike LazyDocument.LazyField, we (a) don't cache large values, and (b) provide access to the byte[]. */
|
||||
class LargeLazyField implements IndexableField {
|
||||
|
||||
|
@ -450,9 +459,10 @@ public class SolrDocumentFetcher {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException {
|
||||
bytesRef.bytes = value;
|
||||
bytesRef.length = value.length;
|
||||
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
|
||||
Objects.requireNonNull(value, "String value should not be null");
|
||||
bytesRef.bytes = value.getBytes(StandardCharsets.UTF_8);
|
||||
bytesRef.length = value.length();
|
||||
done = true;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue