mirror of https://github.com/apache/lucene.git
LUCENE-8805: Parameter changes for stringField() in StoredFieldVisitor
Signed-off-by: Namgyu Kim <kng0828@gmail.com> Signed-off-by: Adrien Grand <jpountz@gmail.com>
This commit is contained in:
parent
644af43c12
commit
5a694ea26f
|
@ -18,6 +18,10 @@ API Changes
|
||||||
* LUCENE-3041: The deprecated Weight#extractTerms() method has been
|
* LUCENE-3041: The deprecated Weight#extractTerms() method has been
|
||||||
removed (Alan Woodward, Simon Willnauer, David Smiley, Luca Cavanna)
|
removed (Alan Woodward, Simon Willnauer, David Smiley, Luca Cavanna)
|
||||||
|
|
||||||
|
* LUCENE-8805: StoredFieldVisitor#stringField now takes a String rather than a
|
||||||
|
byte[] that stores the UTF-8 bytes of the stored string.
|
||||||
|
(Namgyu Kim via Adrien Grand)
|
||||||
|
|
||||||
Bug fixes
|
Bug fixes
|
||||||
|
|
||||||
* LUCENE-8663: NRTCachingDirectory.slowFileExists may open a file while
|
* LUCENE-8663: NRTCachingDirectory.slowFileExists may open a file while
|
||||||
|
|
|
@ -17,9 +17,9 @@
|
||||||
package org.apache.lucene.benchmark.quality.utils;
|
package org.apache.lucene.benchmark.quality.utils;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.charset.StandardCharsets;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
import org.apache.lucene.index.StoredFieldVisitor;
|
import org.apache.lucene.index.StoredFieldVisitor;
|
||||||
|
@ -51,9 +51,8 @@ public class DocNameExtractor {
|
||||||
final List<String> name = new ArrayList<>();
|
final List<String> name = new ArrayList<>();
|
||||||
searcher.getIndexReader().document(docid, new StoredFieldVisitor() {
|
searcher.getIndexReader().document(docid, new StoredFieldVisitor() {
|
||||||
@Override
|
@Override
|
||||||
public void stringField(FieldInfo fieldInfo, byte[] bytes) {
|
public void stringField(FieldInfo fieldInfo, String value) {
|
||||||
String value = new String(bytes, StandardCharsets.UTF_8);
|
name.add(Objects.requireNonNull(value, "String value should not be null"));
|
||||||
name.add(value);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.codecs.simpletext;
|
||||||
|
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
|
||||||
import org.apache.lucene.codecs.StoredFieldsReader;
|
import org.apache.lucene.codecs.StoredFieldsReader;
|
||||||
|
@ -155,7 +156,7 @@ public class SimpleTextStoredFieldsReader extends StoredFieldsReader {
|
||||||
if (type == TYPE_STRING) {
|
if (type == TYPE_STRING) {
|
||||||
byte[] bytes = new byte[scratch.length() - VALUE.length];
|
byte[] bytes = new byte[scratch.length() - VALUE.length];
|
||||||
System.arraycopy(scratch.bytes(), VALUE.length, bytes, 0, bytes.length);
|
System.arraycopy(scratch.bytes(), VALUE.length, bytes, 0, bytes.length);
|
||||||
visitor.stringField(fieldInfo, bytes);
|
visitor.stringField(fieldInfo, new String(bytes, 0, bytes.length, StandardCharsets.UTF_8));
|
||||||
} else if (type == TYPE_BINARY) {
|
} else if (type == TYPE_BINARY) {
|
||||||
byte[] copy = new byte[scratch.length()-VALUE.length];
|
byte[] copy = new byte[scratch.length()-VALUE.length];
|
||||||
System.arraycopy(scratch.bytes(), VALUE.length, copy, 0, copy.length);
|
System.arraycopy(scratch.bytes(), VALUE.length, copy, 0, copy.length);
|
||||||
|
|
|
@ -19,9 +19,9 @@ package org.apache.lucene.codecs;
|
||||||
import java.io.Closeable;
|
import java.io.Closeable;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
import java.nio.charset.StandardCharsets;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
@ -180,10 +180,9 @@ public abstract class StoredFieldsWriter implements Closeable {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException {
|
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
|
||||||
reset(fieldInfo);
|
reset(fieldInfo);
|
||||||
// TODO: can we avoid new String here?
|
stringValue = Objects.requireNonNull(value, "String value should not be null");
|
||||||
stringValue = new String(value, StandardCharsets.UTF_8);
|
|
||||||
write();
|
write();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -211,10 +211,7 @@ public final class CompressingStoredFieldsReader extends StoredFieldsReader {
|
||||||
visitor.binaryField(info, data);
|
visitor.binaryField(info, data);
|
||||||
break;
|
break;
|
||||||
case STRING:
|
case STRING:
|
||||||
length = in.readVInt();
|
visitor.stringField(info, in.readString());
|
||||||
data = new byte[length];
|
|
||||||
in.readBytes(data, 0, length);
|
|
||||||
visitor.stringField(info, data);
|
|
||||||
break;
|
break;
|
||||||
case NUMERIC_INT:
|
case NUMERIC_INT:
|
||||||
visitor.intField(info, in.readZInt());
|
visitor.intField(info, in.readZInt());
|
||||||
|
|
|
@ -17,8 +17,8 @@
|
||||||
package org.apache.lucene.document;
|
package org.apache.lucene.document;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.charset.StandardCharsets;
|
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
|
import java.util.Objects;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
|
@ -67,12 +67,12 @@ public class DocumentStoredFieldVisitor extends StoredFieldVisitor {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException {
|
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
|
||||||
final FieldType ft = new FieldType(TextField.TYPE_STORED);
|
final FieldType ft = new FieldType(TextField.TYPE_STORED);
|
||||||
ft.setStoreTermVectors(fieldInfo.hasVectors());
|
ft.setStoreTermVectors(fieldInfo.hasVectors());
|
||||||
ft.setOmitNorms(fieldInfo.omitsNorms());
|
ft.setOmitNorms(fieldInfo.omitsNorms());
|
||||||
ft.setIndexOptions(fieldInfo.getIndexOptions());
|
ft.setIndexOptions(fieldInfo.getIndexOptions());
|
||||||
doc.add(new StoredField(fieldInfo.name, new String(value, StandardCharsets.UTF_8), ft));
|
doc.add(new StoredField(fieldInfo.name, Objects.requireNonNull(value, "String value should not be null"), ft));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -19,8 +19,8 @@ package org.apache.lucene.index;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
import java.nio.charset.StandardCharsets;
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
@ -114,10 +114,9 @@ final class SortingStoredFieldsConsumer extends StoredFieldsConsumer {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException {
|
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
|
||||||
reset(fieldInfo);
|
reset(fieldInfo);
|
||||||
// TODO: can we avoid new String here?
|
stringValue = Objects.requireNonNull(value, "String value should not be null");
|
||||||
stringValue = new String(value, StandardCharsets.UTF_8);
|
|
||||||
write();
|
write();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -53,8 +53,8 @@ public abstract class StoredFieldVisitor {
|
||||||
public void binaryField(FieldInfo fieldInfo, byte[] value) throws IOException {
|
public void binaryField(FieldInfo fieldInfo, byte[] value) throws IOException {
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Process a string field; the provided byte[] value is a UTF-8 encoded string value. */
|
/** Process a string field. */
|
||||||
public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException {
|
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Process a int numeric field. */
|
/** Process a int numeric field. */
|
||||||
|
|
|
@ -17,7 +17,6 @@
|
||||||
package org.apache.lucene.search.uhighlight;
|
package org.apache.lucene.search.uhighlight;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.charset.StandardCharsets;
|
|
||||||
import java.text.BreakIterator;
|
import java.text.BreakIterator;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
@ -995,9 +994,9 @@ public class UnifiedHighlighter {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void stringField(FieldInfo fieldInfo, byte[] byteValue) throws IOException {
|
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
|
||||||
String value = new String(byteValue, StandardCharsets.UTF_8);
|
|
||||||
assert currentField >= 0;
|
assert currentField >= 0;
|
||||||
|
Objects.requireNonNull(value, "String value should not be null");
|
||||||
CharSequence curValue = values[currentField];
|
CharSequence curValue = values[currentField];
|
||||||
if (curValue == null) {
|
if (curValue == null) {
|
||||||
//question: if truncate due to maxLength, should we try and avoid keeping the other chars in-memory on
|
//question: if truncate due to maxLength, should we try and avoid keeping the other chars in-memory on
|
||||||
|
|
|
@ -17,7 +17,6 @@
|
||||||
package org.apache.lucene.search.vectorhighlight;
|
package org.apache.lucene.search.vectorhighlight;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.charset.StandardCharsets;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
|
@ -25,6 +24,7 @@ import java.util.HashMap;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.document.FieldType;
|
import org.apache.lucene.document.FieldType;
|
||||||
|
@ -152,8 +152,8 @@ public abstract class BaseFragmentsBuilder implements FragmentsBuilder {
|
||||||
reader.document(docId, new StoredFieldVisitor() {
|
reader.document(docId, new StoredFieldVisitor() {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void stringField(FieldInfo fieldInfo, byte[] bytes) {
|
public void stringField(FieldInfo fieldInfo, String value) {
|
||||||
String value = new String(bytes, StandardCharsets.UTF_8);
|
Objects.requireNonNull(value, "String value should not be null");
|
||||||
FieldType ft = new FieldType(TextField.TYPE_STORED);
|
FieldType ft = new FieldType(TextField.TYPE_STORED);
|
||||||
ft.setStoreTermVectors(fieldInfo.hasVectors());
|
ft.setStoreTermVectors(fieldInfo.hasVectors());
|
||||||
fields.add(new Field(fieldInfo.name, value, ft));
|
fields.add(new Field(fieldInfo.name, value, ft));
|
||||||
|
|
|
@ -17,11 +17,11 @@
|
||||||
package org.apache.lucene.document;
|
package org.apache.lucene.document;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.charset.StandardCharsets;
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.Objects;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.*;
|
import org.apache.lucene.analysis.*;
|
||||||
|
@ -209,12 +209,12 @@ public class TestLazyDocument extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void stringField(FieldInfo fieldInfo, byte[] bytes) throws IOException {
|
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
|
||||||
String value = new String(bytes, StandardCharsets.UTF_8);
|
|
||||||
final FieldType ft = new FieldType(TextField.TYPE_STORED);
|
final FieldType ft = new FieldType(TextField.TYPE_STORED);
|
||||||
ft.setStoreTermVectors(fieldInfo.hasVectors());
|
ft.setStoreTermVectors(fieldInfo.hasVectors());
|
||||||
ft.setOmitNorms(fieldInfo.omitsNorms());
|
ft.setOmitNorms(fieldInfo.omitsNorms());
|
||||||
ft.setIndexOptions(fieldInfo.getIndexOptions());
|
ft.setIndexOptions(fieldInfo.getIndexOptions());
|
||||||
|
Objects.requireNonNull(value, "String value should not be null");
|
||||||
doc.add(new Field(fieldInfo.name, value, ft));
|
doc.add(new Field(fieldInfo.name, value, ft));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.index;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
import java.util.Objects;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.util.FilterIterator;
|
import org.apache.lucene.util.FilterIterator;
|
||||||
|
@ -76,8 +77,8 @@ public final class FieldFilterLeafReader extends FilterLeafReader {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException {
|
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
|
||||||
visitor.stringField(fieldInfo, value);
|
visitor.stringField(fieldInfo, Objects.requireNonNull(value, "String value should not be null"));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -20,6 +20,7 @@ import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Objects;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -105,8 +106,8 @@ public class MismatchedLeafReader extends FilterLeafReader {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException {
|
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
|
||||||
in.stringField(renumber(fieldInfo), value);
|
in.stringField(renumber(fieldInfo), Objects.requireNonNull(value, "String value should not be null"));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -29,6 +29,7 @@ import java.util.Date;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.LinkedList;
|
import java.util.LinkedList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Objects;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.function.Predicate;
|
import java.util.function.Predicate;
|
||||||
import java.util.function.Supplier;
|
import java.util.function.Supplier;
|
||||||
|
@ -62,7 +63,6 @@ import org.apache.lucene.util.NumericUtils;
|
||||||
import org.apache.solr.common.SolrDocument;
|
import org.apache.solr.common.SolrDocument;
|
||||||
import org.apache.solr.common.SolrDocumentBase;
|
import org.apache.solr.common.SolrDocumentBase;
|
||||||
import org.apache.solr.common.SolrException;
|
import org.apache.solr.common.SolrException;
|
||||||
import org.apache.solr.common.util.ByteArrayUtf8CharSequence;
|
|
||||||
import org.apache.solr.core.SolrConfig;
|
import org.apache.solr.core.SolrConfig;
|
||||||
import org.apache.solr.response.DocsStreamer;
|
import org.apache.solr.response.DocsStreamer;
|
||||||
import org.apache.solr.response.ResultContext;
|
import org.apache.solr.response.ResultContext;
|
||||||
|
@ -299,14 +299,15 @@ public class SolrDocumentFetcher {
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException {
|
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
|
||||||
Predicate<String> readAsBytes = ResultContext.READASBYTES.get();
|
Predicate<String> readAsBytes = ResultContext.READASBYTES.get();
|
||||||
if (readAsBytes != null && readAsBytes.test(fieldInfo.name)) {
|
if (readAsBytes != null && readAsBytes.test(fieldInfo.name)) {
|
||||||
final FieldType ft = new FieldType(TextField.TYPE_STORED);
|
final FieldType ft = new FieldType(TextField.TYPE_STORED);
|
||||||
ft.setStoreTermVectors(fieldInfo.hasVectors());
|
ft.setStoreTermVectors(fieldInfo.hasVectors());
|
||||||
ft.setOmitNorms(fieldInfo.omitsNorms());
|
ft.setOmitNorms(fieldInfo.omitsNorms());
|
||||||
ft.setIndexOptions(fieldInfo.getIndexOptions());
|
ft.setIndexOptions(fieldInfo.getIndexOptions());
|
||||||
doc.add(new StoredField(fieldInfo.name, new ByteArrayUtf8CharSequence(value, 0, value.length), ft));
|
Objects.requireNonNull(value, "String value should not be null");
|
||||||
|
doc.add(new StoredField(fieldInfo.name, value, ft));
|
||||||
} else {
|
} else {
|
||||||
super.stringField(fieldInfo, value);
|
super.stringField(fieldInfo, value);
|
||||||
}
|
}
|
||||||
|
@ -371,9 +372,9 @@ public class SolrDocumentFetcher {
|
||||||
}
|
}
|
||||||
// must be String
|
// must be String
|
||||||
if (f instanceof LargeLazyField) { // optimization to avoid premature string conversion
|
if (f instanceof LargeLazyField) { // optimization to avoid premature string conversion
|
||||||
visitor.stringField(info, toByteArrayUnwrapIfPossible(((LargeLazyField) f).readBytes()));
|
visitor.stringField(info, toStringUnwrapIfPossible(((LargeLazyField) f).readBytes()));
|
||||||
} else {
|
} else {
|
||||||
visitor.stringField(info, f.stringValue().getBytes(StandardCharsets.UTF_8));
|
visitor.stringField(info, f.stringValue());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -386,6 +387,14 @@ public class SolrDocumentFetcher {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private String toStringUnwrapIfPossible(BytesRef bytesRef) {
|
||||||
|
if (bytesRef.offset == 0 && bytesRef.bytes.length == bytesRef.length) {
|
||||||
|
return new String(bytesRef.bytes, StandardCharsets.UTF_8);
|
||||||
|
} else {
|
||||||
|
return new String(bytesRef.bytes, bytesRef.offset, bytesRef.offset + bytesRef.length, StandardCharsets.UTF_8);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/** Unlike LazyDocument.LazyField, we (a) don't cache large values, and (b) provide access to the byte[]. */
|
/** Unlike LazyDocument.LazyField, we (a) don't cache large values, and (b) provide access to the byte[]. */
|
||||||
class LargeLazyField implements IndexableField {
|
class LargeLazyField implements IndexableField {
|
||||||
|
|
||||||
|
@ -450,9 +459,10 @@ public class SolrDocumentFetcher {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException {
|
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
|
||||||
bytesRef.bytes = value;
|
Objects.requireNonNull(value, "String value should not be null");
|
||||||
bytesRef.length = value.length;
|
bytesRef.bytes = value.getBytes(StandardCharsets.UTF_8);
|
||||||
|
bytesRef.length = value.length();
|
||||||
done = true;
|
done = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue