LUCENE-8805: Parameter changes for stringField() in StoredFieldVisitor

Signed-off-by: Namgyu Kim <kng0828@gmail.com>
Signed-off-by: Adrien Grand <jpountz@gmail.com>
This commit is contained in:
Namgyu Kim 2019-05-21 22:18:14 +09:00 committed by Adrien Grand
parent 644af43c12
commit 5a694ea26f
14 changed files with 53 additions and 43 deletions

View File

@ -18,6 +18,10 @@ API Changes
* LUCENE-3041: The deprecated Weight#extractTerms() method has been
removed (Alan Woodward, Simon Willnauer, David Smiley, Luca Cavanna)
* LUCENE-8805: StoredFieldVisitor#stringField now takes a String rather than a
byte[] that stores the UTF-8 bytes of the stored string.
(Namgyu Kim via Adrien Grand)
Bug fixes
* LUCENE-8663: NRTCachingDirectory.slowFileExists may open a file while

View File

@ -17,9 +17,9 @@
package org.apache.lucene.benchmark.quality.utils;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.StoredFieldVisitor;
@ -51,9 +51,8 @@ public class DocNameExtractor {
final List<String> name = new ArrayList<>();
searcher.getIndexReader().document(docid, new StoredFieldVisitor() {
@Override
public void stringField(FieldInfo fieldInfo, byte[] bytes) {
String value = new String(bytes, StandardCharsets.UTF_8);
name.add(value);
public void stringField(FieldInfo fieldInfo, String value) {
name.add(Objects.requireNonNull(value, "String value should not be null"));
}
@Override

View File

@ -18,6 +18,7 @@ package org.apache.lucene.codecs.simpletext;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import org.apache.lucene.codecs.StoredFieldsReader;
@ -155,7 +156,7 @@ public class SimpleTextStoredFieldsReader extends StoredFieldsReader {
if (type == TYPE_STRING) {
byte[] bytes = new byte[scratch.length() - VALUE.length];
System.arraycopy(scratch.bytes(), VALUE.length, bytes, 0, bytes.length);
visitor.stringField(fieldInfo, bytes);
visitor.stringField(fieldInfo, new String(bytes, 0, bytes.length, StandardCharsets.UTF_8));
} else if (type == TYPE_BINARY) {
byte[] copy = new byte[scratch.length()-VALUE.length];
System.arraycopy(scratch.bytes(), VALUE.length, copy, 0, copy.length);

View File

@ -19,9 +19,9 @@ package org.apache.lucene.codecs;
import java.io.Closeable;
import java.io.IOException;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
@ -180,10 +180,9 @@ public abstract class StoredFieldsWriter implements Closeable {
}
@Override
public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException {
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
reset(fieldInfo);
// TODO: can we avoid new String here?
stringValue = new String(value, StandardCharsets.UTF_8);
stringValue = Objects.requireNonNull(value, "String value should not be null");
write();
}

View File

@ -211,10 +211,7 @@ public final class CompressingStoredFieldsReader extends StoredFieldsReader {
visitor.binaryField(info, data);
break;
case STRING:
length = in.readVInt();
data = new byte[length];
in.readBytes(data, 0, length);
visitor.stringField(info, data);
visitor.stringField(info, in.readString());
break;
case NUMERIC_INT:
visitor.intField(info, in.readZInt());

View File

@ -17,8 +17,8 @@
package org.apache.lucene.document;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import java.util.Objects;
import java.util.Set;
import org.apache.lucene.index.FieldInfo;
@ -67,12 +67,12 @@ public class DocumentStoredFieldVisitor extends StoredFieldVisitor {
}
@Override
public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException {
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
final FieldType ft = new FieldType(TextField.TYPE_STORED);
ft.setStoreTermVectors(fieldInfo.hasVectors());
ft.setOmitNorms(fieldInfo.omitsNorms());
ft.setIndexOptions(fieldInfo.getIndexOptions());
doc.add(new StoredField(fieldInfo.name, new String(value, StandardCharsets.UTF_8), ft));
doc.add(new StoredField(fieldInfo.name, Objects.requireNonNull(value, "String value should not be null"), ft));
}
@Override

View File

@ -19,8 +19,8 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.util.Map;
import java.util.Objects;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
@ -114,10 +114,9 @@ final class SortingStoredFieldsConsumer extends StoredFieldsConsumer {
}
@Override
public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException {
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
reset(fieldInfo);
// TODO: can we avoid new String here?
stringValue = new String(value, StandardCharsets.UTF_8);
stringValue = Objects.requireNonNull(value, "String value should not be null");
write();
}

View File

@ -53,8 +53,8 @@ public abstract class StoredFieldVisitor {
public void binaryField(FieldInfo fieldInfo, byte[] value) throws IOException {
}
/** Process a string field; the provided byte[] value is a UTF-8 encoded string value. */
public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException {
/** Process a string field. */
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
}
/** Process a int numeric field. */

View File

@ -17,7 +17,6 @@
package org.apache.lucene.search.uhighlight;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.Arrays;
@ -995,9 +994,9 @@ public class UnifiedHighlighter {
}
@Override
public void stringField(FieldInfo fieldInfo, byte[] byteValue) throws IOException {
String value = new String(byteValue, StandardCharsets.UTF_8);
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
assert currentField >= 0;
Objects.requireNonNull(value, "String value should not be null");
CharSequence curValue = values[currentField];
if (curValue == null) {
//question: if truncate due to maxLength, should we try and avoid keeping the other chars in-memory on

View File

@ -17,7 +17,6 @@
package org.apache.lucene.search.vectorhighlight;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
@ -25,6 +24,7 @@ import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
@ -152,8 +152,8 @@ public abstract class BaseFragmentsBuilder implements FragmentsBuilder {
reader.document(docId, new StoredFieldVisitor() {
@Override
public void stringField(FieldInfo fieldInfo, byte[] bytes) {
String value = new String(bytes, StandardCharsets.UTF_8);
public void stringField(FieldInfo fieldInfo, String value) {
Objects.requireNonNull(value, "String value should not be null");
FieldType ft = new FieldType(TextField.TYPE_STORED);
ft.setStoreTermVectors(fieldInfo.hasVectors());
fields.add(new Field(fieldInfo.name, value, ft));

View File

@ -17,11 +17,11 @@
package org.apache.lucene.document;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import org.apache.lucene.analysis.*;
@ -209,12 +209,12 @@ public class TestLazyDocument extends LuceneTestCase {
}
@Override
public void stringField(FieldInfo fieldInfo, byte[] bytes) throws IOException {
String value = new String(bytes, StandardCharsets.UTF_8);
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
final FieldType ft = new FieldType(TextField.TYPE_STORED);
ft.setStoreTermVectors(fieldInfo.hasVectors());
ft.setOmitNorms(fieldInfo.omitsNorms());
ft.setIndexOptions(fieldInfo.getIndexOptions());
Objects.requireNonNull(value, "String value should not be null");
doc.add(new Field(fieldInfo.name, value, ft));
}

View File

@ -19,6 +19,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Objects;
import java.util.Set;
import org.apache.lucene.util.FilterIterator;
@ -76,8 +77,8 @@ public final class FieldFilterLeafReader extends FilterLeafReader {
}
@Override
public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException {
visitor.stringField(fieldInfo, value);
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
visitor.stringField(fieldInfo, Objects.requireNonNull(value, "String value should not be null"));
}
@Override

View File

@ -20,6 +20,7 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.Random;
/**
@ -105,8 +106,8 @@ public class MismatchedLeafReader extends FilterLeafReader {
}
@Override
public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException {
in.stringField(renumber(fieldInfo), value);
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
in.stringField(renumber(fieldInfo), Objects.requireNonNull(value, "String value should not be null"));
}
@Override

View File

@ -29,6 +29,7 @@ import java.util.Date;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.function.Predicate;
import java.util.function.Supplier;
@ -62,7 +63,6 @@ import org.apache.lucene.util.NumericUtils;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentBase;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.ByteArrayUtf8CharSequence;
import org.apache.solr.core.SolrConfig;
import org.apache.solr.response.DocsStreamer;
import org.apache.solr.response.ResultContext;
@ -299,14 +299,15 @@ public class SolrDocumentFetcher {
@Override
public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException {
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
Predicate<String> readAsBytes = ResultContext.READASBYTES.get();
if (readAsBytes != null && readAsBytes.test(fieldInfo.name)) {
final FieldType ft = new FieldType(TextField.TYPE_STORED);
ft.setStoreTermVectors(fieldInfo.hasVectors());
ft.setOmitNorms(fieldInfo.omitsNorms());
ft.setIndexOptions(fieldInfo.getIndexOptions());
doc.add(new StoredField(fieldInfo.name, new ByteArrayUtf8CharSequence(value, 0, value.length), ft));
Objects.requireNonNull(value, "String value should not be null");
doc.add(new StoredField(fieldInfo.name, value, ft));
} else {
super.stringField(fieldInfo, value);
}
@ -371,9 +372,9 @@ public class SolrDocumentFetcher {
}
// must be String
if (f instanceof LargeLazyField) { // optimization to avoid premature string conversion
visitor.stringField(info, toByteArrayUnwrapIfPossible(((LargeLazyField) f).readBytes()));
visitor.stringField(info, toStringUnwrapIfPossible(((LargeLazyField) f).readBytes()));
} else {
visitor.stringField(info, f.stringValue().getBytes(StandardCharsets.UTF_8));
visitor.stringField(info, f.stringValue());
}
}
}
@ -386,6 +387,14 @@ public class SolrDocumentFetcher {
}
}
/**
 * Decodes the UTF-8 bytes referenced by {@code bytesRef} into a {@link String}.
 *
 * <p>Only the slice {@code [offset, offset + length)} of the backing array is decoded.
 *
 * @param bytesRef reference to the UTF-8 encoded bytes; must not be null
 * @return the decoded string value
 */
private String toStringUnwrapIfPossible(BytesRef bytesRef) {
  // The third argument of String(byte[], int, int, Charset) is the NUMBER of bytes to decode,
  // not an end index. Passing offset + length over-reads (or throws) whenever offset != 0.
  // When offset == 0 and length spans the whole array this is the same as decoding the full array,
  // so no separate fast-path branch is needed.
  return new String(bytesRef.bytes, bytesRef.offset, bytesRef.length, StandardCharsets.UTF_8);
}
/** Unlike LazyDocument.LazyField, we (a) don't cache large values, and (b) provide access to the byte[]. */
class LargeLazyField implements IndexableField {
@ -450,9 +459,10 @@ public class SolrDocumentFetcher {
}
@Override
public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException {
bytesRef.bytes = value;
bytesRef.length = value.length;
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
  Objects.requireNonNull(value, "String value should not be null");
  // Encode once and take the length from the encoded array: String.length() counts UTF-16
  // chars, which is smaller than the UTF-8 byte count for any non-ASCII content and would
  // make the BytesRef expose a truncated value.
  final byte[] utf8Bytes = value.getBytes(StandardCharsets.UTF_8);
  bytesRef.bytes = utf8Bytes;
  bytesRef.length = utf8Bytes.length;
  // Signals that the requested field has been captured.
  done = true;
}