mirror of https://github.com/apache/lucene.git
LUCENE-5989: allow passing BytesRef to StringField to make it easier to index arbitrary binary tokens
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1672781 13f79535-47bb-0310-9956-ffa450edef68
parent 09ddb7d912
commit 249d0d25fe
@@ -58,6 +58,9 @@ New Features
   accuracy of SDV. Includes optimized Intersect predicate to avoid many
   geometry checks. Uses TwoPhaseIterator. (David Smiley)
 
+* LUCENE-5989: Allow passing BytesRef to StringField to make it easier
+  to index arbitrary binary tokens (Mike McCandless)
+
 Optimizations
 
 * LUCENE-6379: IndexWriter.deleteDocuments(Query...) now detects if

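The gist of the change, as a minimal standalone usage sketch (not part of this commit; the directory, analyzer and field names are arbitrary illustration choices, WhitespaceAnalyzer from analyzers-common is only a placeholder since StringField is never tokenized, and it mirrors the testIndexedBinaryField test added further down):

    import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.StringField;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.TermQuery;
    import org.apache.lucene.search.TopDocs;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.RAMDirectory;
    import org.apache.lucene.util.BytesRef;

    public class IndexBinaryTokenDemo {
      public static void main(String[] args) throws Exception {
        Directory dir = new RAMDirectory();
        // The analyzer choice is irrelevant here: StringField is not tokenized.
        IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new WhitespaceAnalyzer()));

        BytesRef id = new BytesRef(new byte[] {0x00, 0x2a, (byte) 0xff});  // arbitrary binary token
        Document doc = new Document();
        doc.add(new StringField("id", id, Field.Store.YES));               // new BytesRef-taking constructor
        w.addDocument(doc);
        w.commit();

        // The binary value is indexed as a single token, so it can be looked up with a binary Term.
        DirectoryReader r = DirectoryReader.open(dir);
        IndexSearcher s = new IndexSearcher(r);
        TopDocs hits = s.search(new TermQuery(new Term("id", id)), 1);
        System.out.println("hits: " + hits.totalHits);                     // prints 1

        r.close();
        w.close();
        dir.close();
      }
    }
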
@@ -17,6 +17,7 @@
 package org.apache.lucene.benchmark.quality.utils;
 
 import java.io.IOException;
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.List;
 
@@ -50,7 +51,8 @@ public class DocNameExtractor {
     final List<String> name = new ArrayList<>();
     searcher.getIndexReader().document(docid, new StoredFieldVisitor() {
         @Override
-        public void stringField(FieldInfo fieldInfo, String value) {
+        public void stringField(FieldInfo fieldInfo, byte[] bytes) {
+          String value = new String(bytes, StandardCharsets.UTF_8);
           name.add(value);
         }

@@ -18,7 +18,6 @@ package org.apache.lucene.codecs.simpletext;
  */
 
 import java.io.IOException;
-import java.nio.charset.StandardCharsets;
 
 import org.apache.lucene.codecs.StoredFieldsReader;
 import org.apache.lucene.index.FieldInfo;
@@ -153,7 +152,9 @@ public class SimpleTextStoredFieldsReader extends StoredFieldsReader {
         readLine();
         assert StringHelper.startsWith(scratch.get(), VALUE);
         if (type == TYPE_STRING) {
-          visitor.stringField(fieldInfo, new String(scratch.bytes(), VALUE.length, scratch.length()-VALUE.length, StandardCharsets.UTF_8));
+          byte[] bytes = new byte[scratch.length() - VALUE.length];
+          System.arraycopy(scratch.bytes(), VALUE.length, bytes, 0, bytes.length);
+          visitor.stringField(fieldInfo, bytes);
         } else if (type == TYPE_BINARY) {
           byte[] copy = new byte[scratch.length()-VALUE.length];
           System.arraycopy(scratch.bytes(), VALUE.length, copy, 0, copy.length);

@@ -19,6 +19,7 @@ package org.apache.lucene.codecs;
 import java.io.Closeable;
 import java.io.IOException;
 import java.io.Reader;
+import java.nio.charset.StandardCharsets;
 
 import org.apache.lucene.document.StoredField;
 import org.apache.lucene.index.FieldInfo;
@@ -140,14 +141,16 @@ public abstract class StoredFieldsWriter implements Closeable {
     @Override
     public void binaryField(FieldInfo fieldInfo, byte[] value) throws IOException {
       reset(fieldInfo);
+      // TODO: can we avoid new BR here?
       binaryValue = new BytesRef(value);
       write();
     }
 
     @Override
-    public void stringField(FieldInfo fieldInfo, String value) throws IOException {
+    public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException {
       reset(fieldInfo);
-      stringValue = value;
+      // TODO: can we avoid new String here?
+      stringValue = new String(value, StandardCharsets.UTF_8);
       write();
     }

@@ -41,7 +41,6 @@ import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter
 
 import java.io.EOFException;
 import java.io.IOException;
-import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
@@ -220,7 +219,7 @@ public final class CompressingStoredFieldsReader extends StoredFieldsReader {
           length = in.readVInt();
           data = new byte[length];
           in.readBytes(data, 0, length);
-          visitor.stringField(info, new String(data, StandardCharsets.UTF_8));
+          visitor.stringField(info, data);
           break;
         case NUMERIC_INT:
           visitor.intField(info, in.readZInt());

@@ -18,8 +18,9 @@ package org.apache.lucene.document;
  */
 
 import java.io.IOException;
-import java.util.Set;
+import java.nio.charset.StandardCharsets;
 import java.util.HashSet;
+import java.util.Set;
 
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.IndexReader;
@@ -66,12 +67,12 @@ public class DocumentStoredFieldVisitor extends StoredFieldVisitor {
   }
 
   @Override
-  public void stringField(FieldInfo fieldInfo, String value) throws IOException {
+  public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException {
     final FieldType ft = new FieldType(TextField.TYPE_STORED);
     ft.setStoreTermVectors(fieldInfo.hasVectors());
     ft.setOmitNorms(fieldInfo.omitsNorms());
     ft.setIndexOptions(fieldInfo.getIndexOptions());
-    doc.add(new StoredField(fieldInfo.name, value, ft));
+    doc.add(new StoredField(fieldInfo.name, new String(value, StandardCharsets.UTF_8), ft));
   }
 
   @Override

@@ -25,6 +25,7 @@ import org.apache.lucene.analysis.NumericTokenStream;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
 import org.apache.lucene.document.FieldType.NumericType;
 import org.apache.lucene.index.FieldInvertState; // javadocs
 import org.apache.lucene.index.IndexOptions;
@@ -32,6 +33,7 @@ import org.apache.lucene.index.IndexWriter; // javadocs
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.IndexableFieldType;
 import org.apache.lucene.index.StorableField;
+import org.apache.lucene.util.AttributeImpl;
 import org.apache.lucene.util.BytesRef;
 
 /**
@@ -215,9 +217,6 @@ public class Field implements IndexableField, StorableField {
     if (bytes == null) {
       throw new IllegalArgumentException("bytes cannot be null");
     }
-    if (type.indexOptions() != IndexOptions.NONE) {
-      throw new IllegalArgumentException("Fields with BytesRef values cannot be indexed");
-    }
     this.fieldsData = bytes;
     this.type = type;
     this.name = name;
@@ -538,16 +537,25 @@ public class Field implements IndexableField, StorableField {
     }
 
     if (!fieldType().tokenized()) {
-      if (stringValue() == null) {
+      if (stringValue() != null) {
+        if (!(reuse instanceof StringTokenStream)) {
+          // lazy init the TokenStream as it is heavy to instantiate
+          // (attributes,...) if not needed
+          reuse = new StringTokenStream();
+        }
+        ((StringTokenStream) reuse).setValue(stringValue());
+        return reuse;
+      } else if (binaryValue() != null) {
+        if (!(reuse instanceof BinaryTokenStream)) {
+          // lazy init the TokenStream as it is heavy to instantiate
+          // (attributes,...) if not needed
+          reuse = new BinaryTokenStream();
+        }
+        ((BinaryTokenStream) reuse).setValue(binaryValue());
+        return reuse;
+      } else {
         throw new IllegalArgumentException("Non-Tokenized Fields must have a String value");
       }
-      if (!(reuse instanceof StringTokenStream)) {
-        // lazy init the TokenStream as it is heavy to instantiate
-        // (attributes,...) if not needed (stored field loading)
-        reuse = new StringTokenStream();
-      }
-      ((StringTokenStream) reuse).setValue(stringValue());
-      return reuse;
     }
 
     if (tokenStream != null) {
@@ -561,7 +569,69 @@ public class Field implements IndexableField, StorableField {
     throw new IllegalArgumentException("Field must have either TokenStream, String, Reader or Number value; got " + this);
   }
 
-  static final class StringTokenStream extends TokenStream {
+  private static final class BinaryTokenStream extends TokenStream {
+    private final ByteTermAttribute bytesAtt = addAttribute(ByteTermAttribute.class);
+
+    // Do not init this to true, becase caller must first call reset:
+    private boolean available;
+
+    public BinaryTokenStream() {
+    }
+
+    public void setValue(BytesRef value) {
+      bytesAtt.setBytesRef(value);
+    }
+
+    @Override
+    public boolean incrementToken() {
+      if (available) {
+        clearAttributes();
+        available = false;
+        return true;
+      }
+      return false;
+    }
+
+    @Override
+    public void reset() {
+      available = true;
+    }
+
+    public interface ByteTermAttribute extends TermToBytesRefAttribute {
+      public void setBytesRef(BytesRef bytes);
+    }
+
+    public static class ByteTermAttributeImpl extends AttributeImpl implements ByteTermAttribute, TermToBytesRefAttribute {
+      private BytesRef bytes;
+
+      @Override
+      public void fillBytesRef() {
+        // no-op: the bytes was already filled by our owner's incrementToken
+      }
+
+      @Override
+      public BytesRef getBytesRef() {
+        return bytes;
+      }
+
+      @Override
+      public void setBytesRef(BytesRef bytes) {
+        this.bytes = bytes;
+      }
+
+      @Override
+      public void clear() {
+      }
+
+      @Override
+      public void copyTo(AttributeImpl target) {
+        ByteTermAttributeImpl other = (ByteTermAttributeImpl) target;
+        other.bytes = bytes;
+      }
+    }
+  }
+
+  private static final class StringTokenStream extends TokenStream {
     private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
     private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
     private boolean used = false;

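For reference, a small sketch (not from the patch) of how the single binary token surfaces through the two-argument tokenStream(Analyzer, TokenStream) method modified above. Passing a null analyzer is an assumption that only holds because StringField is not tokenized, and the printed form of the BytesRef is illustrative:

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.StringField;
    import org.apache.lucene.util.BytesRef;

    public class BinaryTokenStreamDemo {
      public static void main(String[] args) throws Exception {
        Field f = new StringField("id", new BytesRef(new byte[] {0x12, 0x34}), Field.Store.NO);

        // StringField is not tokenized, so the analyzer argument is not consulted on this
        // code path; null keeps the sketch self-contained.
        TokenStream ts = f.tokenStream(null, null);
        TermToBytesRefAttribute termAtt = ts.getAttribute(TermToBytesRefAttribute.class);

        ts.reset();
        while (ts.incrementToken()) {                 // exactly one token per reset()
          termAtt.fillBytesRef();                     // no-op for ByteTermAttributeImpl; bytes already set
          System.out.println(termAtt.getBytesRef());  // e.g. [12 34]
        }
        ts.end();
        ts.close();
      }
    }
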
@@ -18,6 +18,7 @@ package org.apache.lucene.document;
  */
 
 import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.util.BytesRef;
 
 /** A field that is indexed but not tokenized: the entire
  *  String value is indexed as a single token. For example
@@ -48,7 +49,9 @@ public final class StringField extends Field {
     TYPE_STORED.freeze();
   }
 
-  /** Creates a new StringField.
+  /** Creates a new textual StringField, indexing the provided String value
+   *  as a single token.
+   *
    *  @param name field name
    *  @param value String value
    *  @param stored Store.YES if the content should also be stored
@@ -57,4 +60,18 @@ public final class StringField extends Field {
   public StringField(String name, String value, Store stored) {
     super(name, value, stored == Store.YES ? TYPE_STORED : TYPE_NOT_STORED);
   }
+
+  /** Creates a new binary StringField, indexing the provided binary (BytesRef)
+   *  value as a single token.
+   *
+   *  @param name field name
+   *  @param value BytesRef value. The provided value is not cloned so
+   *         you must not change it until the document(s) holding it
+   *         have been indexed.
+   *  @param stored Store.YES if the content should also be stored
+   *  @throws IllegalArgumentException if the field name or value is null.
+   */
+  public StringField(String name, BytesRef value, Store stored) {
+    super(name, value, stored == Store.YES ? TYPE_STORED : TYPE_NOT_STORED);
+  }
 }

@@ -53,8 +53,8 @@ public abstract class StoredFieldVisitor {
   public void binaryField(FieldInfo fieldInfo, byte[] value) throws IOException {
   }
 
-  /** Process a string field */
-  public void stringField(FieldInfo fieldInfo, String value) throws IOException {
+  /** Process a string field; the provided byte[] value is a UTF-8 encoded string value. */
+  public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException {
   }
 
   /** Process a int numeric field. */

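Callers of the stored-fields API now receive the raw UTF-8 bytes and decode them themselves, as the updated visitors throughout this commit do. A minimal custom visitor might look like the following sketch (the class and field names are made up for illustration); it would be passed to IndexReader.document(docID, visitor):

    import java.io.IOException;
    import java.nio.charset.StandardCharsets;
    import java.util.ArrayList;
    import java.util.List;

    import org.apache.lucene.index.FieldInfo;
    import org.apache.lucene.index.StoredFieldVisitor;

    /** Collects the stored string values of a single field, decoding the UTF-8 bytes itself. */
    public class TitleCollectingVisitor extends StoredFieldVisitor {
      private final List<String> titles = new ArrayList<>();

      @Override
      public Status needsField(FieldInfo fieldInfo) throws IOException {
        return "title".equals(fieldInfo.name) ? Status.YES : Status.NO;
      }

      @Override
      public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException {
        // The codec hands over the raw stored bytes; they are documented to be UTF-8.
        titles.add(new String(value, StandardCharsets.UTF_8));
      }

      public List<String> getTitles() {
        return titles;
      }
    }
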
@@ -22,6 +22,14 @@ import java.nio.charset.StandardCharsets;
 
 import org.apache.lucene.analysis.CannedTokenStream;
 import org.apache.lucene.analysis.Token;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.StoredDocument;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
 
@@ -409,6 +417,28 @@ public class TestField extends LuceneTestCase {
     assertEquals(5L, field.numericValue().longValue());
   }
 
+  public void testIndexedBinaryField() throws Exception {
+    Directory dir = newDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+    Document doc = new Document();
+    BytesRef br = new BytesRef(new byte[5]);
+    Field field = new StringField("binary", br, Field.Store.YES);
+    assertEquals(br, field.binaryValue());
+    doc.add(field);
+    w.addDocument(doc);
+    IndexReader r = w.getReader();
+
+    IndexSearcher s = newSearcher(r);
+    TopDocs hits = s.search(new TermQuery(new Term("binary", br)), 1);
+    assertEquals(1, hits.totalHits);
+    StoredDocument storedDoc = s.doc(hits.scoreDocs[0].doc);
+    assertEquals(br, storedDoc.getField("binary").binaryValue());
+
+    r.close();
+    w.close();
+    dir.close();
+  }
+
   private void trySetByteValue(Field f) {
     try {
       f.setByteValue((byte) 10);

@@ -213,8 +213,9 @@ public class TestRollingUpdates extends LuceneTestCase {
       DirectoryReader open = null;
       for (int i = 0; i < num; i++) {
         Document doc = new Document();// docs.nextDoc();
-        doc.add(newStringField("id", "test", Field.Store.NO));
-        writer.updateDocument(new Term("id", "test"), doc);
+        BytesRef br = new BytesRef("test");
+        doc.add(newStringField("id", br, Field.Store.NO));
+        writer.updateDocument(new Term("id", br), doc);
         if (random().nextInt(3) == 0) {
           if (open == null) {
             open = DirectoryReader.open(writer, true);

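The test above exercises updating by a binary id. Outside the test framework the same pattern looks roughly like this sketch (not part of the commit; plain StringField stands in for the newStringField test helper, and the method name is made up):

    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.StringField;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.util.BytesRef;

    public class BinaryIdUpdates {
      /** Replaces any existing document with this binary id, then deletes it again. */
      static void updateAndDelete(IndexWriter writer, byte[] rawId) throws Exception {
        BytesRef id = new BytesRef(rawId);

        Document doc = new Document();
        doc.add(new StringField("id", id, Field.Store.NO));  // indexed as a single binary token
        writer.updateDocument(new Term("id", id), doc);      // delete-then-add keyed on the binary term

        writer.deleteDocuments(new Term("id", id));          // the same binary term addresses it later
      }
    }
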
@@ -31,13 +31,13 @@ import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.IntField;
-import org.apache.lucene.document.StringField;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.StoredDocument;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.TestUtil;
 
@@ -108,7 +108,7 @@ public class TestLiveFieldValues extends LuceneTestCase {
           if (threadRandom.nextDouble() <= addChance) {
             String id = String.format(Locale.ROOT, "%d_%04x", threadID, threadRandom.nextInt(idCount));
             Integer field = threadRandom.nextInt(Integer.MAX_VALUE);
-            doc.add(new StringField("id", id, Field.Store.YES));
+            doc.add(newStringField("id", new BytesRef(id), Field.Store.YES));
             doc.add(new IntField("field", field.intValue(), Field.Store.YES));
             w.updateDocument(new Term("id", id), doc);
             rt.add(id, field);
@@ -119,7 +119,7 @@ public class TestLiveFieldValues extends LuceneTestCase {
 
           if (allIDs.size() > 0 && threadRandom.nextDouble() <= deleteChance) {
             String randomID = allIDs.get(threadRandom.nextInt(allIDs.size()));
-            w.deleteDocuments(new Term("id", randomID));
+            w.deleteDocuments(new Term("id", new BytesRef(randomID)));
             rt.delete(randomID);
             values.put(randomID, missing);
           }

@@ -18,6 +18,7 @@ package org.apache.lucene.search.postingshighlight;
  */
 
 import java.io.IOException;
+import java.nio.charset.StandardCharsets;
 import java.text.BreakIterator;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -792,7 +793,8 @@ public class PostingsHighlighter {
     }
 
     @Override
-    public void stringField(FieldInfo fieldInfo, String value) throws IOException {
+    public void stringField(FieldInfo fieldInfo, byte[] bytes) throws IOException {
+      String value = new String(bytes, StandardCharsets.UTF_8);
       assert currentField >= 0;
       StringBuilder builder = builders[currentField];
       if (builder.length() > 0 && builder.length() < maxLength) {

@@ -17,6 +17,16 @@ package org.apache.lucene.search.vectorhighlight;
  * limitations under the License.
  */
 
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.TextField;
@@ -25,19 +35,10 @@ import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.StoredFieldVisitor;
 import org.apache.lucene.search.highlight.DefaultEncoder;
 import org.apache.lucene.search.highlight.Encoder;
+import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo;
 import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo.SubInfo;
-import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo;
 import org.apache.lucene.search.vectorhighlight.FieldPhraseList.WeightedPhraseInfo.Toffs;
 
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-
 /**
  * Base FragmentsBuilder implementation that supports colored pre/post
  * tags and multivalued fields.
@@ -152,7 +153,8 @@ public abstract class BaseFragmentsBuilder implements FragmentsBuilder {
     reader.document(docId, new StoredFieldVisitor() {
 
       @Override
-      public void stringField(FieldInfo fieldInfo, String value) {
+      public void stringField(FieldInfo fieldInfo, byte[] bytes) {
+        String value = new String(bytes, StandardCharsets.UTF_8);
         FieldType ft = new FieldType(TextField.TYPE_STORED);
         ft.setStoreTermVectors(fieldInfo.hasVectors());
         fields.add(new Field(fieldInfo.name, value, ft));

@@ -16,19 +16,19 @@
  */
 package org.apache.lucene.document;
 
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
+import java.util.Set;
+import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Map;
-import java.util.HashMap;
-import java.io.IOException;
-import java.util.Set;
 
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.store.*;
 import org.apache.lucene.analysis.*;
 import org.apache.lucene.index.*;
 import org.apache.lucene.search.*;
 
-import org.apache.lucene.store.*;
-import org.apache.lucene.util.LuceneTestCase;
 import org.junit.After;
 import org.junit.Before;
@@ -209,7 +209,8 @@ public class TestLazyDocument extends LuceneTestCase {
   }
 
   @Override
-  public void stringField(FieldInfo fieldInfo, String value) throws IOException {
+  public void stringField(FieldInfo fieldInfo, byte[] bytes) throws IOException {
+    String value = new String(bytes, StandardCharsets.UTF_8);
     final FieldType ft = new FieldType(TextField.TYPE_STORED);
     ft.setStoreTermVectors(fieldInfo.hasVectors());
     ft.setOmitNorms(fieldInfo.omitsNorms());

@@ -78,7 +78,7 @@ public final class FieldFilterLeafReader extends FilterLeafReader {
       }
 
       @Override
-      public void stringField(FieldInfo fieldInfo, String value) throws IOException {
+      public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException {
        visitor.stringField(fieldInfo, value);
       }
 

@@ -92,7 +92,7 @@ public class MismatchedLeafReader extends FilterLeafReader {
     }
 
     @Override
-    public void stringField(FieldInfo fieldInfo, String value) throws IOException {
+    public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException {
       in.stringField(renumber(fieldInfo), value);
     }
 

@@ -56,49 +56,24 @@ import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicReference;
 import java.util.logging.Logger;
 
-import com.carrotsearch.randomizedtesting.JUnit4MethodProvider;
-import com.carrotsearch.randomizedtesting.LifecycleScope;
-import com.carrotsearch.randomizedtesting.MixWithSuiteName;
-import com.carrotsearch.randomizedtesting.RandomizedContext;
-import com.carrotsearch.randomizedtesting.RandomizedRunner;
-import com.carrotsearch.randomizedtesting.RandomizedTest;
-import com.carrotsearch.randomizedtesting.annotations.Listeners;
-import com.carrotsearch.randomizedtesting.annotations.SeedDecorators;
-import com.carrotsearch.randomizedtesting.annotations.TestGroup;
-import com.carrotsearch.randomizedtesting.annotations.TestMethodProviders;
-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction;
-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction.Action;
-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters;
-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakGroup;
-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakGroup.Group;
-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering;
-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope;
-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope.Scope;
-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies;
-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies.Consequence;
-import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
-import com.carrotsearch.randomizedtesting.generators.RandomPicks;
-import com.carrotsearch.randomizedtesting.rules.NoClassHooksShadowingRule;
-import com.carrotsearch.randomizedtesting.rules.NoInstanceHooksOverridesRule;
-import com.carrotsearch.randomizedtesting.rules.StaticFieldsInvariantRule;
-
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Field;
 import org.apache.lucene.document.Field.Store;
-import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.*;
 import org.apache.lucene.index.IndexReader.ReaderClosedListener;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.TermsEnum.SeekStatus;
 import org.apache.lucene.search.AssertingIndexSearcher;
 import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.LRUQueryCache;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.QueryCache;
 import org.apache.lucene.search.QueryCachingPolicy;
-import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.QueryUtils.FCInvisibleMultiReader;
 import org.apache.lucene.store.BaseDirectoryWrapper;
 import org.apache.lucene.store.Directory;
@@ -108,8 +83,8 @@ import org.apache.lucene.store.FlushInfo;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.LockFactory;
 import org.apache.lucene.store.MergeInfo;
+import org.apache.lucene.store.MockDirectoryWrapper;
 import org.apache.lucene.store.MockDirectoryWrapper.Throttling;
-import org.apache.lucene.store.MockDirectoryWrapper;
 import org.apache.lucene.store.NRTCachingDirectory;
 import org.apache.lucene.store.RawDirectoryWrapper;
 import org.apache.lucene.util.automaton.AutomatonTestUtil;
@@ -126,6 +101,31 @@ import org.junit.Test;
 import org.junit.rules.RuleChain;
 import org.junit.rules.TestRule;
 import org.junit.runner.RunWith;
+import com.carrotsearch.randomizedtesting.JUnit4MethodProvider;
+import com.carrotsearch.randomizedtesting.LifecycleScope;
+import com.carrotsearch.randomizedtesting.MixWithSuiteName;
+import com.carrotsearch.randomizedtesting.RandomizedContext;
+import com.carrotsearch.randomizedtesting.RandomizedRunner;
+import com.carrotsearch.randomizedtesting.RandomizedTest;
+import com.carrotsearch.randomizedtesting.annotations.Listeners;
+import com.carrotsearch.randomizedtesting.annotations.SeedDecorators;
+import com.carrotsearch.randomizedtesting.annotations.TestGroup;
+import com.carrotsearch.randomizedtesting.annotations.TestMethodProviders;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction.Action;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakGroup.Group;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakGroup;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope.Scope;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies.Consequence;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies;
+import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
+import com.carrotsearch.randomizedtesting.generators.RandomPicks;
+import com.carrotsearch.randomizedtesting.rules.NoClassHooksShadowingRule;
+import com.carrotsearch.randomizedtesting.rules.NoInstanceHooksOverridesRule;
+import com.carrotsearch.randomizedtesting.rules.StaticFieldsInvariantRule;
 
 import static com.carrotsearch.randomizedtesting.RandomizedTest.systemPropertyAsBoolean;
 import static com.carrotsearch.randomizedtesting.RandomizedTest.systemPropertyAsInt;
@@ -1336,6 +1336,10 @@ public abstract class LuceneTestCase extends Assert {
     return newField(random(), name, value, stored == Store.YES ? StringField.TYPE_STORED : StringField.TYPE_NOT_STORED);
   }
 
+  public static Field newStringField(String name, BytesRef value, Store stored) {
+    return newField(random(), name, value, stored == Store.YES ? StringField.TYPE_STORED : StringField.TYPE_NOT_STORED);
+  }
+
   public static Field newTextField(String name, String value, Store stored) {
     return newField(random(), name, value, stored == Store.YES ? TextField.TYPE_STORED : TextField.TYPE_NOT_STORED);
   }
@@ -1344,6 +1348,10 @@ public abstract class LuceneTestCase extends Assert {
     return newField(random, name, value, stored == Store.YES ? StringField.TYPE_STORED : StringField.TYPE_NOT_STORED);
   }
 
+  public static Field newStringField(Random random, String name, BytesRef value, Store stored) {
+    return newField(random, name, value, stored == Store.YES ? StringField.TYPE_STORED : StringField.TYPE_NOT_STORED);
+  }
+
   public static Field newTextField(Random random, String name, String value, Store stored) {
     return newField(random, name, value, stored == Store.YES ? TextField.TYPE_STORED : TextField.TYPE_NOT_STORED);
   }
@@ -1372,7 +1380,7 @@ public abstract class LuceneTestCase extends Assert {
   // write-once schema sort of helper class then we can
   // remove the sync here. We can also fold the random
   // "enable norms" (now commented out, below) into that:
-  public synchronized static Field newField(Random random, String name, String value, FieldType type) {
+  public synchronized static Field newField(Random random, String name, Object value, FieldType type) {
 
     // Defeat any consumers that illegally rely on intern'd
     // strings (we removed this from Lucene a while back):
@@ -1388,7 +1396,7 @@ public abstract class LuceneTestCase extends Assert {
       type = mergeTermVectorOptions(type, prevType);
     }
 
-    return new Field(name, value, type);
+    return createField(name, value, type);
   }
 
   // TODO: once all core & test codecs can index
@@ -1434,7 +1442,17 @@ public abstract class LuceneTestCase extends Assert {
     }
     */
 
-    return new Field(name, value, newType);
+    return createField(name, value, newType);
   }
 
+  private static Field createField(String name, Object value, FieldType fieldType) {
+    if (value instanceof String) {
+      return new Field(name, (String) value, fieldType);
+    } else if (value instanceof BytesRef) {
+      return new Field(name, (BytesRef) value, fieldType);
+    } else {
+      throw new IllegalArgumentException("value must be String or BytesRef");
+    }
+  }
+
   /**

@@ -1,6 +1,7 @@
 package org.apache.solr.handler.component;
 
 import java.io.IOException;
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -275,8 +276,8 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
         // once we find it...
         final StoredFieldVisitor getUniqValue = new StoredFieldVisitor() {
           @Override
-          public void stringField(FieldInfo fieldInfo, String value) {
-            uniqValues.add(value);
+          public void stringField(FieldInfo fieldInfo, byte[] bytes) {
+            uniqValues.add(new String(bytes, StandardCharsets.UTF_8));
           }
 
           @Override

@@ -20,6 +20,7 @@ package org.apache.solr.search;
 import java.io.Closeable;
 import java.io.IOException;
 import java.net.URL;
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
@@ -80,10 +81,10 @@ import org.apache.solr.core.SolrInfoMBean;
 import org.apache.solr.request.LocalSolrQueryRequest;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.request.SolrRequestInfo;
-import org.apache.solr.search.facet.UnInvertedField;
 import org.apache.solr.response.SolrQueryResponse;
 import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.schema.SchemaField;
+import org.apache.solr.search.facet.UnInvertedField;
 import org.apache.solr.search.stats.StatsSource;
 import org.apache.solr.update.SolrIndexConfig;
 import org.slf4j.Logger;
@@ -618,7 +619,8 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
       }
 
       @Override
-      public void stringField(FieldInfo fieldInfo, String value) throws IOException {
+      public void stringField(FieldInfo fieldInfo, byte[] bytes) throws IOException {
+        String value = new String(bytes, StandardCharsets.UTF_8);
         final FieldType ft = new FieldType(TextField.TYPE_STORED);
         ft.setStoreTermVectors(fieldInfo.hasVectors());
         ft.setOmitNorms(fieldInfo.omitsNorms());
@@ -708,7 +710,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
             throw new AssertionError();
         }
       } else {
-        visitor.stringField(info, f.stringValue());
+        visitor.stringField(info, f.stringValue().getBytes(StandardCharsets.UTF_8));
       }
       break;
     case NO:
