mirror of https://github.com/apache/lucene.git
LUCENE-1960: Remove deprecated Field.Store.COMPRESS.
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@822978 13f79535-47bb-0310-9956-ffa450edef68
commit 6be57e324e
parent 39b4a0e4dc
@@ -34,6 +34,8 @@ API Changes
 * LUCENE-1957: Remove Filter.bits(IndexReader) method and make
   Filter.getDocIdSet(IndexReader) abstract. (Michael Busch)
 
+* LUCENE-1960: Remove deprecated Field.Store.COMPRESS. (Michael Busch)
+
 Bug fixes
 
 New features
@@ -42,7 +42,7 @@
 <property name="Name" value="Lucene"/>
 <property name="dev.version" value="3.0-dev"/>
 <property name="version" value="${dev.version}"/>
-<property name="compatibility.tag" value="lucene_2_9_back_compat_tests_20091007b"/>
+<property name="compatibility.tag" value="lucene_2_9_back_compat_tests_20091007c"/>
 <property name="spec.version" value="${version}"/>
 <property name="year" value="2000-${current.year}"/>
 <property name="final.name" value="lucene-${name}-${version}"/>
@@ -33,7 +33,6 @@ class FieldSetting implements Serializable {
 boolean stored = false;
 boolean indexed = false;
 boolean tokenized = false;
-boolean compressed = false;
 
 FieldSetting() {
 }
@@ -44,9 +44,6 @@ class FieldSettings implements Serializable {
 if (fieldSetting.stored) {
 setting.stored = true;
 }
-if (fieldSetting.compressed) {
-setting.compressed = true;
-}
 
 if ("b3".equals(fieldSetting.fieldName)) {
 System.currentTimeMillis();
@@ -480,9 +480,6 @@ public class InstantiatedIndexWriter {
 if (field.isTokenized()) {
 fieldSetting.tokenized = true;
 }
-if (field.isCompressed()) {
-fieldSetting.compressed = true;
-}
 if (field.isStored()) {
 fieldSetting.stored = true;
 }
@@ -36,7 +36,6 @@ public abstract class AbstractField implements Fieldable {
 protected boolean isIndexed = true;
 protected boolean isTokenized = true;
 protected boolean isBinary = false;
-protected boolean isCompressed = false;
 protected boolean lazy = false;
 protected boolean omitTermFreqAndPositions = false;
 protected float boost = 1.0f;
@@ -59,15 +58,9 @@ public abstract class AbstractField implements Fieldable {
 
 if (store == Field.Store.YES){
 this.isStored = true;
-this.isCompressed = false;
 }
-else if (store == Field.Store.COMPRESS) {
-this.isStored = true;
-this.isCompressed = true;
-}
 else if (store == Field.Store.NO){
 this.isStored = false;
-this.isCompressed = false;
 }
 else
 throw new IllegalArgumentException("unknown store parameter " + store);
@@ -189,9 +182,6 @@ public abstract class AbstractField implements Fieldable {
 Reader-valued. */
 public final boolean isTokenized() { return isTokenized; }
 
-/** True if the value of the field is stored and compressed within the index */
-public final boolean isCompressed() { return isCompressed; }
-
 /** True iff the term or terms used to index this field are stored as a term
 * vector, available from {@link org.apache.lucene.index.IndexReader#getTermFreqVector(int,String)}.
 * These methods do not provide access to the original content of the field,
@@ -248,10 +238,7 @@ public abstract class AbstractField implements Fieldable {
 */
 public int getBinaryLength() {
 if (isBinary) {
-if (!isCompressed)
-return binaryLength;
-else
-return ((byte[]) fieldsData).length;
+return binaryLength;
 } else if (fieldsData instanceof byte[])
 return ((byte[]) fieldsData).length;
 else
@@ -308,10 +295,6 @@ public abstract class AbstractField implements Fieldable {
 StringBuilder result = new StringBuilder();
 if (isStored) {
 result.append("stored");
-if (isCompressed)
-result.append("/compressed");
-else
-result.append("/uncompressed");
 }
 if (isIndexed) {
 if (result.length() > 0)
@@ -26,9 +26,7 @@ import org.apache.lucene.util.UnicodeUtil;
 /** Simple utility class providing static methods to
 * compress and decompress binary data for stored fields.
 * This class uses java.util.zip.Deflater and Inflater
-* classes to compress and decompress, which is the same
-* format previously used by the now deprecated
-* Field.Store.COMPRESS.
+* classes to compress and decompress.
 */
 
 public class CompressionTools {
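For context, here is a minimal sketch of doing stored-field compression explicitly with CompressionTools, which is the replacement this commit points users to. The class name, field name, and sample text below are illustrative only and not part of the commit:

    import java.util.zip.DataFormatException;

    import org.apache.lucene.document.CompressionTools;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;

    public class CompressionToolsSketch {
      public static void main(String[] args) throws DataFormatException {
        // Compress the value up front and store the compressed bytes as a binary field.
        String text = "some long document body";
        byte[] compressed = CompressionTools.compressString(text);

        Document doc = new Document();
        doc.add(new Field("body", compressed, Field.Store.YES));

        // At read time, decompress the stored bytes back into the original string.
        byte[] stored = doc.getBinaryValue("body");
        String restored = CompressionTools.decompressString(stored);
        assert restored.equals(text);
      }
    }

CompressionTools.decompress and decompressString throw DataFormatException when the stored bytes are not valid deflate data, which is why the uncompress helper removed from FieldsReader further down wrapped that exception in a CorruptIndexException.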
@@ -42,16 +42,6 @@ public final class Field extends AbstractField implements Fieldable, Serializabl
 super(name);
 }
 
-/** Store the original field value in the index in a compressed form. This is
-* useful for long documents and for binary valued fields.
-* @deprecated Please use {@link CompressionTools} instead.
-* For string fields that were previously indexed and stored using compression,
-* the new way to achieve this is: First add the field indexed-only (no store)
-* and additionally using the same field name as a binary, stored field
-* with {@link CompressionTools#compressString}.
-*/
-public static final Store COMPRESS = new Store("COMPRESS");
-
 /** Store the original field value in the index. This is useful for short texts
 * like a document's title which should be displayed with the results. The
 * value is stored in its original form, i.e. no analyzer is used before it is
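The javadoc removed above spells out the migration recipe for fields that used Field.Store.COMPRESS: index the plain text without storing it, and store the compressed bytes as a binary field under the same name. A minimal sketch of that recipe against the Lucene 2.9/3.0 Field API; the helper class and method names are ours, purely illustrative:

    import org.apache.lucene.document.CompressionTools;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;

    public class CompressedFieldSketch {
      // Replacement for: new Field(name, value, Field.Store.COMPRESS, Field.Index.ANALYZED)
      static void addCompressedField(Document doc, String name, String value) {
        // Index the plain text, but do not store it ...
        doc.add(new Field(name, value, Field.Store.NO, Field.Index.ANALYZED));
        // ... and store the compressed bytes under the same field name.
        doc.add(new Field(name, CompressionTools.compressString(value), Field.Store.YES));
      }
    }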
@@ -346,15 +336,9 @@ public final class Field extends AbstractField implements Fieldable, Serializabl
 
 if (store == Store.YES){
 this.isStored = true;
-this.isCompressed = false;
 }
-else if (store == Store.COMPRESS) {
-this.isStored = true;
-this.isCompressed = true;
-}
 else if (store == Store.NO){
 this.isStored = false;
-this.isCompressed = false;
 }
 else
 throw new IllegalArgumentException("unknown store parameter " + store);
@@ -422,7 +406,6 @@ public final class Field extends AbstractField implements Fieldable, Serializabl
 this.fieldsData = reader;
 
 this.isStored = false;
-this.isCompressed = false;
 
 this.isIndexed = true;
 this.isTokenized = true;
@@ -470,7 +453,6 @@ public final class Field extends AbstractField implements Fieldable, Serializabl
 this.tokenStream = tokenStream;
 
 this.isStored = false;
-this.isCompressed = false;
 
 this.isIndexed = true;
 this.isTokenized = true;
@@ -515,11 +497,6 @@ public final class Field extends AbstractField implements Fieldable, Serializabl
 
 if (store == Store.YES) {
 isStored = true;
-isCompressed = false;
 }
-else if (store == Store.COMPRESS) {
-isStored = true;
-isCompressed = true;
-}
 else if (store == Store.NO)
 throw new IllegalArgumentException("binary values can't be unstored");
@@ -54,17 +54,6 @@ public final class FieldSelectorResult implements Serializable {
 * {@link Document#add(Fieldable)} should be called by the Reader.
 */
 public transient static final FieldSelectorResult LOAD_AND_BREAK = new FieldSelectorResult(3);
-/**
-* Behaves much like {@link #LOAD} but does not uncompress any compressed data. This is used for internal purposes.
-* {@link Document#getField(String)} and {@link Document#getFieldable(String)} should not return null.
-* <p/>
-* {@link Document#add(Fieldable)} should be called by
-* the Reader.
-* @deprecated This is an internal option only, and is
-* no longer needed now that {@link CompressionTools}
-* is used for field compression.
-*/
-public transient static final FieldSelectorResult LOAD_FOR_MERGE = new FieldSelectorResult(4);
 
 /** Expert: Load the size of this {@link Field} rather than its value.
 * Size is measured as number of bytes required to store the field == bytes for a binary or any compressed value, and 2*chars for a String value.
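The SIZE option mentioned in the javadoc kept above reports a field's stored size packed into a 4-byte, high-order-first binary value (see addFieldSize in FieldsReader below). A small sketch of reading that size back, assuming an already-open IndexReader; the class and method names are ours, not part of the commit:

    import java.io.IOException;

    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.FieldSelector;
    import org.apache.lucene.document.FieldSelectorResult;
    import org.apache.lucene.index.IndexReader;

    public class FieldSizeSketch {
      // Returns the stored size in bytes (2*chars for String fields) of fieldName in document docId.
      static int storedFieldSize(IndexReader reader, int docId, String fieldName) throws IOException {
        Document sizes = reader.document(docId, new FieldSelector() {
          public FieldSelectorResult accept(String name) {
            return FieldSelectorResult.SIZE; // load sizes only, not the field values
          }
        });
        byte[] packed = sizes.getBinaryValue(fieldName); // null if the field is absent
        return ((packed[0] & 0xFF) << 24) | ((packed[1] & 0xFF) << 16)
             | ((packed[2] & 0xFF) << 8) | (packed[3] & 0xFF);
      }
    }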
@@ -113,9 +113,6 @@ public interface Fieldable extends Serializable {
 Reader-valued. */
 boolean isTokenized();
 
-/** True if the value of the field is stored and compressed within the index */
-boolean isCompressed();
-
 /** True if the term or terms used to index this field are stored as a term
 * vector, available from {@link org.apache.lucene.index.IndexReader#getTermFreqVector(int,String)}.
 * These methods do not provide access to the original content of the field,
@@ -24,11 +24,9 @@ import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.BufferedIndexInput;
 import org.apache.lucene.util.CloseableThreadLocal;
 import org.apache.lucene.util.StringHelper;
 
 import java.io.IOException;
 import java.io.Reader;
-import java.util.zip.DataFormatException;
-
 /**
 * Class responsible for access to stored document fields.
@@ -216,35 +214,31 @@ final class FieldsReader implements Cloneable {
 FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.accept(fi.name);
 
 byte bits = fieldsStream.readByte();
-assert bits <= FieldsWriter.FIELD_IS_COMPRESSED + FieldsWriter.FIELD_IS_TOKENIZED + FieldsWriter.FIELD_IS_BINARY;
+assert bits <= FieldsWriter.FIELD_IS_TOKENIZED + FieldsWriter.FIELD_IS_BINARY;
 
-boolean compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
 boolean tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
 boolean binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;
 //TODO: Find an alternative approach here if this list continues to grow beyond the
 //list of 5 or 6 currently here. See Lucene 762 for discussion
 if (acceptField.equals(FieldSelectorResult.LOAD)) {
-addField(doc, fi, binary, compressed, tokenize);
-}
-else if (acceptField.equals(FieldSelectorResult.LOAD_FOR_MERGE)) {
-addFieldForMerge(doc, fi, binary, compressed, tokenize);
+addField(doc, fi, binary, tokenize);
 }
 else if (acceptField.equals(FieldSelectorResult.LOAD_AND_BREAK)){
-addField(doc, fi, binary, compressed, tokenize);
+addField(doc, fi, binary, tokenize);
 break;//Get out of this loop
 }
 else if (acceptField.equals(FieldSelectorResult.LAZY_LOAD)) {
-addFieldLazy(doc, fi, binary, compressed, tokenize);
+addFieldLazy(doc, fi, binary, tokenize);
 }
 else if (acceptField.equals(FieldSelectorResult.SIZE)){
-skipField(binary, compressed, addFieldSize(doc, fi, binary, compressed));
+skipField(binary, addFieldSize(doc, fi, binary));
 }
 else if (acceptField.equals(FieldSelectorResult.SIZE_AND_BREAK)){
-addFieldSize(doc, fi, binary, compressed);
+addFieldSize(doc, fi, binary);
 break;
 }
 else {
-skipField(binary, compressed);
+skipField(binary);
 }
 }
 
@@ -281,12 +275,12 @@ final class FieldsReader implements Cloneable {
 * Skip the field. We still have to read some of the information about the field, but can skip past the actual content.
 * This will have the most payoff on large fields.
 */
-private void skipField(boolean binary, boolean compressed) throws IOException {
-skipField(binary, compressed, fieldsStream.readVInt());
+private void skipField(boolean binary) throws IOException {
+skipField(binary, fieldsStream.readVInt());
 }
 
-private void skipField(boolean binary, boolean compressed, int toRead) throws IOException {
-if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES || binary || compressed) {
+private void skipField(boolean binary, int toRead) throws IOException {
+if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES || binary) {
 fieldsStream.seek(fieldsStream.getFilePointer() + toRead);
 } else {
 // We need to skip chars. This will slow us down, but still better
@@ -294,17 +288,12 @@ final class FieldsReader implements Cloneable {
 }
 }
 
-private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws IOException {
+private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean tokenize) throws IOException {
 if (binary) {
 int toRead = fieldsStream.readVInt();
 long pointer = fieldsStream.getFilePointer();
-if (compressed) {
-//was: doc.add(new Fieldable(fi.name, uncompress(b), Fieldable.Store.COMPRESS));
-doc.add(new LazyField(fi.name, Field.Store.COMPRESS, toRead, pointer, binary));
-} else {
-//was: doc.add(new Fieldable(fi.name, b, Fieldable.Store.YES));
-doc.add(new LazyField(fi.name, Field.Store.YES, toRead, pointer, binary));
-}
+//was: doc.add(new Fieldable(fi.name, b, Fieldable.Store.YES));
+doc.add(new LazyField(fi.name, Field.Store.YES, toRead, pointer, binary));
 //Need to move the pointer ahead by toRead positions
 fieldsStream.seek(pointer + toRead);
 } else {
@@ -313,89 +302,43 @@ final class FieldsReader implements Cloneable {
 Field.TermVector termVector = getTermVectorType(fi);
 
 AbstractField f;
-if (compressed) {
-store = Field.Store.COMPRESS;
-int toRead = fieldsStream.readVInt();
-long pointer = fieldsStream.getFilePointer();
-f = new LazyField(fi.name, store, toRead, pointer, binary);
-//skip over the part that we aren't loading
-fieldsStream.seek(pointer + toRead);
-f.setOmitNorms(fi.omitNorms);
-f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
-} else {
-int length = fieldsStream.readVInt();
-long pointer = fieldsStream.getFilePointer();
-//Skip ahead of where we are by the length of what is stored
-if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
-fieldsStream.seek(pointer+length);
-else
-fieldsStream.skipChars(length);
-f = new LazyField(fi.name, store, index, termVector, length, pointer, binary);
-f.setOmitNorms(fi.omitNorms);
-f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
-}
+int length = fieldsStream.readVInt();
+long pointer = fieldsStream.getFilePointer();
+//Skip ahead of where we are by the length of what is stored
+if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
+fieldsStream.seek(pointer+length);
+else
+fieldsStream.skipChars(length);
+f = new LazyField(fi.name, store, index, termVector, length, pointer, binary);
+f.setOmitNorms(fi.omitNorms);
+f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
 doc.add(f);
 }
 
 }
 
-// in merge mode we don't uncompress the data of a compressed field
-private void addFieldForMerge(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws IOException {
-Object data;
-
-if (binary || compressed) {
-int toRead = fieldsStream.readVInt();
-final byte[] b = new byte[toRead];
-fieldsStream.readBytes(b, 0, b.length);
-data = b;
-} else {
-data = fieldsStream.readString();
-}
-
-doc.add(new FieldForMerge(data, fi, binary, compressed, tokenize));
-}
-
-private void addField(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws CorruptIndexException, IOException {
+private void addField(Document doc, FieldInfo fi, boolean binary, boolean tokenize) throws CorruptIndexException, IOException {
 
-//we have a binary stored field, and it may be compressed
 if (binary) {
 int toRead = fieldsStream.readVInt();
 final byte[] b = new byte[toRead];
 fieldsStream.readBytes(b, 0, b.length);
-if (compressed)
-doc.add(new Field(fi.name, uncompress(b), Field.Store.COMPRESS));
-else
-doc.add(new Field(fi.name, b, Field.Store.YES));
+doc.add(new Field(fi.name, b, Field.Store.YES));
 } else {
 Field.Store store = Field.Store.YES;
 Field.Index index = getIndexType(fi, tokenize);
 Field.TermVector termVector = getTermVectorType(fi);
 
 AbstractField f;
-if (compressed) {
-store = Field.Store.COMPRESS;
-int toRead = fieldsStream.readVInt();
-
-final byte[] b = new byte[toRead];
-fieldsStream.readBytes(b, 0, b.length);
-f = new Field(fi.name, // field name
-false,
-new String(uncompress(b), "UTF-8"), // uncompress the value and add as string
-store,
-index,
-termVector);
-f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
-f.setOmitNorms(fi.omitNorms);
-} else {
-f = new Field(fi.name, // name
-false,
-fieldsStream.readString(), // read value
-store,
-index,
-termVector);
-f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
-f.setOmitNorms(fi.omitNorms);
-}
+f = new Field(fi.name, // name
+false,
+fieldsStream.readString(), // read value
+store,
+index,
+termVector);
+f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
+f.setOmitNorms(fi.omitNorms);
 doc.add(f);
 }
 }
@@ -403,8 +346,8 @@ final class FieldsReader implements Cloneable {
 // Add the size of field as a byte[] containing the 4 bytes of the integer byte size (high order byte first; char = 2 bytes)
 // Read just the size -- caller must skip the field content to continue reading fields
 // Return the size in bytes or chars, depending on field type
-private int addFieldSize(Document doc, FieldInfo fi, boolean binary, boolean compressed) throws IOException {
-int size = fieldsStream.readVInt(), bytesize = binary || compressed ? size : 2*size;
+private int addFieldSize(Document doc, FieldInfo fi, boolean binary) throws IOException {
+int size = fieldsStream.readVInt(), bytesize = binary ? size : 2*size;
 byte[] sizebytes = new byte[4];
 sizebytes[0] = (byte) (bytesize>>>24);
 sizebytes[1] = (byte) (bytesize>>>16);
@@ -517,21 +460,15 @@ final class FieldsReader implements Cloneable {
 IndexInput localFieldsStream = getFieldStream();
 try {
 localFieldsStream.seek(pointer);
-if (isCompressed) {
-final byte[] b = new byte[toRead];
-localFieldsStream.readBytes(b, 0, b.length);
-fieldsData = new String(uncompress(b), "UTF-8");
+if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) {
+byte[] bytes = new byte[toRead];
+localFieldsStream.readBytes(bytes, 0, toRead);
+fieldsData = new String(bytes, "UTF-8");
 } else {
-if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) {
-byte[] bytes = new byte[toRead];
-localFieldsStream.readBytes(bytes, 0, toRead);
-fieldsData = new String(bytes, "UTF-8");
-} else {
-//read in chars b/c we already know the length we need to read
-char[] chars = new char[toRead];
-localFieldsStream.readChars(chars, 0, toRead);
-fieldsData = new String(chars);
-}
+//read in chars b/c we already know the length we need to read
+char[] chars = new char[toRead];
+localFieldsStream.readChars(chars, 0, toRead);
+fieldsData = new String(chars);
 }
 } catch (IOException e) {
 throw new FieldReaderException(e);
@@ -580,11 +517,7 @@ final class FieldsReader implements Cloneable {
 try {
 localFieldsStream.seek(pointer);
 localFieldsStream.readBytes(b, 0, toRead);
-if (isCompressed == true) {
-fieldsData = uncompress(b);
-} else {
-fieldsData = b;
-}
+fieldsData = b;
 } catch (IOException e) {
 throw new FieldReaderException(e);
 }
@@ -598,58 +531,4 @@ final class FieldsReader implements Cloneable {
 return null;
 }
 }
-
-private byte[] uncompress(byte[] b)
-throws CorruptIndexException {
-try {
-return CompressionTools.decompress(b);
-} catch (DataFormatException e) {
-// this will happen if the field is not compressed
-CorruptIndexException newException = new CorruptIndexException("field data are in wrong format: " + e.toString());
-newException.initCause(e);
-throw newException;
-}
-}
-
-// Instances of this class hold field properties and data
-// for merge
-final static class FieldForMerge extends AbstractField {
-public String stringValue() {
-return (String) this.fieldsData;
-}
-
-public Reader readerValue() {
-// not needed for merge
-return null;
-}
-
-public byte[] binaryValue() {
-return (byte[]) this.fieldsData;
-}
-
-public TokenStream tokenStreamValue() {
-// not needed for merge
-return null;
-}
-
-public FieldForMerge(Object value, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) {
-this.isStored = true;
-this.fieldsData = value;
-this.isCompressed = compressed;
-this.isBinary = binary;
-if (binary)
-binaryLength = ((byte[]) value).length;
-
-this.isTokenized = tokenize;
-
-this.name = StringHelper.intern(fi.name);
-this.isIndexed = fi.isIndexed;
-this.omitNorms = fi.omitNorms;
-this.omitTermFreqAndPositions = fi.omitTermFreqAndPositions;
-this.storeOffsetWithTermVector = fi.storeOffsetWithTermVector;
-this.storePositionWithTermVector = fi.storePositionWithTermVector;
-this.storeTermVector = fi.storeTermVector;
-}
-
-}
 }
@@ -21,7 +21,6 @@ import java.util.Iterator;
 
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Fieldable;
-import org.apache.lucene.document.CompressionTools;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMOutputStream;
 import org.apache.lucene.store.IndexOutput;
@@ -31,7 +30,6 @@ final class FieldsWriter
 {
 static final byte FIELD_IS_TOKENIZED = 0x1;
 static final byte FIELD_IS_BINARY = 0x2;
-static final byte FIELD_IS_COMPRESSED = 0x4;
 
 // Original format
 static final int FORMAT = 0;
@@ -172,64 +170,28 @@ final class FieldsWriter
 }
 
 final void writeField(FieldInfo fi, Fieldable field) throws IOException {
-// if the field as an instanceof FieldsReader.FieldForMerge, we're in merge mode
-// and field.binaryValue() already returns the compressed value for a field
-// with isCompressed()==true, so we disable compression in that case
-boolean disableCompression = (field instanceof FieldsReader.FieldForMerge);
 fieldsStream.writeVInt(fi.number);
 byte bits = 0;
 if (field.isTokenized())
 bits |= FieldsWriter.FIELD_IS_TOKENIZED;
 if (field.isBinary())
 bits |= FieldsWriter.FIELD_IS_BINARY;
-if (field.isCompressed())
-bits |= FieldsWriter.FIELD_IS_COMPRESSED;
 
 fieldsStream.writeByte(bits);
 
-if (field.isCompressed()) {
-// compression is enabled for the current field
 if (field.isBinary()) {
 final byte[] data;
 final int len;
 final int offset;
-if (disableCompression) {
-// optimized case for merging, the data
-// is already compressed
-data = field.getBinaryValue();
-assert data != null;
-len = field.getBinaryLength();
-offset = field.getBinaryOffset();
-} else {
-// check if it is a binary field
-if (field.isBinary()) {
-data = CompressionTools.compress(field.getBinaryValue(), field.getBinaryOffset(), field.getBinaryLength());
-} else {
-byte x[] = field.stringValue().getBytes("UTF-8");
-data = CompressionTools.compress(x, 0, x.length);
-}
-len = data.length;
-offset = 0;
-}
+data = field.getBinaryValue();
+len = field.getBinaryLength();
+offset = field.getBinaryOffset();
 
 fieldsStream.writeVInt(len);
 fieldsStream.writeBytes(data, offset, len);
 }
 else {
-// compression is disabled for the current field
-if (field.isBinary()) {
-final byte[] data;
-final int len;
-final int offset;
-data = field.getBinaryValue();
-len = field.getBinaryLength();
-offset = field.getBinaryOffset();
-
-fieldsStream.writeVInt(len);
-fieldsStream.writeBytes(data, offset, len);
-}
-else {
-fieldsStream.writeString(field.stringValue());
-}
+fieldsStream.writeString(field.stringValue());
 }
 }
 
@@ -24,8 +24,6 @@ import java.util.Iterator;
 import java.util.List;
 
 import org.apache.lucene.document.Document;
-import org.apache.lucene.document.FieldSelector;
-import org.apache.lucene.document.FieldSelectorResult;
 import org.apache.lucene.index.IndexReader.FieldOption;
 import org.apache.lucene.index.MergePolicy.MergeAbortedException;
 import org.apache.lucene.store.Directory;
@@ -321,15 +319,6 @@ final class SegmentMerger {
 setMatchingSegmentReaders();
 
 if (mergeDocStores) {
-
-// for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're
-// in merge mode, we use this FieldSelector
-FieldSelector fieldSelectorMerge = new FieldSelector() {
-public FieldSelectorResult accept(String fieldName) {
-return FieldSelectorResult.LOAD_FOR_MERGE;
-}
-};
-
 // merge field values
 final FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);
 
@@ -346,10 +335,10 @@ final class SegmentMerger {
 }
 }
 if (reader.hasDeletions()) {
-docCount += copyFieldsWithDeletions(fieldSelectorMerge, fieldsWriter,
+docCount += copyFieldsWithDeletions(fieldsWriter,
 reader, matchingFieldsReader);
 } else {
-docCount += copyFieldsNoDeletions(fieldSelectorMerge, fieldsWriter,
+docCount += copyFieldsNoDeletions(fieldsWriter,
 reader, matchingFieldsReader);
 }
 }
@@ -379,8 +368,7 @@ final class SegmentMerger {
 return docCount;
 }
 
-private int copyFieldsWithDeletions(final FieldSelector fieldSelectorMerge,
-final FieldsWriter fieldsWriter, final IndexReader reader,
+private int copyFieldsWithDeletions(final FieldsWriter fieldsWriter, final IndexReader reader,
 final FieldsReader matchingFieldsReader)
 throws IOException, MergeAbortedException, CorruptIndexException {
 int docCount = 0;
@@ -419,7 +407,7 @@ final class SegmentMerger {
 }
 // NOTE: it's very important to first assign to doc then pass it to
 // termVectorsWriter.addAllDocVectors; see LUCENE-1282
-Document doc = reader.document(j, fieldSelectorMerge);
+Document doc = reader.document(j);
 fieldsWriter.addDocument(doc);
 docCount++;
 checkAbort.work(300);
@@ -428,8 +416,7 @@ final class SegmentMerger {
 return docCount;
 }
 
-private int copyFieldsNoDeletions(FieldSelector fieldSelectorMerge,
-final FieldsWriter fieldsWriter, final IndexReader reader,
+private int copyFieldsNoDeletions(final FieldsWriter fieldsWriter, final IndexReader reader,
 final FieldsReader matchingFieldsReader)
 throws IOException, MergeAbortedException, CorruptIndexException {
 final int maxDoc = reader.maxDoc();
@@ -447,7 +434,7 @@ final class SegmentMerger {
 for (; docCount < maxDoc; docCount++) {
 // NOTE: it's very important to first assign to doc then pass it to
 // termVectorsWriter.addAllDocVectors; see LUCENE-1282
-Document doc = reader.document(docCount, fieldSelectorMerge);
+Document doc = reader.document(docCount);
 fieldsWriter.addDocument(doc);
 checkAbort.work(300);
 }
@@ -37,13 +37,11 @@ public class TestBinaryDocument extends LuceneTestCase
 throws Exception
 {
 Fieldable binaryFldStored = new Field("binaryStored", binaryValStored.getBytes(), Field.Store.YES);
-Fieldable binaryFldCompressed = new Field("binaryCompressed", binaryValCompressed.getBytes(), Field.Store.COMPRESS);
 Fieldable stringFldStored = new Field("stringStored", binaryValStored, Field.Store.YES, Field.Index.NO, Field.TermVector.NO);
-Fieldable stringFldCompressed = new Field("stringCompressed", binaryValCompressed, Field.Store.COMPRESS, Field.Index.NO, Field.TermVector.NO);
 
 try {
 // binary fields with store off are not allowed
-new Field("fail", binaryValCompressed.getBytes(), Field.Store.NO);
+new Field("fail", binaryValStored.getBytes(), Field.Store.NO);
 fail();
 }
 catch (IllegalArgumentException iae) {
@@ -53,13 +51,11 @@ public class TestBinaryDocument extends LuceneTestCase
 Document doc = new Document();
 
 doc.add(binaryFldStored);
-doc.add(binaryFldCompressed);
 
 doc.add(stringFldStored);
-doc.add(stringFldCompressed);
 
 /** test for field count */
-assertEquals(4, doc.fields.size());
+assertEquals(2, doc.fields.size());
 
 /** add the doc to a ram index */
 MockRAMDirectory dir = new MockRAMDirectory();
@@ -76,18 +72,10 @@ public class TestBinaryDocument extends LuceneTestCase
 String binaryFldStoredTest = new String(docFromReader.getBinaryValue("binaryStored"));
 assertTrue(binaryFldStoredTest.equals(binaryValStored));
 
-/** fetch the binary compressed field and compare it's content with the original one */
-String binaryFldCompressedTest = new String(docFromReader.getBinaryValue("binaryCompressed"));
-assertTrue(binaryFldCompressedTest.equals(binaryValCompressed));
-
 /** fetch the string field and compare it's content with the original one */
 String stringFldStoredTest = docFromReader.get("stringStored");
 assertTrue(stringFldStoredTest.equals(binaryValStored));
 
-/** fetch the compressed string field and compare it's content with the original one */
-String stringFldCompressedTest = docFromReader.get("stringCompressed");
-assertTrue(stringFldCompressedTest.equals(binaryValCompressed));
-
 /** delete the document from index */
 reader.deleteDocument(0);
 assertEquals(0, reader.numDocs());
@@ -42,13 +42,6 @@ class DocHelper {
 public static final String TEXT_FIELD_2_KEY = "textField2";
 public static Field textField2 = new Field(TEXT_FIELD_2_KEY, FIELD_2_TEXT, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
 
-public static final String FIELD_2_COMPRESSED_TEXT = "field field field two text";
-//Fields will be lexicographically sorted. So, the order is: field, text, two
-public static final int [] COMPRESSED_FIELD_2_FREQS = {3, 1, 1};
-public static final String COMPRESSED_TEXT_FIELD_2_KEY = "compressedTextField2";
-public static Field compressedTextField2 = new Field(COMPRESSED_TEXT_FIELD_2_KEY, FIELD_2_COMPRESSED_TEXT, Field.Store.COMPRESS, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
-
-
 public static final String FIELD_3_TEXT = "aaaNoNorms aaaNoNorms bbbNoNorms";
 public static final String TEXT_FIELD_3_KEY = "textField3";
 public static Field textField3 = new Field(TEXT_FIELD_3_KEY, FIELD_3_TEXT, Field.Store.YES, Field.Index.ANALYZED);
@@ -124,7 +117,6 @@ class DocHelper {
 textField1,
 textField2,
 textField3,
-compressedTextField2,
 keyField,
 noNormsField,
 noTFField,
@@ -193,7 +185,6 @@ class DocHelper {
 nameValues = new HashMap();
 nameValues.put(TEXT_FIELD_1_KEY, FIELD_1_TEXT);
 nameValues.put(TEXT_FIELD_2_KEY, FIELD_2_TEXT);
-nameValues.put(COMPRESSED_TEXT_FIELD_2_KEY, FIELD_2_COMPRESSED_TEXT);
 nameValues.put(TEXT_FIELD_3_KEY, FIELD_3_TEXT);
 nameValues.put(KEYWORD_FIELD_KEY, KEYWORD_TEXT);
 nameValues.put(NO_NORMS_KEY, NO_NORMS_TEXT);
@@ -108,7 +108,6 @@ public class TestFieldsReader extends LuceneTestCase {
 lazyFieldNames.add(DocHelper.LAZY_FIELD_KEY);
 lazyFieldNames.add(DocHelper.LAZY_FIELD_BINARY_KEY);
 lazyFieldNames.add(DocHelper.TEXT_FIELD_UTF2_KEY);
-lazyFieldNames.add(DocHelper.COMPRESSED_TEXT_FIELD_2_KEY);
 SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(loadFieldNames, lazyFieldNames);
 Document doc = reader.doc(0, fieldSelector);
 assertTrue("doc is null and it shouldn't be", doc != null);
@@ -118,13 +117,6 @@ public class TestFieldsReader extends LuceneTestCase {
 String value = field.stringValue();
 assertTrue("value is null and it shouldn't be", value != null);
 assertTrue(value + " is not equal to " + DocHelper.LAZY_FIELD_TEXT, value.equals(DocHelper.LAZY_FIELD_TEXT) == true);
-field = doc.getFieldable(DocHelper.COMPRESSED_TEXT_FIELD_2_KEY);
-assertTrue("field is null and it shouldn't be", field != null);
-assertTrue("field is not lazy and it should be", field.isLazy());
-assertTrue("binaryValue isn't null for lazy string field", field.binaryValue() == null);
-value = field.stringValue();
-assertTrue("value is null and it shouldn't be", value != null);
-assertTrue(value + " is not equal to " + DocHelper.FIELD_2_COMPRESSED_TEXT, value.equals(DocHelper.FIELD_2_COMPRESSED_TEXT) == true);
 field = doc.getFieldable(DocHelper.TEXT_FIELD_1_KEY);
 assertTrue("field is null and it shouldn't be", field != null);
 assertTrue("Field is lazy and it should not be", field.isLazy() == false);
@@ -165,7 +157,6 @@ public class TestFieldsReader extends LuceneTestCase {
 lazyFieldNames.add(DocHelper.LAZY_FIELD_KEY);
 lazyFieldNames.add(DocHelper.LAZY_FIELD_BINARY_KEY);
 lazyFieldNames.add(DocHelper.TEXT_FIELD_UTF2_KEY);
-lazyFieldNames.add(DocHelper.COMPRESSED_TEXT_FIELD_2_KEY);
 SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(loadFieldNames, lazyFieldNames);
 Document doc = reader.doc(0, fieldSelector);
 assertTrue("doc is null and it shouldn't be", doc != null);
@@ -281,7 +272,6 @@ public class TestFieldsReader extends LuceneTestCase {
 doc = reader.doc(0, new FieldSelector(){
 public FieldSelectorResult accept(String fieldName) {
 if (fieldName.equals(DocHelper.TEXT_FIELD_1_KEY) ||
-fieldName.equals(DocHelper.COMPRESSED_TEXT_FIELD_2_KEY) ||
 fieldName.equals(DocHelper.LAZY_FIELD_BINARY_KEY))
 return FieldSelectorResult.SIZE;
 else if (fieldName.equals(DocHelper.TEXT_FIELD_3_KEY))
@@ -365,7 +365,6 @@ public class TestIndexReader extends LuceneTestCase
 writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.LIMITED);
 Document doc = new Document();
 doc.add(new Field("bin1", bin, Field.Store.YES));
-doc.add(new Field("bin2", bin, Field.Store.COMPRESS));
 doc.add(new Field("junk", "junk text", Field.Store.NO, Field.Index.ANALYZED));
 writer.addDocument(doc);
 writer.close();
@@ -381,16 +380,6 @@ public class TestIndexReader extends LuceneTestCase
 for (int i = 0; i < bin.length; i++) {
 assertEquals(bin[i], data1[i + b1.getBinaryOffset()]);
 }
-fields = doc.getFields("bin2");
-assertNotNull(fields);
-assertEquals(1, fields.length);
-b1 = fields[0];
-assertTrue(b1.isBinary());
-data1 = b1.getBinaryValue();
-assertEquals(bin.length, b1.getBinaryLength());
-for (int i = 0; i < bin.length; i++) {
-assertEquals(bin[i], data1[i + b1.getBinaryOffset()]);
-}
 Set lazyFields = new HashSet();
 lazyFields.add("bin1");
 FieldSelector sel = new SetBasedFieldSelector(new HashSet(), lazyFields);
@@ -425,16 +414,6 @@ public class TestIndexReader extends LuceneTestCase
 for (int i = 0; i < bin.length; i++) {
 assertEquals(bin[i], data1[i + b1.getBinaryOffset()]);
 }
-fields = doc.getFields("bin2");
-assertNotNull(fields);
-assertEquals(1, fields.length);
-b1 = fields[0];
-assertTrue(b1.isBinary());
-data1 = b1.getBinaryValue();
-assertEquals(bin.length, b1.getBinaryLength());
-for (int i = 0; i < bin.length; i++) {
-assertEquals(bin[i], data1[i + b1.getBinaryOffset()]);
-}
 reader.close();
 }
 
@@ -3964,48 +3964,6 @@ public class TestIndexWriter extends BaseTokenStreamTestCase {
 dir.close();
 }
 
-// LUCENE-1374
-public void testMergeCompressedFields() throws IOException {
-File indexDir = new File(System.getProperty("tempDir"), "mergecompressedfields");
-Directory dir = FSDirectory.open(indexDir);
-try {
-for(int i=0;i<5;i++) {
-// Must make a new writer & doc each time, w/
-// different fields, so bulk merge of stored fields
-// cannot run:
-IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), i==0, IndexWriter.MaxFieldLength.UNLIMITED);
-try {
-w.setMergeFactor(5);
-w.setMergeScheduler(new SerialMergeScheduler());
-Document doc = new Document();
-doc.add(new Field("test1", "this is some data that will be compressed this this this", Field.Store.COMPRESS, Field.Index.NO));
-doc.add(new Field("test2", new byte[20], Field.Store.COMPRESS));
-doc.add(new Field("field" + i, "random field", Field.Store.NO, Field.Index.ANALYZED));
-w.addDocument(doc);
-} finally {
-w.close();
-}
-}
-
-byte[] cmp = new byte[20];
-
-IndexReader r = IndexReader.open(dir, true);
-try {
-for(int i=0;i<5;i++) {
-Document doc = r.document(i);
-assertEquals("this is some data that will be compressed this this this", doc.getField("test1").stringValue());
-byte[] b = doc.getField("test2").binaryValue();
-assertTrue(Arrays.equals(b, cmp));
-}
-} finally {
-r.close();
-}
-} finally {
-dir.close();
-_TestUtil.rmDir(indexDir);
-}
-}
-
 // LUCENE-1382
 public void testCommitUserData() throws IOException {
 Directory dir = new MockRAMDirectory();
@@ -87,7 +87,7 @@ public class TestSegmentMerger extends LuceneTestCase {
 Collection stored = mergedReader.getFieldNames(IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR);
 assertTrue(stored != null);
 //System.out.println("stored size: " + stored.size());
-assertTrue("We do not have 4 fields that were indexed with term vector",stored.size() == 4);
+assertTrue("We do not have 3 fields that were indexed with term vector",stored.size() == 3);
 
 TermFreqVector vector = mergedReader.getTermFreqVector(0, DocHelper.TEXT_FIELD_2_KEY);
 assertTrue(vector != null);
@@ -200,6 +200,6 @@ public class TestSegmentReader extends LuceneTestCase {
 
 TermFreqVector [] results = reader.getTermFreqVectors(0);
 assertTrue(results != null);
-assertTrue("We do not have 4 term freq vectors, we have: " + results.length, results.length == 4);
+assertTrue("We do not have 3 term freq vectors, we have: " + results.length, results.length == 3);
 }
 }