LUCENE-1960: Remove deprecated Field.Store.COMPRESS.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@822978 13f79535-47bb-0310-9956-ffa450edef68
Author: Michael Busch
Date:   2009-10-08 00:54:59 +00:00
Parent: 39b4a0e4dc
Commit: 6be57e324e

20 changed files with 66 additions and 393 deletions

View File

@ -34,6 +34,8 @@ API Changes
* LUCENE-1957: Remove Filter.bits(IndexReader) method and make
Filter.getDocIdSet(IndexReader) abstract. (Michael Busch)
* LUCENE-1960: Remove deprecated Field.Store.COMPRESS. (Michael Busch)
Bug fixes
New features

View File

@ -42,7 +42,7 @@
<property name="Name" value="Lucene"/>
<property name="dev.version" value="3.0-dev"/>
<property name="version" value="${dev.version}"/>
<property name="compatibility.tag" value="lucene_2_9_back_compat_tests_20091007b"/>
<property name="compatibility.tag" value="lucene_2_9_back_compat_tests_20091007c"/>
<property name="spec.version" value="${version}"/>
<property name="year" value="2000-${current.year}"/>
<property name="final.name" value="lucene-${name}-${version}"/>

View File

@ -33,7 +33,6 @@ class FieldSetting implements Serializable {
boolean stored = false;
boolean indexed = false;
boolean tokenized = false;
boolean compressed = false;
FieldSetting() {
}

View File

@ -44,9 +44,6 @@ class FieldSettings implements Serializable {
if (fieldSetting.stored) {
setting.stored = true;
}
if (fieldSetting.compressed) {
setting.compressed = true;
}
if ("b3".equals(fieldSetting.fieldName)) {
System.currentTimeMillis();

View File

@ -480,9 +480,6 @@ public class InstantiatedIndexWriter {
if (field.isTokenized()) {
fieldSetting.tokenized = true;
}
if (field.isCompressed()) {
fieldSetting.compressed = true;
}
if (field.isStored()) {
fieldSetting.stored = true;
}

View File

@ -36,7 +36,6 @@ public abstract class AbstractField implements Fieldable {
protected boolean isIndexed = true;
protected boolean isTokenized = true;
protected boolean isBinary = false;
protected boolean isCompressed = false;
protected boolean lazy = false;
protected boolean omitTermFreqAndPositions = false;
protected float boost = 1.0f;
@ -59,15 +58,9 @@ public abstract class AbstractField implements Fieldable {
if (store == Field.Store.YES){
this.isStored = true;
this.isCompressed = false;
}
else if (store == Field.Store.COMPRESS) {
this.isStored = true;
this.isCompressed = true;
}
else if (store == Field.Store.NO){
this.isStored = false;
this.isCompressed = false;
}
else
throw new IllegalArgumentException("unknown store parameter " + store);
@ -189,9 +182,6 @@ public abstract class AbstractField implements Fieldable {
Reader-valued. */
public final boolean isTokenized() { return isTokenized; }
/** True if the value of the field is stored and compressed within the index */
public final boolean isCompressed() { return isCompressed; }
/** True iff the term or terms used to index this field are stored as a term
* vector, available from {@link org.apache.lucene.index.IndexReader#getTermFreqVector(int,String)}.
* These methods do not provide access to the original content of the field,
@ -248,10 +238,7 @@ public abstract class AbstractField implements Fieldable {
*/
public int getBinaryLength() {
if (isBinary) {
if (!isCompressed)
return binaryLength;
else
return ((byte[]) fieldsData).length;
return binaryLength;
} else if (fieldsData instanceof byte[])
return ((byte[]) fieldsData).length;
else
@ -308,10 +295,6 @@ public abstract class AbstractField implements Fieldable {
StringBuilder result = new StringBuilder();
if (isStored) {
result.append("stored");
if (isCompressed)
result.append("/compressed");
else
result.append("/uncompressed");
}
if (isIndexed) {
if (result.length() > 0)

View File

@ -26,9 +26,7 @@ import org.apache.lucene.util.UnicodeUtil;
/** Simple utility class providing static methods to
* compress and decompress binary data for stored fields.
* This class uses java.util.zip.Deflater and Inflater
* classes to compress and decompress, which is the same
* format previously used by the now deprecated
* Field.Store.COMPRESS.
* classes to compress and decompress.
*/
public class CompressionTools {

View File

@ -42,16 +42,6 @@ public final class Field extends AbstractField implements Fieldable, Serializabl
super(name);
}
/** Store the original field value in the index in a compressed form. This is
* useful for long documents and for binary valued fields.
* @deprecated Please use {@link CompressionTools} instead.
* For string fields that were previously indexed and stored using compression,
* the new way to achieve this is: First add the field indexed-only (no store)
* and additionally using the same field name as a binary, stored field
* with {@link CompressionTools#compressString}.
*/
public static final Store COMPRESS = new Store("COMPRESS");
/** Store the original field value in the index. This is useful for short texts
* like a document's title which should be displayed with the results. The
* value is stored in its original form, i.e. no analyzer is used before it is
@ -346,15 +336,9 @@ public final class Field extends AbstractField implements Fieldable, Serializabl
if (store == Store.YES){
this.isStored = true;
this.isCompressed = false;
}
else if (store == Store.COMPRESS) {
this.isStored = true;
this.isCompressed = true;
}
else if (store == Store.NO){
this.isStored = false;
this.isCompressed = false;
}
else
throw new IllegalArgumentException("unknown store parameter " + store);
@ -422,7 +406,6 @@ public final class Field extends AbstractField implements Fieldable, Serializabl
this.fieldsData = reader;
this.isStored = false;
this.isCompressed = false;
this.isIndexed = true;
this.isTokenized = true;
@ -470,7 +453,6 @@ public final class Field extends AbstractField implements Fieldable, Serializabl
this.tokenStream = tokenStream;
this.isStored = false;
this.isCompressed = false;
this.isIndexed = true;
this.isTokenized = true;
@ -515,11 +497,6 @@ public final class Field extends AbstractField implements Fieldable, Serializabl
if (store == Store.YES) {
isStored = true;
isCompressed = false;
}
else if (store == Store.COMPRESS) {
isStored = true;
isCompressed = true;
}
else if (store == Store.NO)
throw new IllegalArgumentException("binary values can't be unstored");

View File

@ -54,17 +54,6 @@ public final class FieldSelectorResult implements Serializable {
* {@link Document#add(Fieldable)} should be called by the Reader.
*/
public transient static final FieldSelectorResult LOAD_AND_BREAK = new FieldSelectorResult(3);
/**
* Behaves much like {@link #LOAD} but does not uncompress any compressed data. This is used for internal purposes.
* {@link Document#getField(String)} and {@link Document#getFieldable(String)} should not return null.
* <p/>
* {@link Document#add(Fieldable)} should be called by
* the Reader.
* @deprecated This is an internal option only, and is
* no longer needed now that {@link CompressionTools}
* is used for field compression.
*/
public transient static final FieldSelectorResult LOAD_FOR_MERGE = new FieldSelectorResult(4);
/** Expert: Load the size of this {@link Field} rather than its value.
* Size is measured as number of bytes required to store the field == bytes for a binary or any compressed value, and 2*chars for a String value.
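With LOAD_FOR_MERGE removed, the remaining FieldSelectorResult values (LOAD, LAZY_LOAD, LOAD_AND_BREAK, SIZE, SIZE_AND_BREAK, NO_LOAD) are all application-facing. A sketch of a selector that loads only the size of a large stored field, assuming the 2.9/3.0 FieldSelector API; the field name "body" is illustrative:

import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.FieldSelectorResult;

class SizeOnlySelectorSketch {
  static final FieldSelector SELECTOR = new FieldSelector() {
    public FieldSelectorResult accept(String fieldName) {
      if ("body".equals(fieldName)) {
        return FieldSelectorResult.SIZE; // load only the stored size, not the value
      }
      return FieldSelectorResult.LOAD;   // load everything else normally
    }
  };
}

Passed to IndexReader.document(docId, selector), such a selector lets callers inspect stored-field sizes without materializing the values.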

View File

@ -113,9 +113,6 @@ public interface Fieldable extends Serializable {
Reader-valued. */
boolean isTokenized();
/** True if the value of the field is stored and compressed within the index */
boolean isCompressed();
/** True if the term or terms used to index this field are stored as a term
* vector, available from {@link org.apache.lucene.index.IndexReader#getTermFreqVector(int,String)}.
* These methods do not provide access to the original content of the field,

View File

@ -24,11 +24,9 @@ import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.util.CloseableThreadLocal;
import org.apache.lucene.util.StringHelper;
import java.io.IOException;
import java.io.Reader;
import java.util.zip.DataFormatException;
/**
* Class responsible for access to stored document fields.
@ -216,35 +214,31 @@ final class FieldsReader implements Cloneable {
FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.accept(fi.name);
byte bits = fieldsStream.readByte();
assert bits <= FieldsWriter.FIELD_IS_COMPRESSED + FieldsWriter.FIELD_IS_TOKENIZED + FieldsWriter.FIELD_IS_BINARY;
assert bits <= FieldsWriter.FIELD_IS_TOKENIZED + FieldsWriter.FIELD_IS_BINARY;
boolean compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
boolean tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
boolean binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;
//TODO: Find an alternative approach here if this list continues to grow beyond the
//list of 5 or 6 currently here. See Lucene 762 for discussion
if (acceptField.equals(FieldSelectorResult.LOAD)) {
addField(doc, fi, binary, compressed, tokenize);
}
else if (acceptField.equals(FieldSelectorResult.LOAD_FOR_MERGE)) {
addFieldForMerge(doc, fi, binary, compressed, tokenize);
addField(doc, fi, binary, tokenize);
}
else if (acceptField.equals(FieldSelectorResult.LOAD_AND_BREAK)){
addField(doc, fi, binary, compressed, tokenize);
addField(doc, fi, binary, tokenize);
break;//Get out of this loop
}
else if (acceptField.equals(FieldSelectorResult.LAZY_LOAD)) {
addFieldLazy(doc, fi, binary, compressed, tokenize);
addFieldLazy(doc, fi, binary, tokenize);
}
else if (acceptField.equals(FieldSelectorResult.SIZE)){
skipField(binary, compressed, addFieldSize(doc, fi, binary, compressed));
skipField(binary, addFieldSize(doc, fi, binary));
}
else if (acceptField.equals(FieldSelectorResult.SIZE_AND_BREAK)){
addFieldSize(doc, fi, binary, compressed);
addFieldSize(doc, fi, binary);
break;
}
else {
skipField(binary, compressed);
skipField(binary);
}
}
@ -281,12 +275,12 @@ final class FieldsReader implements Cloneable {
* Skip the field. We still have to read some of the information about the field, but can skip past the actual content.
* This will have the most payoff on large fields.
*/
private void skipField(boolean binary, boolean compressed) throws IOException {
skipField(binary, compressed, fieldsStream.readVInt());
private void skipField(boolean binary) throws IOException {
skipField(binary, fieldsStream.readVInt());
}
private void skipField(boolean binary, boolean compressed, int toRead) throws IOException {
if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES || binary || compressed) {
private void skipField(boolean binary, int toRead) throws IOException {
if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES || binary) {
fieldsStream.seek(fieldsStream.getFilePointer() + toRead);
} else {
// We need to skip chars. This will slow us down, but still better
@ -294,17 +288,12 @@ final class FieldsReader implements Cloneable {
}
}
private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws IOException {
private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean tokenize) throws IOException {
if (binary) {
int toRead = fieldsStream.readVInt();
long pointer = fieldsStream.getFilePointer();
if (compressed) {
//was: doc.add(new Fieldable(fi.name, uncompress(b), Fieldable.Store.COMPRESS));
doc.add(new LazyField(fi.name, Field.Store.COMPRESS, toRead, pointer, binary));
} else {
//was: doc.add(new Fieldable(fi.name, b, Fieldable.Store.YES));
doc.add(new LazyField(fi.name, Field.Store.YES, toRead, pointer, binary));
}
//was: doc.add(new Fieldable(fi.name, b, Fieldable.Store.YES));
doc.add(new LazyField(fi.name, Field.Store.YES, toRead, pointer, binary));
//Need to move the pointer ahead by toRead positions
fieldsStream.seek(pointer + toRead);
} else {
@ -313,89 +302,43 @@ final class FieldsReader implements Cloneable {
Field.TermVector termVector = getTermVectorType(fi);
AbstractField f;
if (compressed) {
store = Field.Store.COMPRESS;
int toRead = fieldsStream.readVInt();
long pointer = fieldsStream.getFilePointer();
f = new LazyField(fi.name, store, toRead, pointer, binary);
//skip over the part that we aren't loading
fieldsStream.seek(pointer + toRead);
f.setOmitNorms(fi.omitNorms);
f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
} else {
int length = fieldsStream.readVInt();
long pointer = fieldsStream.getFilePointer();
//Skip ahead of where we are by the length of what is stored
if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
fieldsStream.seek(pointer+length);
else
fieldsStream.skipChars(length);
f = new LazyField(fi.name, store, index, termVector, length, pointer, binary);
f.setOmitNorms(fi.omitNorms);
f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
}
int length = fieldsStream.readVInt();
long pointer = fieldsStream.getFilePointer();
//Skip ahead of where we are by the length of what is stored
if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
fieldsStream.seek(pointer+length);
else
fieldsStream.skipChars(length);
f = new LazyField(fi.name, store, index, termVector, length, pointer, binary);
f.setOmitNorms(fi.omitNorms);
f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
doc.add(f);
}
}
// in merge mode we don't uncompress the data of a compressed field
private void addFieldForMerge(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws IOException {
Object data;
if (binary || compressed) {
int toRead = fieldsStream.readVInt();
final byte[] b = new byte[toRead];
fieldsStream.readBytes(b, 0, b.length);
data = b;
} else {
data = fieldsStream.readString();
}
doc.add(new FieldForMerge(data, fi, binary, compressed, tokenize));
}
private void addField(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws CorruptIndexException, IOException {
private void addField(Document doc, FieldInfo fi, boolean binary, boolean tokenize) throws CorruptIndexException, IOException {
//we have a binary stored field, and it may be compressed
if (binary) {
int toRead = fieldsStream.readVInt();
final byte[] b = new byte[toRead];
fieldsStream.readBytes(b, 0, b.length);
if (compressed)
doc.add(new Field(fi.name, uncompress(b), Field.Store.COMPRESS));
else
doc.add(new Field(fi.name, b, Field.Store.YES));
doc.add(new Field(fi.name, b, Field.Store.YES));
} else {
Field.Store store = Field.Store.YES;
Field.Index index = getIndexType(fi, tokenize);
Field.TermVector termVector = getTermVectorType(fi);
AbstractField f;
if (compressed) {
store = Field.Store.COMPRESS;
int toRead = fieldsStream.readVInt();
final byte[] b = new byte[toRead];
fieldsStream.readBytes(b, 0, b.length);
f = new Field(fi.name, // field name
false,
new String(uncompress(b), "UTF-8"), // uncompress the value and add as string
store,
index,
termVector);
f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
f.setOmitNorms(fi.omitNorms);
} else {
f = new Field(fi.name, // name
false,
fieldsStream.readString(), // read value
store,
index,
termVector);
f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
f.setOmitNorms(fi.omitNorms);
}
f = new Field(fi.name, // name
false,
fieldsStream.readString(), // read value
store,
index,
termVector);
f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
f.setOmitNorms(fi.omitNorms);
doc.add(f);
}
}
@ -403,8 +346,8 @@ final class FieldsReader implements Cloneable {
// Add the size of field as a byte[] containing the 4 bytes of the integer byte size (high order byte first; char = 2 bytes)
// Read just the size -- caller must skip the field content to continue reading fields
// Return the size in bytes or chars, depending on field type
private int addFieldSize(Document doc, FieldInfo fi, boolean binary, boolean compressed) throws IOException {
int size = fieldsStream.readVInt(), bytesize = binary || compressed ? size : 2*size;
private int addFieldSize(Document doc, FieldInfo fi, boolean binary) throws IOException {
int size = fieldsStream.readVInt(), bytesize = binary ? size : 2*size;
byte[] sizebytes = new byte[4];
sizebytes[0] = (byte) (bytesize>>>24);
sizebytes[1] = (byte) (bytesize>>>16);
@ -517,21 +460,15 @@ final class FieldsReader implements Cloneable {
IndexInput localFieldsStream = getFieldStream();
try {
localFieldsStream.seek(pointer);
if (isCompressed) {
final byte[] b = new byte[toRead];
localFieldsStream.readBytes(b, 0, b.length);
fieldsData = new String(uncompress(b), "UTF-8");
if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) {
byte[] bytes = new byte[toRead];
localFieldsStream.readBytes(bytes, 0, toRead);
fieldsData = new String(bytes, "UTF-8");
} else {
if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) {
byte[] bytes = new byte[toRead];
localFieldsStream.readBytes(bytes, 0, toRead);
fieldsData = new String(bytes, "UTF-8");
} else {
//read in chars b/c we already know the length we need to read
char[] chars = new char[toRead];
localFieldsStream.readChars(chars, 0, toRead);
fieldsData = new String(chars);
}
//read in chars b/c we already know the length we need to read
char[] chars = new char[toRead];
localFieldsStream.readChars(chars, 0, toRead);
fieldsData = new String(chars);
}
} catch (IOException e) {
throw new FieldReaderException(e);
@ -580,11 +517,7 @@ final class FieldsReader implements Cloneable {
try {
localFieldsStream.seek(pointer);
localFieldsStream.readBytes(b, 0, toRead);
if (isCompressed == true) {
fieldsData = uncompress(b);
} else {
fieldsData = b;
}
fieldsData = b;
} catch (IOException e) {
throw new FieldReaderException(e);
}
@ -598,58 +531,4 @@ final class FieldsReader implements Cloneable {
return null;
}
}
private byte[] uncompress(byte[] b)
throws CorruptIndexException {
try {
return CompressionTools.decompress(b);
} catch (DataFormatException e) {
// this will happen if the field is not compressed
CorruptIndexException newException = new CorruptIndexException("field data are in wrong format: " + e.toString());
newException.initCause(e);
throw newException;
}
}
// Instances of this class hold field properties and data
// for merge
final static class FieldForMerge extends AbstractField {
public String stringValue() {
return (String) this.fieldsData;
}
public Reader readerValue() {
// not needed for merge
return null;
}
public byte[] binaryValue() {
return (byte[]) this.fieldsData;
}
public TokenStream tokenStreamValue() {
// not needed for merge
return null;
}
public FieldForMerge(Object value, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) {
this.isStored = true;
this.fieldsData = value;
this.isCompressed = compressed;
this.isBinary = binary;
if (binary)
binaryLength = ((byte[]) value).length;
this.isTokenized = tokenize;
this.name = StringHelper.intern(fi.name);
this.isIndexed = fi.isIndexed;
this.omitNorms = fi.omitNorms;
this.omitTermFreqAndPositions = fi.omitTermFreqAndPositions;
this.storeOffsetWithTermVector = fi.storeOffsetWithTermVector;
this.storePositionWithTermVector = fi.storePositionWithTermVector;
this.storeTermVector = fi.storeTermVector;
}
}
}

View File

@ -21,7 +21,6 @@ import java.util.Iterator;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.CompressionTools;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.store.IndexOutput;
@ -31,7 +30,6 @@ final class FieldsWriter
{
static final byte FIELD_IS_TOKENIZED = 0x1;
static final byte FIELD_IS_BINARY = 0x2;
static final byte FIELD_IS_COMPRESSED = 0x4;
// Original format
static final int FORMAT = 0;
@ -172,64 +170,28 @@ final class FieldsWriter
}
final void writeField(FieldInfo fi, Fieldable field) throws IOException {
// if the field as an instanceof FieldsReader.FieldForMerge, we're in merge mode
// and field.binaryValue() already returns the compressed value for a field
// with isCompressed()==true, so we disable compression in that case
boolean disableCompression = (field instanceof FieldsReader.FieldForMerge);
fieldsStream.writeVInt(fi.number);
byte bits = 0;
if (field.isTokenized())
bits |= FieldsWriter.FIELD_IS_TOKENIZED;
if (field.isBinary())
bits |= FieldsWriter.FIELD_IS_BINARY;
if (field.isCompressed())
bits |= FieldsWriter.FIELD_IS_COMPRESSED;
fieldsStream.writeByte(bits);
if (field.isCompressed()) {
// compression is enabled for the current field
if (field.isBinary()) {
final byte[] data;
final int len;
final int offset;
if (disableCompression) {
// optimized case for merging, the data
// is already compressed
data = field.getBinaryValue();
assert data != null;
len = field.getBinaryLength();
offset = field.getBinaryOffset();
} else {
// check if it is a binary field
if (field.isBinary()) {
data = CompressionTools.compress(field.getBinaryValue(), field.getBinaryOffset(), field.getBinaryLength());
} else {
byte x[] = field.stringValue().getBytes("UTF-8");
data = CompressionTools.compress(x, 0, x.length);
}
len = data.length;
offset = 0;
}
data = field.getBinaryValue();
len = field.getBinaryLength();
offset = field.getBinaryOffset();
fieldsStream.writeVInt(len);
fieldsStream.writeBytes(data, offset, len);
}
else {
// compression is disabled for the current field
if (field.isBinary()) {
final byte[] data;
final int len;
final int offset;
data = field.getBinaryValue();
len = field.getBinaryLength();
offset = field.getBinaryOffset();
fieldsStream.writeVInt(len);
fieldsStream.writeBytes(data, offset, len);
}
else {
fieldsStream.writeString(field.stringValue());
}
fieldsStream.writeString(field.stringValue());
}
}

View File

@ -24,8 +24,6 @@ import java.util.Iterator;
import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.FieldSelectorResult;
import org.apache.lucene.index.IndexReader.FieldOption;
import org.apache.lucene.index.MergePolicy.MergeAbortedException;
import org.apache.lucene.store.Directory;
@ -321,15 +319,6 @@ final class SegmentMerger {
setMatchingSegmentReaders();
if (mergeDocStores) {
// for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're
// in merge mode, we use this FieldSelector
FieldSelector fieldSelectorMerge = new FieldSelector() {
public FieldSelectorResult accept(String fieldName) {
return FieldSelectorResult.LOAD_FOR_MERGE;
}
};
// merge field values
final FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);
@ -346,10 +335,10 @@ final class SegmentMerger {
}
}
if (reader.hasDeletions()) {
docCount += copyFieldsWithDeletions(fieldSelectorMerge, fieldsWriter,
docCount += copyFieldsWithDeletions(fieldsWriter,
reader, matchingFieldsReader);
} else {
docCount += copyFieldsNoDeletions(fieldSelectorMerge, fieldsWriter,
docCount += copyFieldsNoDeletions(fieldsWriter,
reader, matchingFieldsReader);
}
}
@ -379,8 +368,7 @@ final class SegmentMerger {
return docCount;
}
private int copyFieldsWithDeletions(final FieldSelector fieldSelectorMerge,
final FieldsWriter fieldsWriter, final IndexReader reader,
private int copyFieldsWithDeletions(final FieldsWriter fieldsWriter, final IndexReader reader,
final FieldsReader matchingFieldsReader)
throws IOException, MergeAbortedException, CorruptIndexException {
int docCount = 0;
@ -419,7 +407,7 @@ final class SegmentMerger {
}
// NOTE: it's very important to first assign to doc then pass it to
// termVectorsWriter.addAllDocVectors; see LUCENE-1282
Document doc = reader.document(j, fieldSelectorMerge);
Document doc = reader.document(j);
fieldsWriter.addDocument(doc);
docCount++;
checkAbort.work(300);
@ -428,8 +416,7 @@ final class SegmentMerger {
return docCount;
}
private int copyFieldsNoDeletions(FieldSelector fieldSelectorMerge,
final FieldsWriter fieldsWriter, final IndexReader reader,
private int copyFieldsNoDeletions(final FieldsWriter fieldsWriter, final IndexReader reader,
final FieldsReader matchingFieldsReader)
throws IOException, MergeAbortedException, CorruptIndexException {
final int maxDoc = reader.maxDoc();
@ -447,7 +434,7 @@ final class SegmentMerger {
for (; docCount < maxDoc; docCount++) {
// NOTE: it's very important to first assign to doc then pass it to
// termVectorsWriter.addAllDocVectors; see LUCENE-1282
Document doc = reader.document(docCount, fieldSelectorMerge);
Document doc = reader.document(docCount);
fieldsWriter.addDocument(doc);
checkAbort.work(300);
}

View File

@ -37,13 +37,11 @@ public class TestBinaryDocument extends LuceneTestCase
throws Exception
{
Fieldable binaryFldStored = new Field("binaryStored", binaryValStored.getBytes(), Field.Store.YES);
Fieldable binaryFldCompressed = new Field("binaryCompressed", binaryValCompressed.getBytes(), Field.Store.COMPRESS);
Fieldable stringFldStored = new Field("stringStored", binaryValStored, Field.Store.YES, Field.Index.NO, Field.TermVector.NO);
Fieldable stringFldCompressed = new Field("stringCompressed", binaryValCompressed, Field.Store.COMPRESS, Field.Index.NO, Field.TermVector.NO);
try {
// binary fields with store off are not allowed
new Field("fail", binaryValCompressed.getBytes(), Field.Store.NO);
new Field("fail", binaryValStored.getBytes(), Field.Store.NO);
fail();
}
catch (IllegalArgumentException iae) {
@ -53,13 +51,11 @@ public class TestBinaryDocument extends LuceneTestCase
Document doc = new Document();
doc.add(binaryFldStored);
doc.add(binaryFldCompressed);
doc.add(stringFldStored);
doc.add(stringFldCompressed);
/** test for field count */
assertEquals(4, doc.fields.size());
assertEquals(2, doc.fields.size());
/** add the doc to a ram index */
MockRAMDirectory dir = new MockRAMDirectory();
@ -76,18 +72,10 @@ public class TestBinaryDocument extends LuceneTestCase
String binaryFldStoredTest = new String(docFromReader.getBinaryValue("binaryStored"));
assertTrue(binaryFldStoredTest.equals(binaryValStored));
/** fetch the binary compressed field and compare it's content with the original one */
String binaryFldCompressedTest = new String(docFromReader.getBinaryValue("binaryCompressed"));
assertTrue(binaryFldCompressedTest.equals(binaryValCompressed));
/** fetch the string field and compare it's content with the original one */
String stringFldStoredTest = docFromReader.get("stringStored");
assertTrue(stringFldStoredTest.equals(binaryValStored));
/** fetch the compressed string field and compare it's content with the original one */
String stringFldCompressedTest = docFromReader.get("stringCompressed");
assertTrue(stringFldCompressedTest.equals(binaryValCompressed));
/** delete the document from index */
reader.deleteDocument(0);
assertEquals(0, reader.numDocs());

View File

@ -42,13 +42,6 @@ class DocHelper {
public static final String TEXT_FIELD_2_KEY = "textField2";
public static Field textField2 = new Field(TEXT_FIELD_2_KEY, FIELD_2_TEXT, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
public static final String FIELD_2_COMPRESSED_TEXT = "field field field two text";
//Fields will be lexicographically sorted. So, the order is: field, text, two
public static final int [] COMPRESSED_FIELD_2_FREQS = {3, 1, 1};
public static final String COMPRESSED_TEXT_FIELD_2_KEY = "compressedTextField2";
public static Field compressedTextField2 = new Field(COMPRESSED_TEXT_FIELD_2_KEY, FIELD_2_COMPRESSED_TEXT, Field.Store.COMPRESS, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
public static final String FIELD_3_TEXT = "aaaNoNorms aaaNoNorms bbbNoNorms";
public static final String TEXT_FIELD_3_KEY = "textField3";
public static Field textField3 = new Field(TEXT_FIELD_3_KEY, FIELD_3_TEXT, Field.Store.YES, Field.Index.ANALYZED);
@ -124,7 +117,6 @@ class DocHelper {
textField1,
textField2,
textField3,
compressedTextField2,
keyField,
noNormsField,
noTFField,
@ -193,7 +185,6 @@ class DocHelper {
nameValues = new HashMap();
nameValues.put(TEXT_FIELD_1_KEY, FIELD_1_TEXT);
nameValues.put(TEXT_FIELD_2_KEY, FIELD_2_TEXT);
nameValues.put(COMPRESSED_TEXT_FIELD_2_KEY, FIELD_2_COMPRESSED_TEXT);
nameValues.put(TEXT_FIELD_3_KEY, FIELD_3_TEXT);
nameValues.put(KEYWORD_FIELD_KEY, KEYWORD_TEXT);
nameValues.put(NO_NORMS_KEY, NO_NORMS_TEXT);

View File

@ -108,7 +108,6 @@ public class TestFieldsReader extends LuceneTestCase {
lazyFieldNames.add(DocHelper.LAZY_FIELD_KEY);
lazyFieldNames.add(DocHelper.LAZY_FIELD_BINARY_KEY);
lazyFieldNames.add(DocHelper.TEXT_FIELD_UTF2_KEY);
lazyFieldNames.add(DocHelper.COMPRESSED_TEXT_FIELD_2_KEY);
SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(loadFieldNames, lazyFieldNames);
Document doc = reader.doc(0, fieldSelector);
assertTrue("doc is null and it shouldn't be", doc != null);
@ -118,13 +117,6 @@ public class TestFieldsReader extends LuceneTestCase {
String value = field.stringValue();
assertTrue("value is null and it shouldn't be", value != null);
assertTrue(value + " is not equal to " + DocHelper.LAZY_FIELD_TEXT, value.equals(DocHelper.LAZY_FIELD_TEXT) == true);
field = doc.getFieldable(DocHelper.COMPRESSED_TEXT_FIELD_2_KEY);
assertTrue("field is null and it shouldn't be", field != null);
assertTrue("field is not lazy and it should be", field.isLazy());
assertTrue("binaryValue isn't null for lazy string field", field.binaryValue() == null);
value = field.stringValue();
assertTrue("value is null and it shouldn't be", value != null);
assertTrue(value + " is not equal to " + DocHelper.FIELD_2_COMPRESSED_TEXT, value.equals(DocHelper.FIELD_2_COMPRESSED_TEXT) == true);
field = doc.getFieldable(DocHelper.TEXT_FIELD_1_KEY);
assertTrue("field is null and it shouldn't be", field != null);
assertTrue("Field is lazy and it should not be", field.isLazy() == false);
@ -165,7 +157,6 @@ public class TestFieldsReader extends LuceneTestCase {
lazyFieldNames.add(DocHelper.LAZY_FIELD_KEY);
lazyFieldNames.add(DocHelper.LAZY_FIELD_BINARY_KEY);
lazyFieldNames.add(DocHelper.TEXT_FIELD_UTF2_KEY);
lazyFieldNames.add(DocHelper.COMPRESSED_TEXT_FIELD_2_KEY);
SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(loadFieldNames, lazyFieldNames);
Document doc = reader.doc(0, fieldSelector);
assertTrue("doc is null and it shouldn't be", doc != null);
@ -281,7 +272,6 @@ public class TestFieldsReader extends LuceneTestCase {
doc = reader.doc(0, new FieldSelector(){
public FieldSelectorResult accept(String fieldName) {
if (fieldName.equals(DocHelper.TEXT_FIELD_1_KEY) ||
fieldName.equals(DocHelper.COMPRESSED_TEXT_FIELD_2_KEY) ||
fieldName.equals(DocHelper.LAZY_FIELD_BINARY_KEY))
return FieldSelectorResult.SIZE;
else if (fieldName.equals(DocHelper.TEXT_FIELD_3_KEY))

View File

@ -365,7 +365,6 @@ public class TestIndexReader extends LuceneTestCase
writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.LIMITED);
Document doc = new Document();
doc.add(new Field("bin1", bin, Field.Store.YES));
doc.add(new Field("bin2", bin, Field.Store.COMPRESS));
doc.add(new Field("junk", "junk text", Field.Store.NO, Field.Index.ANALYZED));
writer.addDocument(doc);
writer.close();
@ -381,16 +380,6 @@ public class TestIndexReader extends LuceneTestCase
for (int i = 0; i < bin.length; i++) {
assertEquals(bin[i], data1[i + b1.getBinaryOffset()]);
}
fields = doc.getFields("bin2");
assertNotNull(fields);
assertEquals(1, fields.length);
b1 = fields[0];
assertTrue(b1.isBinary());
data1 = b1.getBinaryValue();
assertEquals(bin.length, b1.getBinaryLength());
for (int i = 0; i < bin.length; i++) {
assertEquals(bin[i], data1[i + b1.getBinaryOffset()]);
}
Set lazyFields = new HashSet();
lazyFields.add("bin1");
FieldSelector sel = new SetBasedFieldSelector(new HashSet(), lazyFields);
@ -425,16 +414,6 @@ public class TestIndexReader extends LuceneTestCase
for (int i = 0; i < bin.length; i++) {
assertEquals(bin[i], data1[i + b1.getBinaryOffset()]);
}
fields = doc.getFields("bin2");
assertNotNull(fields);
assertEquals(1, fields.length);
b1 = fields[0];
assertTrue(b1.isBinary());
data1 = b1.getBinaryValue();
assertEquals(bin.length, b1.getBinaryLength());
for (int i = 0; i < bin.length; i++) {
assertEquals(bin[i], data1[i + b1.getBinaryOffset()]);
}
reader.close();
}

View File

@ -3964,48 +3964,6 @@ public class TestIndexWriter extends BaseTokenStreamTestCase {
dir.close();
}
// LUCENE-1374
public void testMergeCompressedFields() throws IOException {
File indexDir = new File(System.getProperty("tempDir"), "mergecompressedfields");
Directory dir = FSDirectory.open(indexDir);
try {
for(int i=0;i<5;i++) {
// Must make a new writer & doc each time, w/
// different fields, so bulk merge of stored fields
// cannot run:
IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), i==0, IndexWriter.MaxFieldLength.UNLIMITED);
try {
w.setMergeFactor(5);
w.setMergeScheduler(new SerialMergeScheduler());
Document doc = new Document();
doc.add(new Field("test1", "this is some data that will be compressed this this this", Field.Store.COMPRESS, Field.Index.NO));
doc.add(new Field("test2", new byte[20], Field.Store.COMPRESS));
doc.add(new Field("field" + i, "random field", Field.Store.NO, Field.Index.ANALYZED));
w.addDocument(doc);
} finally {
w.close();
}
}
byte[] cmp = new byte[20];
IndexReader r = IndexReader.open(dir, true);
try {
for(int i=0;i<5;i++) {
Document doc = r.document(i);
assertEquals("this is some data that will be compressed this this this", doc.getField("test1").stringValue());
byte[] b = doc.getField("test2").binaryValue();
assertTrue(Arrays.equals(b, cmp));
}
} finally {
r.close();
}
} finally {
dir.close();
_TestUtil.rmDir(indexDir);
}
}
// LUCENE-1382
public void testCommitUserData() throws IOException {
Directory dir = new MockRAMDirectory();

View File

@ -87,7 +87,7 @@ public class TestSegmentMerger extends LuceneTestCase {
Collection stored = mergedReader.getFieldNames(IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR);
assertTrue(stored != null);
//System.out.println("stored size: " + stored.size());
assertTrue("We do not have 4 fields that were indexed with term vector",stored.size() == 4);
assertTrue("We do not have 3 fields that were indexed with term vector",stored.size() == 3);
TermFreqVector vector = mergedReader.getTermFreqVector(0, DocHelper.TEXT_FIELD_2_KEY);
assertTrue(vector != null);

View File

@ -200,6 +200,6 @@ public class TestSegmentReader extends LuceneTestCase {
TermFreqVector [] results = reader.getTermFreqVectors(0);
assertTrue(results != null);
assertTrue("We do not have 4 term freq vectors, we have: " + results.length, results.length == 4);
assertTrue("We do not have 3 term freq vectors, we have: " + results.length, results.length == 3);
}
}