mirror of https://github.com/apache/lucene.git

commit 6be57e324e
parent 39b4a0e4dc

LUCENE-1960: Remove deprecated Field.Store.COMPRESS.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@822978 13f79535-47bb-0310-9956-ffa450edef68
@@ -34,6 +34,8 @@ API Changes
 * LUCENE-1957: Remove Filter.bits(IndexReader) method and make
   Filter.getDocIdSet(IndexReader) abstract. (Michael Busch)
 
+* LUCENE-1960: Remove deprecated Field.Store.COMPRESS. (Michael Busch)
+
 Bug fixes
 
 New features

@@ -42,7 +42,7 @@
   <property name="Name" value="Lucene"/>
   <property name="dev.version" value="3.0-dev"/>
   <property name="version" value="${dev.version}"/>
-  <property name="compatibility.tag" value="lucene_2_9_back_compat_tests_20091007b"/>
+  <property name="compatibility.tag" value="lucene_2_9_back_compat_tests_20091007c"/>
   <property name="spec.version" value="${version}"/>
   <property name="year" value="2000-${current.year}"/>
   <property name="final.name" value="lucene-${name}-${version}"/>

@@ -33,7 +33,6 @@ class FieldSetting implements Serializable {
   boolean stored = false;
   boolean indexed = false;
   boolean tokenized = false;
-  boolean compressed = false;
 
   FieldSetting() {
   }

@@ -44,9 +44,6 @@ class FieldSettings implements Serializable {
     if (fieldSetting.stored) {
       setting.stored = true;
     }
-    if (fieldSetting.compressed) {
-      setting.compressed = true;
-    }
 
     if ("b3".equals(fieldSetting.fieldName)) {
       System.currentTimeMillis();

@@ -480,9 +480,6 @@ public class InstantiatedIndexWriter {
       if (field.isTokenized()) {
         fieldSetting.tokenized = true;
       }
-      if (field.isCompressed()) {
-        fieldSetting.compressed = true;
-      }
       if (field.isStored()) {
         fieldSetting.stored = true;
       }

@@ -36,7 +36,6 @@ public abstract class AbstractField implements Fieldable {
   protected boolean isIndexed = true;
   protected boolean isTokenized = true;
   protected boolean isBinary = false;
-  protected boolean isCompressed = false;
   protected boolean lazy = false;
   protected boolean omitTermFreqAndPositions = false;
   protected float boost = 1.0f;

@@ -59,15 +58,9 @@ public abstract class AbstractField implements Fieldable {
 
     if (store == Field.Store.YES){
       this.isStored = true;
-      this.isCompressed = false;
-    }
-    else if (store == Field.Store.COMPRESS) {
-      this.isStored = true;
-      this.isCompressed = true;
     }
     else if (store == Field.Store.NO){
       this.isStored = false;
-      this.isCompressed = false;
     }
     else
       throw new IllegalArgumentException("unknown store parameter " + store);

@@ -189,9 +182,6 @@ public abstract class AbstractField implements Fieldable {
    Reader-valued. */
   public final boolean isTokenized() { return isTokenized; }
 
-  /** True if the value of the field is stored and compressed within the index */
-  public final boolean isCompressed() { return isCompressed; }
-
   /** True iff the term or terms used to index this field are stored as a term
    *  vector, available from {@link org.apache.lucene.index.IndexReader#getTermFreqVector(int,String)}.
    *  These methods do not provide access to the original content of the field,

@@ -248,10 +238,7 @@ public abstract class AbstractField implements Fieldable {
    */
   public int getBinaryLength() {
     if (isBinary) {
-      if (!isCompressed)
-        return binaryLength;
-      else
-        return ((byte[]) fieldsData).length;
+      return binaryLength;
     } else if (fieldsData instanceof byte[])
       return ((byte[]) fieldsData).length;
     else

@@ -308,10 +295,6 @@ public abstract class AbstractField implements Fieldable {
     StringBuilder result = new StringBuilder();
     if (isStored) {
       result.append("stored");
-      if (isCompressed)
-        result.append("/compressed");
-      else
-        result.append("/uncompressed");
     }
     if (isIndexed) {
       if (result.length() > 0)

@@ -26,9 +26,7 @@ import org.apache.lucene.util.UnicodeUtil;
 /** Simple utility class providing static methods to
  *  compress and decompress binary data for stored fields.
  *  This class uses java.util.zip.Deflater and Inflater
- *  classes to compress and decompress, which is the same
- *  format previously used by the now deprecated
- *  Field.Store.COMPRESS.
+ *  classes to compress and decompress.
  */
 
 public class CompressionTools {

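With Field.Store.COMPRESS gone, CompressionTools is the only compression entry point left in the document API. A minimal sketch of round-tripping binary field data through it, using only the compress/decompress calls that appear elsewhere in this diff (the class and method names below are illustrative, not part of this commit):

    import java.util.zip.DataFormatException;

    import org.apache.lucene.document.CompressionTools;
    import org.apache.lucene.index.CorruptIndexException;

    class CompressionRoundTrip {
      // Compress application-supplied bytes before storing them in a binary field.
      static byte[] pack(byte[] value) {
        return CompressionTools.compress(value, 0, value.length);
      }

      // Decompress the stored bytes at read time; a DataFormatException means the
      // stored value was not produced by CompressionTools.compress.
      static byte[] unpack(byte[] stored) throws CorruptIndexException {
        try {
          return CompressionTools.decompress(stored);
        } catch (DataFormatException e) {
          CorruptIndexException bad = new CorruptIndexException("field data are in wrong format: " + e.toString());
          bad.initCause(e);
          throw bad;
        }
      }
    }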
@@ -42,16 +42,6 @@ public final class Field extends AbstractField implements Fieldable, Serializabl
       super(name);
     }
 
-    /** Store the original field value in the index in a compressed form. This is
-     * useful for long documents and for binary valued fields.
-     * @deprecated Please use {@link CompressionTools} instead.
-     * For string fields that were previously indexed and stored using compression,
-     * the new way to achieve this is: First add the field indexed-only (no store)
-     * and additionally using the same field name as a binary, stored field
-     * with {@link CompressionTools#compressString}.
-     */
-    public static final Store COMPRESS = new Store("COMPRESS");
-
     /** Store the original field value in the index. This is useful for short texts
      * like a document's title which should be displayed with the results. The
      * value is stored in its original form, i.e. no analyzer is used before it is

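The removed javadoc above spells out the migration for string fields that used to be stored with Field.Store.COMPRESS: index the value without storing it, and store a compressed copy as a binary field under the same name. A rough sketch of that pattern (field and class names are illustrative; it assumes CompressionTools.decompressString, the string counterpart of decompress):

    import org.apache.lucene.document.CompressionTools;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.IndexReader;

    class CompressedStoredField {
      // Index the text without storing it, and separately store a compressed
      // copy of the same value as a binary field with the same name.
      static Document build(String body) {
        Document doc = new Document();
        doc.add(new Field("body", body, Field.Store.NO, Field.Index.ANALYZED));
        doc.add(new Field("body", CompressionTools.compressString(body), Field.Store.YES));
        return doc;
      }

      // Fetch the stored bytes and decompress them back into the original string.
      static String read(IndexReader reader, int docId) throws Exception {
        byte[] stored = reader.document(docId).getBinaryValue("body");
        return CompressionTools.decompressString(stored);
      }
    }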
@@ -346,15 +336,9 @@ public final class Field extends AbstractField implements Fieldable, Serializabl
 
     if (store == Store.YES){
       this.isStored = true;
-      this.isCompressed = false;
-    }
-    else if (store == Store.COMPRESS) {
-      this.isStored = true;
-      this.isCompressed = true;
     }
     else if (store == Store.NO){
       this.isStored = false;
-      this.isCompressed = false;
     }
     else
       throw new IllegalArgumentException("unknown store parameter " + store);

@@ -422,7 +406,6 @@ public final class Field extends AbstractField implements Fieldable, Serializabl
     this.fieldsData = reader;
 
     this.isStored = false;
-    this.isCompressed = false;
 
     this.isIndexed = true;
     this.isTokenized = true;

@@ -470,7 +453,6 @@ public final class Field extends AbstractField implements Fieldable, Serializabl
     this.tokenStream = tokenStream;
 
     this.isStored = false;
-    this.isCompressed = false;
 
     this.isIndexed = true;
     this.isTokenized = true;

@@ -515,11 +497,6 @@ public final class Field extends AbstractField implements Fieldable, Serializabl
 
     if (store == Store.YES) {
       isStored = true;
-      isCompressed = false;
-    }
-    else if (store == Store.COMPRESS) {
-      isStored = true;
-      isCompressed = true;
     }
     else if (store == Store.NO)
       throw new IllegalArgumentException("binary values can't be unstored");

@@ -54,17 +54,6 @@ public final class FieldSelectorResult implements Serializable {
    * {@link Document#add(Fieldable)} should be called by the Reader.
    */
   public transient static final FieldSelectorResult LOAD_AND_BREAK = new FieldSelectorResult(3);
-  /**
-   * Behaves much like {@link #LOAD} but does not uncompress any compressed data.  This is used for internal purposes.
-   * {@link Document#getField(String)} and {@link Document#getFieldable(String)} should not return null.
-   * <p/>
-   * {@link Document#add(Fieldable)} should be called by
-   * the Reader.
-   * @deprecated This is an internal option only, and is
-   * no longer needed now that {@link CompressionTools}
-   * is used for field compression.
-   */
-  public transient static final FieldSelectorResult LOAD_FOR_MERGE = new FieldSelectorResult(4);
 
   /** Expert: Load the size of this {@link Field} rather than its value.
    * Size is measured as number of bytes required to store the field == bytes for a binary or any compressed value, and 2*chars for a String value.

@@ -113,9 +113,6 @@ public interface Fieldable extends Serializable {
    Reader-valued. */
   boolean isTokenized();
 
-  /** True if the value of the field is stored and compressed within the index */
-  boolean isCompressed();
-
   /** True if the term or terms used to index this field are stored as a term
    *  vector, available from {@link org.apache.lucene.index.IndexReader#getTermFreqVector(int,String)}.
    *  These methods do not provide access to the original content of the field,

@@ -24,11 +24,9 @@ import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.BufferedIndexInput;
 import org.apache.lucene.util.CloseableThreadLocal;
-import org.apache.lucene.util.StringHelper;
 
 import java.io.IOException;
 import java.io.Reader;
-import java.util.zip.DataFormatException;
 
 /**
  * Class responsible for access to stored document fields.

@@ -216,35 +214,31 @@ final class FieldsReader implements Cloneable {
       FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.accept(fi.name);
 
       byte bits = fieldsStream.readByte();
-      assert bits <= FieldsWriter.FIELD_IS_COMPRESSED + FieldsWriter.FIELD_IS_TOKENIZED + FieldsWriter.FIELD_IS_BINARY;
+      assert bits <= FieldsWriter.FIELD_IS_TOKENIZED + FieldsWriter.FIELD_IS_BINARY;
 
-      boolean compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
       boolean tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
       boolean binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;
       //TODO: Find an alternative approach here if this list continues to grow beyond the
       //list of 5 or 6 currently here.  See Lucene 762 for discussion
       if (acceptField.equals(FieldSelectorResult.LOAD)) {
-        addField(doc, fi, binary, compressed, tokenize);
-      }
-      else if (acceptField.equals(FieldSelectorResult.LOAD_FOR_MERGE)) {
-        addFieldForMerge(doc, fi, binary, compressed, tokenize);
+        addField(doc, fi, binary, tokenize);
       }
       else if (acceptField.equals(FieldSelectorResult.LOAD_AND_BREAK)){
-        addField(doc, fi, binary, compressed, tokenize);
+        addField(doc, fi, binary, tokenize);
         break;//Get out of this loop
       }
       else if (acceptField.equals(FieldSelectorResult.LAZY_LOAD)) {
-        addFieldLazy(doc, fi, binary, compressed, tokenize);
+        addFieldLazy(doc, fi, binary, tokenize);
       }
       else if (acceptField.equals(FieldSelectorResult.SIZE)){
-        skipField(binary, compressed, addFieldSize(doc, fi, binary, compressed));
+        skipField(binary, addFieldSize(doc, fi, binary));
       }
       else if (acceptField.equals(FieldSelectorResult.SIZE_AND_BREAK)){
-        addFieldSize(doc, fi, binary, compressed);
+        addFieldSize(doc, fi, binary);
         break;
       }
       else {
-        skipField(binary, compressed);
+        skipField(binary);
       }
     }
 

@@ -281,12 +275,12 @@ final class FieldsReader implements Cloneable {
    * Skip the field.  We still have to read some of the information about the field, but can skip past the actual content.
    * This will have the most payoff on large fields.
    */
-  private void skipField(boolean binary, boolean compressed) throws IOException {
-    skipField(binary, compressed, fieldsStream.readVInt());
+  private void skipField(boolean binary) throws IOException {
+    skipField(binary, fieldsStream.readVInt());
   }
 
-  private void skipField(boolean binary, boolean compressed, int toRead) throws IOException {
-    if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES || binary || compressed) {
+  private void skipField(boolean binary, int toRead) throws IOException {
+    if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES || binary) {
       fieldsStream.seek(fieldsStream.getFilePointer() + toRead);
     } else {
       // We need to skip chars.  This will slow us down, but still better

@@ -294,17 +288,12 @@ final class FieldsReader implements Cloneable {
     }
   }
 
-  private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws IOException {
+  private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean tokenize) throws IOException {
     if (binary) {
       int toRead = fieldsStream.readVInt();
       long pointer = fieldsStream.getFilePointer();
-      if (compressed) {
-        //was: doc.add(new Fieldable(fi.name, uncompress(b), Fieldable.Store.COMPRESS));
-        doc.add(new LazyField(fi.name, Field.Store.COMPRESS, toRead, pointer, binary));
-      } else {
-        //was: doc.add(new Fieldable(fi.name, b, Fieldable.Store.YES));
-        doc.add(new LazyField(fi.name, Field.Store.YES, toRead, pointer, binary));
-      }
+      //was: doc.add(new Fieldable(fi.name, b, Fieldable.Store.YES));
+      doc.add(new LazyField(fi.name, Field.Store.YES, toRead, pointer, binary));
       //Need to move the pointer ahead by toRead positions
       fieldsStream.seek(pointer + toRead);
     } else {

@@ -313,16 +302,6 @@ final class FieldsReader implements Cloneable {
       Field.TermVector termVector = getTermVectorType(fi);
 
       AbstractField f;
-      if (compressed) {
-        store = Field.Store.COMPRESS;
-        int toRead = fieldsStream.readVInt();
-        long pointer = fieldsStream.getFilePointer();
-        f = new LazyField(fi.name, store, toRead, pointer, binary);
-        //skip over the part that we aren't loading
-        fieldsStream.seek(pointer + toRead);
-        f.setOmitNorms(fi.omitNorms);
-        f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
-      } else {
       int length = fieldsStream.readVInt();
       long pointer = fieldsStream.getFilePointer();
       //Skip ahead of where we are by the length of what is stored

@@ -333,38 +312,18 @@ final class FieldsReader implements Cloneable {
       f = new LazyField(fi.name, store, index, termVector, length, pointer, binary);
       f.setOmitNorms(fi.omitNorms);
       f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
-      }
       doc.add(f);
     }
 
   }
 
-  // in merge mode we don't uncompress the data of a compressed field
-  private void addFieldForMerge(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws IOException {
-    Object data;
-
-    if (binary || compressed) {
-      int toRead = fieldsStream.readVInt();
-      final byte[] b = new byte[toRead];
-      fieldsStream.readBytes(b, 0, b.length);
-      data = b;
-    } else {
-      data = fieldsStream.readString();
-    }
-
-    doc.add(new FieldForMerge(data, fi, binary, compressed, tokenize));
-  }
-
-  private void addField(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws CorruptIndexException, IOException {
+  private void addField(Document doc, FieldInfo fi, boolean binary, boolean tokenize) throws CorruptIndexException, IOException {
 
     //we have a binary stored field, and it may be compressed
     if (binary) {
       int toRead = fieldsStream.readVInt();
       final byte[] b = new byte[toRead];
       fieldsStream.readBytes(b, 0, b.length);
-      if (compressed)
-        doc.add(new Field(fi.name, uncompress(b), Field.Store.COMPRESS));
-      else
-        doc.add(new Field(fi.name, b, Field.Store.YES));
+      doc.add(new Field(fi.name, b, Field.Store.YES));
     } else {
       Field.Store store = Field.Store.YES;

@@ -372,21 +331,6 @@ final class FieldsReader implements Cloneable {
       Field.TermVector termVector = getTermVectorType(fi);
 
       AbstractField f;
-      if (compressed) {
-        store = Field.Store.COMPRESS;
-        int toRead = fieldsStream.readVInt();
-
-        final byte[] b = new byte[toRead];
-        fieldsStream.readBytes(b, 0, b.length);
-        f = new Field(fi.name,      // field name
-                false,
-                new String(uncompress(b), "UTF-8"), // uncompress the value and add as string
-                store,
-                index,
-                termVector);
-        f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
-        f.setOmitNorms(fi.omitNorms);
-      } else {
       f = new Field(fi.name,     // name
               false,
               fieldsStream.readString(), // read value

@@ -395,7 +339,6 @@ final class FieldsReader implements Cloneable {
               termVector);
       f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
       f.setOmitNorms(fi.omitNorms);
-      }
       doc.add(f);
     }
   }

@@ -403,8 +346,8 @@ final class FieldsReader implements Cloneable {
   // Add the size of field as a byte[] containing the 4 bytes of the integer byte size (high order byte first; char = 2 bytes)
   // Read just the size -- caller must skip the field content to continue reading fields
   // Return the size in bytes or chars, depending on field type
-  private int addFieldSize(Document doc, FieldInfo fi, boolean binary, boolean compressed) throws IOException {
-    int size = fieldsStream.readVInt(), bytesize = binary || compressed ? size : 2*size;
+  private int addFieldSize(Document doc, FieldInfo fi, boolean binary) throws IOException {
+    int size = fieldsStream.readVInt(), bytesize = binary ? size : 2*size;
     byte[] sizebytes = new byte[4];
     sizebytes[0] = (byte) (bytesize>>>24);
     sizebytes[1] = (byte) (bytesize>>>16);

@@ -517,11 +460,6 @@ final class FieldsReader implements Cloneable {
         IndexInput localFieldsStream = getFieldStream();
         try {
           localFieldsStream.seek(pointer);
-          if (isCompressed) {
-            final byte[] b = new byte[toRead];
-            localFieldsStream.readBytes(b, 0, b.length);
-            fieldsData = new String(uncompress(b), "UTF-8");
-          } else {
           if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) {
             byte[] bytes = new byte[toRead];
             localFieldsStream.readBytes(bytes, 0, toRead);

@@ -532,7 +470,6 @@ final class FieldsReader implements Cloneable {
             localFieldsStream.readChars(chars, 0, toRead);
             fieldsData = new String(chars);
           }
-          }
         } catch (IOException e) {
           throw new FieldReaderException(e);
         }

@@ -580,11 +517,7 @@ final class FieldsReader implements Cloneable {
         try {
           localFieldsStream.seek(pointer);
           localFieldsStream.readBytes(b, 0, toRead);
-          if (isCompressed == true) {
-            fieldsData = uncompress(b);
-          } else {
           fieldsData = b;
-          }
         } catch (IOException e) {
           throw new FieldReaderException(e);
         }

@@ -598,58 +531,4 @@ final class FieldsReader implements Cloneable {
       return null;
     }
   }
-
-  private byte[] uncompress(byte[] b)
-          throws CorruptIndexException {
-    try {
-      return CompressionTools.decompress(b);
-    } catch (DataFormatException e) {
-      // this will happen if the field is not compressed
-      CorruptIndexException newException = new CorruptIndexException("field data are in wrong format: " + e.toString());
-      newException.initCause(e);
-      throw newException;
-    }
-  }
-
-  // Instances of this class hold field properties and data
-  // for merge
-  final static class FieldForMerge extends AbstractField {
-    public String stringValue() {
-      return (String) this.fieldsData;
-    }
-
-    public Reader readerValue() {
-      // not needed for merge
-      return null;
-    }
-
-    public byte[] binaryValue() {
-      return (byte[]) this.fieldsData;
-    }
-
-    public TokenStream tokenStreamValue() {
-      // not needed for merge
-      return null;
-    }
-
-    public FieldForMerge(Object value, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) {
-      this.isStored = true;
-      this.fieldsData = value;
-      this.isCompressed = compressed;
-      this.isBinary = binary;
-      if (binary)
-        binaryLength = ((byte[]) value).length;
-
-      this.isTokenized = tokenize;
-
-      this.name = StringHelper.intern(fi.name);
-      this.isIndexed = fi.isIndexed;
-      this.omitNorms = fi.omitNorms;
-      this.omitTermFreqAndPositions = fi.omitTermFreqAndPositions;
-      this.storeOffsetWithTermVector = fi.storeOffsetWithTermVector;
-      this.storePositionWithTermVector = fi.storePositionWithTermVector;
-      this.storeTermVector = fi.storeTermVector;
-    }
-
-  }
 }

@@ -21,7 +21,6 @@ import java.util.Iterator;
 
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Fieldable;
-import org.apache.lucene.document.CompressionTools;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMOutputStream;
 import org.apache.lucene.store.IndexOutput;

@@ -31,7 +30,6 @@ final class FieldsWriter
 {
   static final byte FIELD_IS_TOKENIZED = 0x1;
   static final byte FIELD_IS_BINARY = 0x2;
-  static final byte FIELD_IS_COMPRESSED = 0x4;
 
   // Original format
   static final int FORMAT = 0;

@@ -172,50 +170,15 @@ final class FieldsWriter
     }
 
     final void writeField(FieldInfo fi, Fieldable field) throws IOException {
-      // if the field as an instanceof FieldsReader.FieldForMerge, we're in merge mode
-      // and field.binaryValue() already returns the compressed value for a field
-      // with isCompressed()==true, so we disable compression in that case
-      boolean disableCompression = (field instanceof FieldsReader.FieldForMerge);
      fieldsStream.writeVInt(fi.number);
      byte bits = 0;
      if (field.isTokenized())
        bits |= FieldsWriter.FIELD_IS_TOKENIZED;
      if (field.isBinary())
        bits |= FieldsWriter.FIELD_IS_BINARY;
-      if (field.isCompressed())
-        bits |= FieldsWriter.FIELD_IS_COMPRESSED;
 
      fieldsStream.writeByte(bits);
 
-      if (field.isCompressed()) {
-        // compression is enabled for the current field
-        final byte[] data;
-        final int len;
-        final int offset;
-        if (disableCompression) {
-          // optimized case for merging, the data
-          // is already compressed
-          data = field.getBinaryValue();
-          assert data != null;
-          len = field.getBinaryLength();
-          offset = field.getBinaryOffset();
-        } else {
-          // check if it is a binary field
-          if (field.isBinary()) {
-            data = CompressionTools.compress(field.getBinaryValue(), field.getBinaryOffset(), field.getBinaryLength());
-          } else {
-            byte x[] = field.stringValue().getBytes("UTF-8");
-            data = CompressionTools.compress(x, 0, x.length);
-          }
-          len = data.length;
-          offset = 0;
-        }
-
-        fieldsStream.writeVInt(len);
-        fieldsStream.writeBytes(data, offset, len);
-      }
-      else {
-        // compression is disabled for the current field
      if (field.isBinary()) {
        final byte[] data;
        final int len;

@@ -231,7 +194,6 @@ final class FieldsWriter
        fieldsStream.writeString(field.stringValue());
      }
    }
-    }
 
    /** Bulk write a contiguous series of documents.  The
     *  lengths array is the length (in bytes) of each raw

@@ -24,8 +24,6 @@ import java.util.Iterator;
 import java.util.List;
 
 import org.apache.lucene.document.Document;
-import org.apache.lucene.document.FieldSelector;
-import org.apache.lucene.document.FieldSelectorResult;
 import org.apache.lucene.index.IndexReader.FieldOption;
 import org.apache.lucene.index.MergePolicy.MergeAbortedException;
 import org.apache.lucene.store.Directory;

@@ -321,15 +319,6 @@ final class SegmentMerger {
     setMatchingSegmentReaders();
 
     if (mergeDocStores) {
-
-      // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're
-      // in merge mode, we use this FieldSelector
-      FieldSelector fieldSelectorMerge = new FieldSelector() {
-          public FieldSelectorResult accept(String fieldName) {
-            return FieldSelectorResult.LOAD_FOR_MERGE;
-          }
-        };
-
       // merge field values
       final FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos);
 

@@ -346,10 +335,10 @@ final class SegmentMerger {
         }
       }
       if (reader.hasDeletions()) {
-        docCount += copyFieldsWithDeletions(fieldSelectorMerge, fieldsWriter,
+        docCount += copyFieldsWithDeletions(fieldsWriter,
                                             reader, matchingFieldsReader);
       } else {
-        docCount += copyFieldsNoDeletions(fieldSelectorMerge, fieldsWriter,
+        docCount += copyFieldsNoDeletions(fieldsWriter,
                                           reader, matchingFieldsReader);
       }
     }

@@ -379,8 +368,7 @@ final class SegmentMerger {
     return docCount;
   }
 
-  private int copyFieldsWithDeletions(final FieldSelector fieldSelectorMerge,
-                                      final FieldsWriter fieldsWriter, final IndexReader reader,
+  private int copyFieldsWithDeletions(final FieldsWriter fieldsWriter, final IndexReader reader,
                                       final FieldsReader matchingFieldsReader)
     throws IOException, MergeAbortedException, CorruptIndexException {
     int docCount = 0;

@@ -419,7 +407,7 @@ final class SegmentMerger {
         }
         // NOTE: it's very important to first assign to doc then pass it to
         // termVectorsWriter.addAllDocVectors; see LUCENE-1282
-        Document doc = reader.document(j, fieldSelectorMerge);
+        Document doc = reader.document(j);
         fieldsWriter.addDocument(doc);
         docCount++;
         checkAbort.work(300);

@@ -428,8 +416,7 @@ final class SegmentMerger {
     return docCount;
   }
 
-  private int copyFieldsNoDeletions(FieldSelector fieldSelectorMerge,
-                                    final FieldsWriter fieldsWriter, final IndexReader reader,
+  private int copyFieldsNoDeletions(final FieldsWriter fieldsWriter, final IndexReader reader,
                                     final FieldsReader matchingFieldsReader)
     throws IOException, MergeAbortedException, CorruptIndexException {
     final int maxDoc = reader.maxDoc();

@@ -447,7 +434,7 @@ final class SegmentMerger {
       for (; docCount < maxDoc; docCount++) {
         // NOTE: it's very important to first assign to doc then pass it to
         // termVectorsWriter.addAllDocVectors; see LUCENE-1282
-        Document doc = reader.document(docCount, fieldSelectorMerge);
+        Document doc = reader.document(docCount);
         fieldsWriter.addDocument(doc);
         checkAbort.work(300);
       }

@@ -37,13 +37,11 @@ public class TestBinaryDocument extends LuceneTestCase
     throws Exception
   {
     Fieldable binaryFldStored = new Field("binaryStored", binaryValStored.getBytes(), Field.Store.YES);
-    Fieldable binaryFldCompressed = new Field("binaryCompressed", binaryValCompressed.getBytes(), Field.Store.COMPRESS);
     Fieldable stringFldStored = new Field("stringStored", binaryValStored, Field.Store.YES, Field.Index.NO, Field.TermVector.NO);
-    Fieldable stringFldCompressed = new Field("stringCompressed", binaryValCompressed, Field.Store.COMPRESS, Field.Index.NO, Field.TermVector.NO);
 
     try {
       // binary fields with store off are not allowed
-      new Field("fail", binaryValCompressed.getBytes(), Field.Store.NO);
+      new Field("fail", binaryValStored.getBytes(), Field.Store.NO);
       fail();
     }
     catch (IllegalArgumentException iae) {

@@ -53,13 +51,11 @@ public class TestBinaryDocument extends LuceneTestCase
     Document doc = new Document();
 
     doc.add(binaryFldStored);
-    doc.add(binaryFldCompressed);
 
     doc.add(stringFldStored);
-    doc.add(stringFldCompressed);
 
     /** test for field count */
-    assertEquals(4, doc.fields.size());
+    assertEquals(2, doc.fields.size());
 
     /** add the doc to a ram index */
     MockRAMDirectory dir = new MockRAMDirectory();

@@ -76,18 +72,10 @@ public class TestBinaryDocument extends LuceneTestCase
     String binaryFldStoredTest = new String(docFromReader.getBinaryValue("binaryStored"));
     assertTrue(binaryFldStoredTest.equals(binaryValStored));
 
-    /** fetch the binary compressed field and compare it's content with the original one */
-    String binaryFldCompressedTest = new String(docFromReader.getBinaryValue("binaryCompressed"));
-    assertTrue(binaryFldCompressedTest.equals(binaryValCompressed));
-
     /** fetch the string field and compare it's content with the original one */
     String stringFldStoredTest = docFromReader.get("stringStored");
     assertTrue(stringFldStoredTest.equals(binaryValStored));
 
-    /** fetch the compressed string field and compare it's content with the original one */
-    String stringFldCompressedTest = docFromReader.get("stringCompressed");
-    assertTrue(stringFldCompressedTest.equals(binaryValCompressed));
-
     /** delete the document from index */
     reader.deleteDocument(0);
     assertEquals(0, reader.numDocs());

@@ -42,13 +42,6 @@ class DocHelper {
   public static final String TEXT_FIELD_2_KEY = "textField2";
   public static Field textField2 = new Field(TEXT_FIELD_2_KEY, FIELD_2_TEXT, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
 
-  public static final String FIELD_2_COMPRESSED_TEXT = "field field field two text";
-  //Fields will be lexicographically sorted.  So, the order is: field, text, two
-  public static final int [] COMPRESSED_FIELD_2_FREQS = {3, 1, 1};
-  public static final String COMPRESSED_TEXT_FIELD_2_KEY = "compressedTextField2";
-  public static Field compressedTextField2 = new Field(COMPRESSED_TEXT_FIELD_2_KEY, FIELD_2_COMPRESSED_TEXT, Field.Store.COMPRESS, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
-
-
   public static final String FIELD_3_TEXT = "aaaNoNorms aaaNoNorms bbbNoNorms";
   public static final String TEXT_FIELD_3_KEY = "textField3";
   public static Field textField3 = new Field(TEXT_FIELD_3_KEY, FIELD_3_TEXT, Field.Store.YES, Field.Index.ANALYZED);

@@ -124,7 +117,6 @@ class DocHelper {
     textField1,
     textField2,
     textField3,
-    compressedTextField2,
     keyField,
     noNormsField,
     noTFField,

@@ -193,7 +185,6 @@ class DocHelper {
     nameValues = new HashMap();
     nameValues.put(TEXT_FIELD_1_KEY, FIELD_1_TEXT);
     nameValues.put(TEXT_FIELD_2_KEY, FIELD_2_TEXT);
-    nameValues.put(COMPRESSED_TEXT_FIELD_2_KEY, FIELD_2_COMPRESSED_TEXT);
     nameValues.put(TEXT_FIELD_3_KEY, FIELD_3_TEXT);
     nameValues.put(KEYWORD_FIELD_KEY, KEYWORD_TEXT);
     nameValues.put(NO_NORMS_KEY, NO_NORMS_TEXT);

@@ -108,7 +108,6 @@ public class TestFieldsReader extends LuceneTestCase {
     lazyFieldNames.add(DocHelper.LAZY_FIELD_KEY);
     lazyFieldNames.add(DocHelper.LAZY_FIELD_BINARY_KEY);
     lazyFieldNames.add(DocHelper.TEXT_FIELD_UTF2_KEY);
-    lazyFieldNames.add(DocHelper.COMPRESSED_TEXT_FIELD_2_KEY);
     SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(loadFieldNames, lazyFieldNames);
     Document doc = reader.doc(0, fieldSelector);
     assertTrue("doc is null and it shouldn't be", doc != null);

@@ -118,13 +117,6 @@ public class TestFieldsReader extends LuceneTestCase {
     String value = field.stringValue();
     assertTrue("value is null and it shouldn't be", value != null);
     assertTrue(value + " is not equal to " + DocHelper.LAZY_FIELD_TEXT, value.equals(DocHelper.LAZY_FIELD_TEXT) == true);
-    field = doc.getFieldable(DocHelper.COMPRESSED_TEXT_FIELD_2_KEY);
-    assertTrue("field is null and it shouldn't be", field != null);
-    assertTrue("field is not lazy and it should be", field.isLazy());
-    assertTrue("binaryValue isn't null for lazy string field", field.binaryValue() == null);
-    value = field.stringValue();
-    assertTrue("value is null and it shouldn't be", value != null);
-    assertTrue(value + " is not equal to " + DocHelper.FIELD_2_COMPRESSED_TEXT, value.equals(DocHelper.FIELD_2_COMPRESSED_TEXT) == true);
     field = doc.getFieldable(DocHelper.TEXT_FIELD_1_KEY);
     assertTrue("field is null and it shouldn't be", field != null);
     assertTrue("Field is lazy and it should not be", field.isLazy() == false);

@@ -165,7 +157,6 @@ public class TestFieldsReader extends LuceneTestCase {
     lazyFieldNames.add(DocHelper.LAZY_FIELD_KEY);
     lazyFieldNames.add(DocHelper.LAZY_FIELD_BINARY_KEY);
     lazyFieldNames.add(DocHelper.TEXT_FIELD_UTF2_KEY);
-    lazyFieldNames.add(DocHelper.COMPRESSED_TEXT_FIELD_2_KEY);
     SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(loadFieldNames, lazyFieldNames);
     Document doc = reader.doc(0, fieldSelector);
     assertTrue("doc is null and it shouldn't be", doc != null);

@@ -281,7 +272,6 @@ public class TestFieldsReader extends LuceneTestCase {
     doc = reader.doc(0, new FieldSelector(){
       public FieldSelectorResult accept(String fieldName) {
         if (fieldName.equals(DocHelper.TEXT_FIELD_1_KEY) ||
-            fieldName.equals(DocHelper.COMPRESSED_TEXT_FIELD_2_KEY) ||
             fieldName.equals(DocHelper.LAZY_FIELD_BINARY_KEY))
           return FieldSelectorResult.SIZE;
         else if (fieldName.equals(DocHelper.TEXT_FIELD_3_KEY))

@@ -365,7 +365,6 @@ public class TestIndexReader extends LuceneTestCase
         writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.LIMITED);
         Document doc = new Document();
         doc.add(new Field("bin1", bin, Field.Store.YES));
-        doc.add(new Field("bin2", bin, Field.Store.COMPRESS));
         doc.add(new Field("junk", "junk text", Field.Store.NO, Field.Index.ANALYZED));
         writer.addDocument(doc);
         writer.close();

@@ -381,16 +380,6 @@ public class TestIndexReader extends LuceneTestCase
         for (int i = 0; i < bin.length; i++) {
           assertEquals(bin[i], data1[i + b1.getBinaryOffset()]);
         }
-        fields = doc.getFields("bin2");
-        assertNotNull(fields);
-        assertEquals(1, fields.length);
-        b1 = fields[0];
-        assertTrue(b1.isBinary());
-        data1 = b1.getBinaryValue();
-        assertEquals(bin.length, b1.getBinaryLength());
-        for (int i = 0; i < bin.length; i++) {
-          assertEquals(bin[i], data1[i + b1.getBinaryOffset()]);
-        }
         Set lazyFields = new HashSet();
         lazyFields.add("bin1");
         FieldSelector sel = new SetBasedFieldSelector(new HashSet(), lazyFields);

@@ -425,16 +414,6 @@ public class TestIndexReader extends LuceneTestCase
         for (int i = 0; i < bin.length; i++) {
           assertEquals(bin[i], data1[i + b1.getBinaryOffset()]);
         }
-        fields = doc.getFields("bin2");
-        assertNotNull(fields);
-        assertEquals(1, fields.length);
-        b1 = fields[0];
-        assertTrue(b1.isBinary());
-        data1 = b1.getBinaryValue();
-        assertEquals(bin.length, b1.getBinaryLength());
-        for (int i = 0; i < bin.length; i++) {
-          assertEquals(bin[i], data1[i + b1.getBinaryOffset()]);
-        }
         reader.close();
     }
 

@@ -3964,48 +3964,6 @@ public class TestIndexWriter extends BaseTokenStreamTestCase {
     dir.close();
   }
 
-  // LUCENE-1374
-  public void testMergeCompressedFields() throws IOException {
-    File indexDir = new File(System.getProperty("tempDir"), "mergecompressedfields");
-    Directory dir = FSDirectory.open(indexDir);
-    try {
-      for(int i=0;i<5;i++) {
-        // Must make a new writer & doc each time, w/
-        // different fields, so bulk merge of stored fields
-        // cannot run:
-        IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), i==0, IndexWriter.MaxFieldLength.UNLIMITED);
-        try {
-          w.setMergeFactor(5);
-          w.setMergeScheduler(new SerialMergeScheduler());
-          Document doc = new Document();
-          doc.add(new Field("test1", "this is some data that will be compressed this this this", Field.Store.COMPRESS, Field.Index.NO));
-          doc.add(new Field("test2", new byte[20], Field.Store.COMPRESS));
-          doc.add(new Field("field" + i, "random field", Field.Store.NO, Field.Index.ANALYZED));
-          w.addDocument(doc);
-        } finally {
-          w.close();
-        }
-      }
-
-      byte[] cmp = new byte[20];
-
-      IndexReader r = IndexReader.open(dir, true);
-      try {
-        for(int i=0;i<5;i++) {
-          Document doc = r.document(i);
-          assertEquals("this is some data that will be compressed this this this", doc.getField("test1").stringValue());
-          byte[] b = doc.getField("test2").binaryValue();
-          assertTrue(Arrays.equals(b, cmp));
-        }
-      } finally {
-        r.close();
-      }
-    } finally {
-      dir.close();
-      _TestUtil.rmDir(indexDir);
-    }
-  }
-
   // LUCENE-1382
   public void testCommitUserData() throws IOException {
     Directory dir = new MockRAMDirectory();

@@ -87,7 +87,7 @@ public class TestSegmentMerger extends LuceneTestCase {
     Collection stored = mergedReader.getFieldNames(IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR);
     assertTrue(stored != null);
     //System.out.println("stored size: " + stored.size());
-    assertTrue("We do not have 4 fields that were indexed with term vector",stored.size() == 4);
+    assertTrue("We do not have 3 fields that were indexed with term vector",stored.size() == 3);
 
     TermFreqVector vector = mergedReader.getTermFreqVector(0, DocHelper.TEXT_FIELD_2_KEY);
     assertTrue(vector != null);

@@ -200,6 +200,6 @@ public class TestSegmentReader extends LuceneTestCase {
 
     TermFreqVector [] results = reader.getTermFreqVectors(0);
     assertTrue(results != null);
-    assertTrue("We do not have 4 term freq vectors, we have: " + results.length, results.length == 4);
+    assertTrue("We do not have 3 term freq vectors, we have: " + results.length, results.length == 3);
   }
 }