diff --git a/CHANGES.txt b/CHANGES.txt index 16775619299..4223f2c5691 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -9,6 +9,9 @@ New features 1. LUCENE-503: New ThaiAnalyzer and ThaiWordFilter in contrib/analyzers (Samphan Raruenrom via Chris Hostetter) + 2. LUCENE-545: New FieldSelector API and associated changes to IndexReader and implementations. + New Fieldable interface for use with the lazy field loading mechanism. (Grant Ingersoll and Chuck Williams via Grant Ingersoll) + API Changes 1. LUCENE-438: Remove "final" from Token, implement Cloneable, allow @@ -16,20 +16,11 @@ package org.apache.lucene.index.memory; * limitations under the License. */ -import java.io.IOException; -import java.io.Serializable; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Map; - import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Document; +import org.apache.lucene.document.FieldSelector; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermDocs; @@ -43,6 +34,16 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.Searcher; import org.apache.lucene.search.Similarity; +import java.io.IOException; +import java.io.Serializable; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; + /** * High-performance single-document main memory Apache Lucene fulltext search index.
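For orientation before the file-by-file changes, here is a minimal sketch of how the new FieldSelector hook is meant to be called; the index path and field names are illustrative, not part of the patch:

    import org.apache.lucene.document.*;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.store.FSDirectory;

    public class FieldSelectorDemo {
      public static void main(String[] args) throws Exception {
        IndexReader reader = IndexReader.open(FSDirectory.getDirectory("/tmp/index", false));
        // Load only "title"; every other stored field is skipped outright.
        FieldSelector titleOnly = new FieldSelector() {
          public FieldSelectorResult accept(String fieldName) {
            return "title".equals(fieldName) ? FieldSelectorResult.LOAD
                                             : FieldSelectorResult.NO_LOAD;
          }
        };
        Document doc = reader.document(0, titleOnly);
        System.out.println(doc.get("title")); // loaded
        System.out.println(doc.get("body"));  // null -- never read from disk
        reader.close();
      }
    }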
* @@ -1004,8 +1005,14 @@ public class MemoryIndex { if (DEBUG) System.err.println("MemoryIndexReader.document"); return new Document(); // there are no stored fields } - - public boolean isDeleted(int n) { + + //When we convert to JDK 1.5 make this Set + public Document document(int n, FieldSelector fieldSelector) throws IOException { + if (DEBUG) System.err.println("MemoryIndexReader.document"); + return new Document(); // there are no stored fields + } + + public boolean isDeleted(int n) { if (DEBUG) System.err.println("MemoryIndexReader.isDeleted"); return false; } diff --git a/contrib/swing/src/java/org/apache/lucene/swing/models/ListSearcher.java b/contrib/swing/src/java/org/apache/lucene/swing/models/ListSearcher.java index 38a7558f9ef..48b34150834 100644 --- a/contrib/swing/src/java/org/apache/lucene/swing/models/ListSearcher.java +++ b/contrib/swing/src/java/org/apache/lucene/swing/models/ListSearcher.java @@ -22,6 +22,7 @@ import org.apache.lucene.analysis.WhitespaceAnalyzer; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.Fieldable; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.Hits; @@ -190,7 +191,7 @@ public class ListSearcher extends AbstractListModel { //tabble model row that we are mapping to for (int t=0; tThe boost is multiplied by {@link org.apache.lucene.document.Document#getBoost()} of the document + * containing this field. If a document has multiple fields with the same + * name, all such values are multiplied together. This product is then + * multiplied by the value {@link org.apache.lucene.search.Similarity#lengthNorm(String,int)}, and + * rounded by {@link org.apache.lucene.search.Similarity#encodeNorm(float)} before it is stored in the + * index. One should attempt to ensure that this product does not overflow + * the range of that encoding. + * + * @see org.apache.lucene.document.Document#setBoost(float) + * @see org.apache.lucene.search.Similarity#lengthNorm(String, int) + * @see org.apache.lucene.search.Similarity#encodeNorm(float) + */ + public void setBoost(float boost) { + this.boost = boost; + } + + /** Returns the boost factor for hits for this field. + * + *
<p>
The default value is 1.0. + * + *
<p>
Note: this value is not stored directly with the document in the index. + * Documents returned from {@link org.apache.lucene.index.IndexReader#document(int)} and + * {@link org.apache.lucene.search.Hits#doc(int)} may thus not have the same value present as when + * this field was indexed. + * + * @see #setBoost(float) + */ + public float getBoost() { + return boost; + } + + /** Returns the name of the field as an interned string. + * For example "date", "title", "body", ... + */ + public String name() { return name; } + + protected void setStoreTermVector(Field.TermVector termVector) { + if (termVector == Field.TermVector.NO) { + this.storeTermVector = false; + this.storePositionWithTermVector = false; + this.storeOffsetWithTermVector = false; + } + else if (termVector == Field.TermVector.YES) { + this.storeTermVector = true; + this.storePositionWithTermVector = false; + this.storeOffsetWithTermVector = false; + } + else if (termVector == Field.TermVector.WITH_POSITIONS) { + this.storeTermVector = true; + this.storePositionWithTermVector = true; + this.storeOffsetWithTermVector = false; + } + else if (termVector == Field.TermVector.WITH_OFFSETS) { + this.storeTermVector = true; + this.storePositionWithTermVector = false; + this.storeOffsetWithTermVector = true; + } + else if (termVector == Field.TermVector.WITH_POSITIONS_OFFSETS) { + this.storeTermVector = true; + this.storePositionWithTermVector = true; + this.storeOffsetWithTermVector = true; + } + else { + throw new IllegalArgumentException("unknown termVector parameter " + termVector); + } + } + + /** True iff the value of the field is to be stored in the index for return + with search hits. It is an error for this to be true if a field is + Reader-valued. */ + public final boolean isStored() { return isStored; } + + /** True iff the value of the field is to be indexed, so that it may be + searched on. */ + public final boolean isIndexed() { return isIndexed; } + + /** True iff the value of the field should be tokenized as text prior to + indexing. Un-tokenized fields are indexed as a single word and may not be + Reader-valued. */ + public final boolean isTokenized() { return isTokenized; } + + /** True if the value of the field is stored and compressed within the index */ + public final boolean isCompressed() { return isCompressed; } + + /** True iff the term or terms used to index this field are stored as a term + * vector, available from {@link org.apache.lucene.index.IndexReader#getTermFreqVector(int,String)}. + * These methods do not provide access to the original content of the field, + * only to terms used to index it. If the original content must be + * preserved, use the stored attribute instead. + * + * @see org.apache.lucene.index.IndexReader#getTermFreqVector(int, String) + */ + public final boolean isTermVectorStored() { return storeTermVector; } + + /** + * True iff terms are stored as term vector together with their offsets + * (start and end position in source text). + */ + public boolean isStoreOffsetWithTermVector(){ + return storeOffsetWithTermVector; + } + + /** + * True iff terms are stored as term vector together with their token positions.
+ */ + public boolean isStorePositionWithTermVector(){ + return storePositionWithTermVector; + } + + /** True iff the value of the field is stored as binary */ + public final boolean isBinary() { return isBinary; } + + /** True if norms are omitted for this indexed field */ + public boolean getOmitNorms() { return omitNorms; } + + /** Expert: + * + * If set, omit normalization factors associated with this indexed field. + * This effectively disables indexing boosts and length normalization for this field. + */ + public void setOmitNorms(boolean omitNorms) { this.omitNorms=omitNorms; } + + public boolean isLazy() { + return lazy; + } + + /** Prints a Field for human consumption. */ + public final String toString() { + StringBuffer result = new StringBuffer(); + if (isStored) { + result.append("stored"); + if (isCompressed) + result.append("/compressed"); + else + result.append("/uncompressed"); + } + if (isIndexed) { + if (result.length() > 0) + result.append(","); + result.append("indexed"); + } + if (isTokenized) { + if (result.length() > 0) + result.append(","); + result.append("tokenized"); + } + if (storeTermVector) { + if (result.length() > 0) + result.append(","); + result.append("termVector"); + } + if (storeOffsetWithTermVector) { + if (result.length() > 0) + result.append(","); + result.append("termVectorOffsets"); + } + if (storePositionWithTermVector) { + if (result.length() > 0) + result.append(","); + result.append("termVectorPosition"); + } + if (isBinary) { + if (result.length() > 0) + result.append(","); + result.append("binary"); + } + if (omitNorms) { + result.append(",omitNorms"); + } + if (lazy){ + result.append(",lazy"); + } + result.append('<'); + result.append(name); + result.append(':'); + + if (fieldsData != null && lazy == false) { + result.append(fieldsData); + } + + result.append('>'); + return result.toString(); + } +} diff --git a/src/java/org/apache/lucene/document/Document.java b/src/java/org/apache/lucene/document/Document.java index 5fb0ce5e693..a19ca9fa48d 100644 --- a/src/java/org/apache/lucene/document/Document.java +++ b/src/java/org/apache/lucene/document/Document.java @@ -16,24 +16,21 @@ package org.apache.lucene.document; * limitations under the License. */ -import java.util.Enumeration; -import java.util.Iterator; -import java.util.List; -import java.util.ArrayList; -import java.util.Vector; -import org.apache.lucene.index.IndexReader; // for javadoc -import org.apache.lucene.search.Searcher; // for javadoc -import org.apache.lucene.search.Hits; // for javadoc +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.Hits; +import org.apache.lucene.search.Searcher; + +import java.util.*; // for javadoc /** Documents are the unit of indexing and search. * * A Document is a set of fields. Each field has a name and a textual value. - * A field may be {@link Field#isStored() stored} with the document, in which + * A field may be {@link Fieldable#isStored() stored} with the document, in which * case it is returned with search hits on the document. Thus each document * should typically contain one or more stored fields which uniquely identify * it. * - *
<p>
Note that fields which are not {@link Field#isStored() stored} are + *
<p>
Note that fields which are not {@link Fieldable#isStored() stored} are * not available in documents retrieved from the index, e.g. with {@link * Hits#doc(int)}, {@link Searcher#doc(int)} or {@link * IndexReader#document(int)}. @@ -50,11 +47,11 @@ public final class Document implements java.io.Serializable { /** Sets a boost factor for hits on any field of this document. This value * will be multiplied into the score of all hits on this document. * - *
<p>
Values are multiplied into the value of {@link Field#getBoost()} of + *
<p>
Values are multiplied into the value of {@link Fieldable#getBoost()} of * each field in this document. Thus, this method in effect sets a default * boost for the fields of this document. * - * @see Field#setBoost(float) + * @see Fieldable#setBoost(float) */ public void setBoost(float boost) { this.boost = boost; @@ -85,7 +82,7 @@ public final class Document implements java.io.Serializable { * a document has to be deleted from an index and a new changed version of that * document has to be added.
</p>
*/ - public final void add(Field field) { + public final void add(Fieldable field) { fields.add(field); } @@ -102,7 +99,7 @@ public final class Document implements java.io.Serializable { public final void removeField(String name) { Iterator it = fields.iterator(); while (it.hasNext()) { - Field field = (Field)it.next(); + Fieldable field = (Fieldable)it.next(); if (field.name().equals(name)) { it.remove(); return; @@ -122,7 +119,7 @@ public final class Document implements java.io.Serializable { public final void removeFields(String name) { Iterator it = fields.iterator(); while (it.hasNext()) { - Field field = (Field)it.next(); + Fieldable field = (Fieldable)it.next(); if (field.name().equals(name)) { it.remove(); } @@ -133,9 +130,9 @@ public final class Document implements java.io.Serializable { * null. If multiple fields exists with this name, this method returns the * first value added. */ - public final Field getField(String name) { + public final Fieldable getField(String name) { for (int i = 0; i < fields.size(); i++) { - Field field = (Field)fields.get(i); + Fieldable field = (Fieldable)fields.get(i); if (field.name().equals(name)) return field; } @@ -149,7 +146,7 @@ public final class Document implements java.io.Serializable { */ public final String get(String name) { for (int i = 0; i < fields.size(); i++) { - Field field = (Field)fields.get(i); + Fieldable field = (Fieldable)fields.get(i); if (field.name().equals(name) && (!field.isBinary())) return field.stringValue(); } @@ -162,16 +159,16 @@ public final class Document implements java.io.Serializable { } /** - * Returns an array of {@link Field}s with the given name. + * Returns an array of {@link Fieldable}s with the given name. * This method can return null. * * @param name the name of the field - * @return a Field[] array + * @return a Fieldable[] array */ - public final Field[] getFields(String name) { + public final Fieldable[] getFields(String name) { List result = new ArrayList(); for (int i = 0; i < fields.size(); i++) { - Field field = (Field)fields.get(i); + Fieldable field = (Fieldable)fields.get(i); if (field.name().equals(name)) { result.add(field); } @@ -180,7 +177,7 @@ public final class Document implements java.io.Serializable { if (result.size() == 0) return null; - return (Field[])result.toArray(new Field[result.size()]); + return (Fieldable[])result.toArray(new Fieldable[result.size()]); } /** @@ -193,7 +190,7 @@ public final class Document implements java.io.Serializable { public final String[] getValues(String name) { List result = new ArrayList(); for (int i = 0; i < fields.size(); i++) { - Field field = (Field)fields.get(i); + Fieldable field = (Fieldable)fields.get(i); if (field.name().equals(name) && (!field.isBinary())) result.add(field.stringValue()); } @@ -215,7 +212,7 @@ public final class Document implements java.io.Serializable { public final byte[][] getBinaryValues(String name) { List result = new ArrayList(); for (int i = 0; i < fields.size(); i++) { - Field field = (Field)fields.get(i); + Fieldable field = (Fieldable)fields.get(i); if (field.name().equals(name) && (field.isBinary())) result.add(field.binaryValue()); } @@ -237,7 +234,7 @@ public final class Document implements java.io.Serializable { */ public final byte[] getBinaryValue(String name) { for (int i=0; i < fields.size(); i++) { - Field field = (Field)fields.get(i); + Fieldable field = (Fieldable)fields.get(i); if (field.name().equals(name) && (field.isBinary())) return field.binaryValue(); } @@ -249,7 +246,7 @@ public final class 
Document implements java.io.Serializable { StringBuffer buffer = new StringBuffer(); buffer.append("Document<"); for (int i = 0; i < fields.size(); i++) { - Field field = (Field)fields.get(i); + Fieldable field = (Fieldable)fields.get(i); buffer.append(field.toString()); if (i != fields.size()-1) buffer.append(" "); diff --git a/src/java/org/apache/lucene/document/Field.java b/src/java/org/apache/lucene/document/Field.java index 30a134efdbc..3902c1adb7e 100644 --- a/src/java/org/apache/lucene/document/Field.java +++ b/src/java/org/apache/lucene/document/Field.java @@ -16,9 +16,6 @@ package org.apache.lucene.document; * limitations under the License. */ -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.search.Hits; -import org.apache.lucene.search.Similarity; import org.apache.lucene.util.Parameter; import java.io.Reader; @@ -32,23 +29,7 @@ import java.io.Serializable; index, so that they may be returned with hits on the document. */ -public final class Field implements Serializable { - private String name = "body"; - - // the one and only data object for all different kind of field values - private Object fieldsData = null; - - private boolean storeTermVector = false; - private boolean storeOffsetWithTermVector = false; - private boolean storePositionWithTermVector = false; - private boolean omitNorms = false; - private boolean isStored = false; - private boolean isIndexed = true; - private boolean isTokenized = true; - private boolean isBinary = false; - private boolean isCompressed = false; - - private float boost = 1.0f; +public final class Field extends AbstractField implements Fieldable, Serializable { /** Specifies whether and how a field should be stored. */ public static final class Store extends Parameter implements Serializable { @@ -146,45 +127,7 @@ public final class Field implements Serializable { public static final TermVector WITH_POSITIONS_OFFSETS = new TermVector("WITH_POSITIONS_OFFSETS"); } - /** Sets the boost factor hits on this field. This value will be - * multiplied into the score of all hits on this this field of this - * document. - * - *
<p>
The boost is multiplied by {@link Document#getBoost()} of the document - * containing this field. If a document has multiple fields with the same - * name, all such values are multiplied together. This product is then - * multipled by the value {@link Similarity#lengthNorm(String,int)}, and - * rounded by {@link Similarity#encodeNorm(float)} before it is stored in the - * index. One should attempt to ensure that this product does not overflow - * the range of that encoding. - * - * @see Document#setBoost(float) - * @see Similarity#lengthNorm(String, int) - * @see Similarity#encodeNorm(float) - */ - public void setBoost(float boost) { - this.boost = boost; - } - - /** Returns the boost factor for hits for this field. - * - *
<p>
The default value is 1.0. - * - *
<p>
Note: this value is not stored directly with the document in the index. - * Documents returned from {@link IndexReader#document(int)} and - * {@link Hits#doc(int)} may thus not have the same value present as when - * this field was indexed. - * - * @see #setBoost(float) - */ - public float getBoost() { - return boost; - } - /** Returns the name of the field as an interned string. - * For example "date", "title", "body", ... - */ - public String name() { return name; } - + /** The value of the field as a String, or null. If null, the Reader value * or binary value is used. Exactly one of stringValue(), readerValue(), and * binaryValue() must be set. */ @@ -365,146 +308,6 @@ public final class Field implements Serializable { setStoreTermVector(TermVector.NO); } - - private void setStoreTermVector(TermVector termVector) { - if (termVector == TermVector.NO) { - this.storeTermVector = false; - this.storePositionWithTermVector = false; - this.storeOffsetWithTermVector = false; - } - else if (termVector == TermVector.YES) { - this.storeTermVector = true; - this.storePositionWithTermVector = false; - this.storeOffsetWithTermVector = false; - } - else if (termVector == TermVector.WITH_POSITIONS) { - this.storeTermVector = true; - this.storePositionWithTermVector = true; - this.storeOffsetWithTermVector = false; - } - else if (termVector == TermVector.WITH_OFFSETS) { - this.storeTermVector = true; - this.storePositionWithTermVector = false; - this.storeOffsetWithTermVector = true; - } - else if (termVector == TermVector.WITH_POSITIONS_OFFSETS) { - this.storeTermVector = true; - this.storePositionWithTermVector = true; - this.storeOffsetWithTermVector = true; - } - else { - throw new IllegalArgumentException("unknown termVector parameter " + termVector); - } - } - - /** True iff the value of the field is to be stored in the index for return - with search hits. It is an error for this to be true if a field is - Reader-valued. */ - public final boolean isStored() { return isStored; } - /** True iff the value of the field is to be indexed, so that it may be - searched on. */ - public final boolean isIndexed() { return isIndexed; } - - /** True iff the value of the field should be tokenized as text prior to - indexing. Un-tokenized fields are indexed as a single word and may not be - Reader-valued. */ - public final boolean isTokenized() { return isTokenized; } - - /** True if the value of the field is stored and compressed within the index */ - public final boolean isCompressed() { return isCompressed; } - - /** True iff the term or terms used to index this field are stored as a term - * vector, available from {@link IndexReader#getTermFreqVector(int,String)}. - * These methods do not provide access to the original content of the field, - * only to terms used to index it. If the original content must be - * preserved, use the stored attribute instead. - * - * @see IndexReader#getTermFreqVector(int, String) - */ - public final boolean isTermVectorStored() { return storeTermVector; } - - /** - * True iff terms are stored as term vector together with their offsets - * (start and end positon in source text). - */ - public boolean isStoreOffsetWithTermVector(){ - return storeOffsetWithTermVector; - } - - /** - * True iff terms are stored as term vector together with their token positions. 
- */ - public boolean isStorePositionWithTermVector(){ - return storePositionWithTermVector; - } - - /** True iff the value of the filed is stored as binary */ - public final boolean isBinary() { return isBinary; } - - /** True if norms are omitted for this indexed field */ - public boolean getOmitNorms() { return omitNorms; } - - /** Expert: - * - * If set, omit normalization factors associated with this indexed field. - * This effectively disables indexing boosts and length normalization for this field. - */ - public void setOmitNorms(boolean omitNorms) { this.omitNorms=omitNorms; } - - /** Prints a Field for human consumption. */ - public final String toString() { - StringBuffer result = new StringBuffer(); - if (isStored) { - result.append("stored"); - if (isCompressed) - result.append("/compressed"); - else - result.append("/uncompressed"); - } - if (isIndexed) { - if (result.length() > 0) - result.append(","); - result.append("indexed"); - } - if (isTokenized) { - if (result.length() > 0) - result.append(","); - result.append("tokenized"); - } - if (storeTermVector) { - if (result.length() > 0) - result.append(","); - result.append("termVector"); - } - if (storeOffsetWithTermVector) { - if (result.length() > 0) - result.append(","); - result.append("termVectorOffsets"); - } - if (storePositionWithTermVector) { - if (result.length() > 0) - result.append(","); - result.append("termVectorPosition"); - } - if (isBinary) { - if (result.length() > 0) - result.append(","); - result.append("binary"); - } - if (omitNorms) { - result.append(",omitNorms"); - } - result.append('<'); - result.append(name); - result.append(':'); - - if (fieldsData != null) { - result.append(fieldsData); - } - - result.append('>'); - return result.toString(); - } } diff --git a/src/java/org/apache/lucene/document/FieldSelector.java b/src/java/org/apache/lucene/document/FieldSelector.java new file mode 100755 index 00000000000..fb6efdce4e0 --- /dev/null +++ b/src/java/org/apache/lucene/document/FieldSelector.java @@ -0,0 +1,24 @@ +package org.apache.lucene.document; +/** + * Created by IntelliJ IDEA. + * User: Grant Ingersoll + * Date: Apr 14, 2006 + * Time: 5:29:26 PM + * $Id:$ + * Copyright 2005. Center For Natural Language Processing + */ + +/** + * Similar to a {@link java.io.FileFilter}, the FieldSelector allows one to make decisions about + * what Fields get loaded on a {@link Document} by {@link org.apache.lucene.index.IndexReader#document(int,org.apache.lucene.document.FieldSelector)}. + * + **/ +public interface FieldSelector { + + /** + * + * @param fieldName the name of the {@link Field} to check + * @return a {@link FieldSelectorResult} indicating whether the {@link Field} with fieldName should be loaded + */ + FieldSelectorResult accept(String fieldName); +} diff --git a/src/java/org/apache/lucene/document/FieldSelectorResult.java b/src/java/org/apache/lucene/document/FieldSelectorResult.java new file mode 100755 index 00000000000..f3cea6c4537 --- /dev/null +++ b/src/java/org/apache/lucene/document/FieldSelectorResult.java @@ -0,0 +1,44 @@ +package org.apache.lucene.document; +/** + * Created by IntelliJ IDEA. + * User: Grant Ingersoll + * Date: Apr 14, 2006 + * Time: 5:40:17 PM + * $Id:$ + * Copyright 2005.
Center For Natural Language Processing + */ + +/** + * Provides information about what should be done with this Field + * + **/ +//Replace with an enumerated type in 1.5 +public final class FieldSelectorResult { + + public static final FieldSelectorResult LOAD = new FieldSelectorResult(0); + public static final FieldSelectorResult LAZY_LOAD = new FieldSelectorResult(1); + public static final FieldSelectorResult NO_LOAD = new FieldSelectorResult(2); + public static final FieldSelectorResult LOAD_AND_BREAK = new FieldSelectorResult(3); + + private int id; + + private FieldSelectorResult(int id) + { + this.id = id; + } + + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + final FieldSelectorResult that = (FieldSelectorResult) o; + + if (id != that.id) return false; + + return true; + } + + public int hashCode() { + return id; + } +} diff --git a/src/java/org/apache/lucene/document/Fieldable.java b/src/java/org/apache/lucene/document/Fieldable.java new file mode 100755 index 00000000000..58494382b26 --- /dev/null +++ b/src/java/org/apache/lucene/document/Fieldable.java @@ -0,0 +1,137 @@ +package org.apache.lucene.document; + +/** + * Copyright 2004 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.Reader; +import java.io.Serializable; + +/** + * Synonymous with {@link Field}. + * + **/ +public interface Fieldable extends Serializable { + /** Sets the boost factor for hits on this field. This value will be + * multiplied into the score of all hits on this field of this + * document. + * + *
<p>
The boost is multiplied by {@link org.apache.lucene.document.Document#getBoost()} of the document + * containing this field. If a document has multiple fields with the same + * name, all such values are multiplied together. This product is then + * multiplied by the value {@link org.apache.lucene.search.Similarity#lengthNorm(String,int)}, and + * rounded by {@link org.apache.lucene.search.Similarity#encodeNorm(float)} before it is stored in the + * index. One should attempt to ensure that this product does not overflow + * the range of that encoding. + * + * @see org.apache.lucene.document.Document#setBoost(float) + * @see org.apache.lucene.search.Similarity#lengthNorm(String, int) + * @see org.apache.lucene.search.Similarity#encodeNorm(float) + */ + void setBoost(float boost); + + /** Returns the boost factor for hits for this field. + * + *
<p>
The default value is 1.0. + * + *
<p>
Note: this value is not stored directly with the document in the index. + * Documents returned from {@link org.apache.lucene.index.IndexReader#document(int)} and + * {@link org.apache.lucene.search.Hits#doc(int)} may thus not have the same value present as when + * this field was indexed. + * + * @see #setBoost(float) + */ + float getBoost(); + + /** Returns the name of the field as an interned string. + * For example "date", "title", "body", ... + */ + String name(); + + /** The value of the field as a String, or null. If null, the Reader value + * or binary value is used. Exactly one of stringValue(), readerValue(), and + * binaryValue() must be set. */ + String stringValue(); + + /** The value of the field as a Reader, or null. If null, the String value + * or binary value is used. Exactly one of stringValue(), readerValue(), + * and binaryValue() must be set. */ + Reader readerValue(); + + /** The value of the field in Binary, or null. If null, the Reader or + * String value is used. Exactly one of stringValue(), readerValue() and + * binaryValue() must be set. */ + byte[] binaryValue(); + + /** True iff the value of the field is to be stored in the index for return + with search hits. It is an error for this to be true if a field is + Reader-valued. */ + boolean isStored(); + + /** True iff the value of the field is to be indexed, so that it may be + searched on. */ + boolean isIndexed(); + + /** True iff the value of the field should be tokenized as text prior to + indexing. Un-tokenized fields are indexed as a single word and may not be + Reader-valued. */ + boolean isTokenized(); + + /** True if the value of the field is stored and compressed within the index */ + boolean isCompressed(); + + /** True iff the term or terms used to index this field are stored as a term + * vector, available from {@link org.apache.lucene.index.IndexReader#getTermFreqVector(int,String)}. + * These methods do not provide access to the original content of the field, + * only to terms used to index it. If the original content must be + * preserved, use the stored attribute instead. + * + * @see org.apache.lucene.index.IndexReader#getTermFreqVector(int, String) + */ + boolean isTermVectorStored(); + + /** + * True iff terms are stored as term vector together with their offsets + * (start and end position in source text). + */ + boolean isStoreOffsetWithTermVector(); + + /** + * True iff terms are stored as term vector together with their token positions. + */ + boolean isStorePositionWithTermVector(); + + /** True iff the value of the field is stored as binary */ + boolean isBinary(); + + /** True if norms are omitted for this indexed field */ + boolean getOmitNorms(); + + /** Expert: + * + * If set, omit normalization factors associated with this indexed field. + * This effectively disables indexing boosts and length normalization for this field. + */ + void setOmitNorms(boolean omitNorms); + + /** + * Indicates whether a Field is Lazy or not. The semantics of Lazy loading are such that if a Field is lazily loaded, retrieving + * its values via {@link #stringValue()} or {@link #binaryValue()} is only valid as long as the {@link org.apache.lucene.index.IndexReader} that + * retrieved the {@link Document} is still open.
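To make the lazy contract just described concrete, here is a usage sketch. It assumes the SetBasedFieldSelector added later in this patch takes its two sets as (fieldsToLoad, lazyFieldsToLoad) -- the constructor itself is not visible above -- and an open reader with a stored field named "body":

    Set lazyFields = new HashSet();
    lazyFields.add("body");
    FieldSelector selector = new SetBasedFieldSelector(Collections.EMPTY_SET, lazyFields);
    Document doc = reader.document(0, selector);
    Fieldable body = doc.getField("body");
    // body.isLazy() == true; no bytes have been read from the index yet.
    String text = body.stringValue(); // fine: the reader is still open
    reader.close();
    // After close(), stringValue()/binaryValue() on a lazy field may fail.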
+ * + * @return true if this field can be loaded lazily + */ + boolean isLazy(); +} diff --git a/src/java/org/apache/lucene/document/LoadFirstFieldSelector.java b/src/java/org/apache/lucene/document/LoadFirstFieldSelector.java new file mode 100755 index 00000000000..b11a0f7e649 --- /dev/null +++ b/src/java/org/apache/lucene/document/LoadFirstFieldSelector.java @@ -0,0 +1,22 @@ +package org.apache.lucene.document; +/** + * Created by IntelliJ IDEA. + * User: Grant Ingersoll + * Date: Apr 15, 2006 + * Time: 10:13:07 AM + * $Id:$ + * Copyright 2005. Center For Natural Language Processing + */ + + +/** + * Load the First field and break. + *
<p/>
+ * See {@link FieldSelectorResult#LOAD_AND_BREAK} + */ +public class LoadFirstFieldSelector implements FieldSelector { + + public FieldSelectorResult accept(String fieldName) { + return FieldSelectorResult.LOAD_AND_BREAK; + } +} \ No newline at end of file diff --git a/src/java/org/apache/lucene/document/MapFieldSelector.java b/src/java/org/apache/lucene/document/MapFieldSelector.java new file mode 100644 index 00000000000..02cc06743e3 --- /dev/null +++ b/src/java/org/apache/lucene/document/MapFieldSelector.java @@ -0,0 +1,57 @@ +/* + * MapFieldSelector.java + * + * Created on May 2, 2006, 6:49 PM + * + */ + +package org.apache.lucene.document; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * A FieldSelector based on a Map of field names to FieldSelectorResults + * + * @author Chuck Williams + */ +public class MapFieldSelector implements FieldSelector { + + Map fieldSelections; + + /** Create a MapFieldSelector + * @param fieldSelections maps from field names to FieldSelectorResults + */ + public MapFieldSelector(Map fieldSelections) { + this.fieldSelections = fieldSelections; + } + + /** Create a MapFieldSelector + * @param fields fields to LOAD. All other fields are NO_LOAD. + */ + public MapFieldSelector(List fields) { + fieldSelections = new HashMap(fields.size()*5/3); + for (int i=0; ifieldsToLoad and lazyFieldsToLoad, lazy has precedence. + * + * @param fieldName The {@link Field} name to check + * @return The {@link FieldSelectorResult} + */ + public FieldSelectorResult accept(String fieldName) { + FieldSelectorResult result = FieldSelectorResult.NO_LOAD; + if (fieldsToLoad.contains(fieldName) == true){ + result = FieldSelectorResult.LOAD; + } + if (lazyFieldsToLoad.contains(fieldName) == true){ + result = FieldSelectorResult.LAZY_LOAD; + } + return result; + } +} \ No newline at end of file diff --git a/src/java/org/apache/lucene/index/DocumentWriter.java b/src/java/org/apache/lucene/index/DocumentWriter.java index 250496ea382..d2b407e8142 100644 --- a/src/java/org/apache/lucene/index/DocumentWriter.java +++ b/src/java/org/apache/lucene/index/DocumentWriter.java @@ -16,22 +16,22 @@ package org.apache.lucene.index; * limitations under the License.
*/ +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.Token; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Fieldable; +import org.apache.lucene.search.Similarity; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IndexOutput; + import java.io.IOException; import java.io.PrintStream; import java.io.Reader; import java.io.StringReader; -import java.util.Hashtable; -import java.util.Enumeration; import java.util.Arrays; - -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.Token; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.search.Similarity; +import java.util.Enumeration; +import java.util.Hashtable; final class DocumentWriter { private Analyzer analyzer; @@ -129,7 +129,7 @@ final class DocumentWriter { throws IOException { Enumeration fields = doc.fields(); while (fields.hasMoreElements()) { - Field field = (Field) fields.nextElement(); + Fieldable field = (Fieldable) fields.nextElement(); String fieldName = field.name(); int fieldNumber = fieldInfos.fieldNumber(fieldName); diff --git a/src/java/org/apache/lucene/index/FieldInfos.java b/src/java/org/apache/lucene/index/FieldInfos.java index e35f79f5c64..16148f3d436 100644 --- a/src/java/org/apache/lucene/index/FieldInfos.java +++ b/src/java/org/apache/lucene/index/FieldInfos.java @@ -16,18 +16,17 @@ package org.apache.lucene.index; * limitations under the License. */ -import java.util.*; -import java.io.IOException; - import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; - +import org.apache.lucene.document.Fieldable; import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; -/** Access to the Field Info file that describes document fields and whether or - * not they are indexed. Each segment has a separate Field Info file. Objects +import java.io.IOException; +import java.util.*; + +/** Access to the Fieldable Info file that describes document fields and whether or + * not they are indexed. Each segment has a separate Fieldable Info file. Objects * of this class are thread-safe for multiple readers, but only one thread can * be adding documents at a time, with no other reader or writer threads * accessing this object. @@ -65,7 +64,7 @@ final class FieldInfos { public void add(Document doc) { Enumeration fields = doc.fields(); while (fields.hasMoreElements()) { - Field field = (Field) fields.nextElement(); + Fieldable field = (Fieldable) fields.nextElement(); add(field.name(), field.isIndexed(), field.isTermVectorStored(), field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(), field.getOmitNorms()); } @@ -105,7 +104,7 @@ final class FieldInfos { /** * Calls 5 parameter add with false for all TermVector parameters. 
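Stepping back to the three concrete selectors added above, a short sketch of how each is constructed (field names are illustrative; the List form marks listed fields LOAD and everything else NO_LOAD, per its javadoc):

    // Map form: field name -> explicit FieldSelectorResult.
    Map selections = new HashMap();
    selections.put("id", FieldSelectorResult.LOAD);
    selections.put("body", FieldSelectorResult.LAZY_LOAD);
    FieldSelector byMap = new MapFieldSelector(selections);

    // List form: everything named is LOAD, all other fields NO_LOAD.
    FieldSelector idOnly = new MapFieldSelector(Arrays.asList(new String[]{"id"}));

    // LOAD_AND_BREAK for every name: only the first stored field is read.
    FieldSelector firstOnly = new LoadFirstFieldSelector();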
* - * @param name The name of the Field + * @param name The name of the Fieldable * @param isIndexed true if the field is indexed * @see #add(String, boolean, boolean, boolean, boolean) */ diff --git a/src/java/org/apache/lucene/index/FieldReaderException.java b/src/java/org/apache/lucene/index/FieldReaderException.java new file mode 100755 index 00000000000..eb8d3a75886 --- /dev/null +++ b/src/java/org/apache/lucene/index/FieldReaderException.java @@ -0,0 +1,70 @@ +package org.apache.lucene.index; +/** + * Created by IntelliJ IDEA. + * User: Grant Ingersoll + * Date: Jan 12, 2006 + * Time: 9:37:43 AM + * $Id:$ + * Copyright 2005. Center For Natural Language Processing + */ + +/** + * + * + **/ +public class FieldReaderException extends RuntimeException{ + /** + * Constructs a new runtime exception with null as its + * detail message. The cause is not initialized, and may subsequently be + * initialized by a call to {@link #initCause}. + */ + public FieldReaderException() { + } + + /** + * Constructs a new runtime exception with the specified cause and a + * detail message of (cause==null ? null : cause.toString()) + * (which typically contains the class and detail message of + * cause). This constructor is useful for runtime exceptions + * that are little more than wrappers for other throwables. + * + * @param cause the cause (which is saved for later retrieval by the + * {@link #getCause()} method). (A null value is + * permitted, and indicates that the cause is nonexistent or + * unknown.) + * @since 1.4 + */ + public FieldReaderException(Throwable cause) { + super(cause); + } + + /** + * Constructs a new runtime exception with the specified detail message. + * The cause is not initialized, and may subsequently be initialized by a + * call to {@link #initCause}. + * + * @param message the detail message. The detail message is saved for + * later retrieval by the {@link #getMessage()} method. + */ + public FieldReaderException(String message) { + super(message); + } + + /** + * Constructs a new runtime exception with the specified detail message and + * cause.
<p>
Note that the detail message associated with + * cause is not automatically incorporated in + * this runtime exception's detail message. + * + * @param message the detail message (which is saved for later retrieval + * by the {@link #getMessage()} method). + * @param cause the cause (which is saved for later retrieval by the + * {@link #getCause()} method). (A null value is + * permitted, and indicates that the cause is nonexistent or + * unknown.) + * @since 1.4 + */ + public FieldReaderException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/src/java/org/apache/lucene/index/FieldsReader.java b/src/java/org/apache/lucene/index/FieldsReader.java index ecb9f2986a2..e950204e2b0 100644 --- a/src/java/org/apache/lucene/index/FieldsReader.java +++ b/src/java/org/apache/lucene/index/FieldsReader.java @@ -16,19 +16,19 @@ package org.apache.lucene.index; * limitations under the License. */ -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.util.zip.DataFormatException; -import java.util.zip.Inflater; - -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.*; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.Reader; +import java.util.zip.DataFormatException; +import java.util.zip.Inflater; + /** * Class responsible for access to stored document fields. - * + *
<p/>
* It uses <segment>.fdt and <segment>.fdx; files. * * @version $Id$ @@ -39,25 +39,37 @@ final class FieldsReader { private IndexInput indexStream; private int size; + private static ThreadLocal fieldsStreamTL = new ThreadLocal(); + FieldsReader(Directory d, String segment, FieldInfos fn) throws IOException { fieldInfos = fn; fieldsStream = d.openInput(segment + ".fdt"); indexStream = d.openInput(segment + ".fdx"); - - size = (int)(indexStream.length() / 8); + size = (int) (indexStream.length() / 8); } + /** + * Closes the underlying {@link org.apache.lucene.store.IndexInput} streams, including any ones associated with a + * lazy implementation of a Field. This means that the Fields values will not be accessible. + * + * @throws IOException + */ final void close() throws IOException { fieldsStream.close(); indexStream.close(); + IndexInput localFieldsStream = (IndexInput) fieldsStreamTL.get(); + if (localFieldsStream != null) { + localFieldsStream.close(); + fieldsStreamTL.set(null); + } } final int size() { return size; } - final Document doc(int n) throws IOException { + final Document doc(int n, FieldSelector fieldSelector) throws IOException { indexStream.seek(n * 8L); long position = indexStream.readLong(); fieldsStream.seek(position); @@ -67,89 +79,277 @@ final class FieldsReader { for (int i = 0; i < numFields; i++) { int fieldNumber = fieldsStream.readVInt(); FieldInfo fi = fieldInfos.fieldInfo(fieldNumber); - - byte bits = fieldsStream.readByte(); + FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.accept(fi.name); + boolean lazy = acceptField.equals(FieldSelectorResult.LAZY_LOAD) == true; + byte bits = fieldsStream.readByte(); boolean compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0; boolean tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0; - - if ((bits & FieldsWriter.FIELD_IS_BINARY) != 0) { - final byte[] b = new byte[fieldsStream.readVInt()]; - fieldsStream.readBytes(b, 0, b.length); - if (compressed) - doc.add(new Field(fi.name, uncompress(b), Field.Store.COMPRESS)); - else - doc.add(new Field(fi.name, b, Field.Store.YES)); + boolean binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0; + if (acceptField.equals(FieldSelectorResult.LOAD) == true) { + addField(doc, fi, binary, compressed, tokenize); } + else if (acceptField.equals(FieldSelectorResult.LOAD_AND_BREAK) == true){ + addField(doc, fi, binary, compressed, tokenize); + break;//Get out of this loop + } + else if (lazy == true){ + addFieldLazy(doc, fi, binary, compressed, tokenize); + } else { - Field.Index index; - Field.Store store = Field.Store.YES; - - if (fi.isIndexed && tokenize) - index = Field.Index.TOKENIZED; - else if (fi.isIndexed && !tokenize) - index = Field.Index.UN_TOKENIZED; - else - index = Field.Index.NO; - - Field.TermVector termVector = null; - if (fi.storeTermVector) { - if (fi.storeOffsetWithTermVector) { - if (fi.storePositionWithTermVector) { - termVector = Field.TermVector.WITH_POSITIONS_OFFSETS; - } - else { - termVector = Field.TermVector.WITH_OFFSETS; - } - } - else if (fi.storePositionWithTermVector) { - termVector = Field.TermVector.WITH_POSITIONS; - } - else { - termVector = Field.TermVector.YES; - } - } - else { - termVector = Field.TermVector.NO; - } - - if (compressed) { - store = Field.Store.COMPRESS; - final byte[] b = new byte[fieldsStream.readVInt()]; - fieldsStream.readBytes(b, 0, b.length); - Field f = new Field(fi.name, // field name - new String(uncompress(b), "UTF-8"), // uncompress the value and add as string -
store, - index, - termVector); - f.setOmitNorms(fi.omitNorms); - doc.add(f); - } - else { - Field f = new Field(fi.name, // name - fieldsStream.readString(), // read value - store, - index, - termVector); - f.setOmitNorms(fi.omitNorms); - doc.add(f); - } + skipField(binary, compressed); } } return doc; } - + + /** + * Skip the field. We still have to read some of the information about the field, but can skip past the actual content. + * This will have the most payoff on large fields. + */ + private void skipField(boolean binary, boolean compressed) throws IOException { + + int toRead = fieldsStream.readVInt(); + + if (binary || compressed) { + long pointer = fieldsStream.getFilePointer(); + fieldsStream.seek(pointer + toRead); + } else { + //We need to skip chars. This will slow us down, but still better + fieldsStream.skipChars(toRead); + } + } + + private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws IOException { + if (binary == true) { + int toRead = fieldsStream.readVInt(); + long pointer = fieldsStream.getFilePointer(); + if (compressed) { + //was: doc.add(new Fieldable(fi.name, uncompress(b), Fieldable.Store.COMPRESS)); + doc.add(new LazyField(fi.name, Field.Store.COMPRESS, toRead, pointer)); + } else { + //was: doc.add(new Fieldable(fi.name, b, Fieldable.Store.YES)); + doc.add(new LazyField(fi.name, Field.Store.YES, toRead, pointer)); + } + //Need to move the pointer ahead by toRead positions + fieldsStream.seek(pointer + toRead); + } else { + Field.Store store = Field.Store.YES; + Field.Index index = getIndexType(fi, tokenize); + Field.TermVector termVector = getTermVectorType(fi); + + Fieldable f; + if (compressed) { + store = Field.Store.COMPRESS; + int toRead = fieldsStream.readVInt(); + long pointer = fieldsStream.getFilePointer(); + f = new LazyField(fi.name, store, toRead, pointer); + //skip over the part that we aren't loading + fieldsStream.seek(pointer + toRead); + f.setOmitNorms(fi.omitNorms); + } else { + int length = fieldsStream.readVInt(); + long pointer = fieldsStream.getFilePointer(); + //Skip ahead of where we are by the length of what is stored + fieldsStream.skipChars(length); + f = new LazyField(fi.name, store, index, termVector, length, pointer); + f.setOmitNorms(fi.omitNorms); + } + doc.add(f); + } + + } + + private void addField(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws IOException { + + //we have a binary stored field, and it may be compressed + if (binary) { + int toRead = fieldsStream.readVInt(); + final byte[] b = new byte[toRead]; + fieldsStream.readBytes(b, 0, b.length); + if (compressed) + doc.add(new Field(fi.name, uncompress(b), Field.Store.COMPRESS)); + else + doc.add(new Field(fi.name, b, Field.Store.YES)); + + } else { + Field.Store store = Field.Store.YES; + Field.Index index = getIndexType(fi, tokenize); + Field.TermVector termVector = getTermVectorType(fi); + + Fieldable f; + if (compressed) { + store = Field.Store.COMPRESS; + int toRead = fieldsStream.readVInt(); + + final byte[] b = new byte[toRead]; + fieldsStream.readBytes(b, 0, b.length); + f = new Field(fi.name, // field name + new String(uncompress(b), "UTF-8"), // uncompress the value and add as string + store, + index, + termVector); + f.setOmitNorms(fi.omitNorms); + } else { + f = new Field(fi.name, // name + fieldsStream.readString(), // read value + store, + index, + termVector); + f.setOmitNorms(fi.omitNorms); + } + doc.add(f); + } + } + + private Field.TermVector 
getTermVectorType(FieldInfo fi) { + Field.TermVector termVector = null; + if (fi.storeTermVector) { + if (fi.storeOffsetWithTermVector) { + if (fi.storePositionWithTermVector) { + termVector = Field.TermVector.WITH_POSITIONS_OFFSETS; + } else { + termVector = Field.TermVector.WITH_OFFSETS; + } + } else if (fi.storePositionWithTermVector) { + termVector = Field.TermVector.WITH_POSITIONS; + } else { + termVector = Field.TermVector.YES; + } + } else { + termVector = Field.TermVector.NO; + } + return termVector; + } + + private Field.Index getIndexType(FieldInfo fi, boolean tokenize) { + Field.Index index; + if (fi.isIndexed && tokenize) + index = Field.Index.TOKENIZED; + else if (fi.isIndexed && !tokenize) + index = Field.Index.UN_TOKENIZED; + else + index = Field.Index.NO; + return index; + } + + /** + * A Lazy implementation of Fieldable that defers loading of fields until asked for, instead of when the Document is + * loaded. + */ + private class LazyField extends AbstractField implements Fieldable { + private int toRead; + private long pointer; + //internal buffer + private char[] chars; + + + public LazyField(String name, Field.Store store, int toRead, long pointer) { + super(name, store, Field.Index.NO, Field.TermVector.NO); + this.toRead = toRead; + this.pointer = pointer; + lazy = true; + } + + public LazyField(String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer) { + super(name, store, index, termVector); + this.toRead = toRead; + this.pointer = pointer; + lazy = true; + } + + /** + * The value of the field in Binary, or null. If null, the Reader or + * String value is used. Exactly one of stringValue(), readerValue() and + * binaryValue() must be set. + */ + public byte[] binaryValue() { + if (fieldsData == null) { + final byte[] b = new byte[toRead]; + IndexInput localFieldsStream = (IndexInput) fieldsStreamTL.get(); + if (localFieldsStream == null) { + localFieldsStream = (IndexInput) fieldsStream.clone(); + fieldsStreamTL.set(localFieldsStream); + } + //Throw this IO Exception since IndexReader.document does so anyway, so probably not that big of a change for people + //since they are already handling this exception when getting the document + try { + localFieldsStream.seek(pointer); + localFieldsStream.readBytes(b, 0, b.length); + if (isCompressed == true) { + fieldsData = uncompress(b); + } else { + fieldsData = b; + } + } catch (IOException e) { + throw new FieldReaderException(e); + } + } + return fieldsData instanceof byte[] ? (byte[]) fieldsData : null; + } + + /** + * The value of the field as a Reader, or null. If null, the String value + * or binary value is used. Exactly one of stringValue(), readerValue(), + * and binaryValue() must be set. + */ + public Reader readerValue() { + return fieldsData instanceof Reader ? (Reader) fieldsData : null; + } + + /** + * The value of the field as a String, or null. If null, the Reader value + * or binary value is used. Exactly one of stringValue(), readerValue(), and + * binaryValue() must be set.
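The ThreadLocal clone above is what lets a lazy field read its bytes after doc() has returned; once the owning FieldsReader is closed, that clone is closed too, so a deferred read is no longer guaranteed to work. A sketch of the failure mode this guards against (inferred from this patch -- the exact behavior after close is not specified; lazySelector stands for any selector that returned LAZY_LOAD for "body"):

    Document doc = reader.document(0, lazySelector); // "body" marked LAZY_LOAD
    Fieldable body = doc.getField("body");           // no bytes read yet
    reader.close();                                  // also closes the FieldsReader streams
    try {
      body.stringValue();                            // seeks on a closed IndexInput
    } catch (FieldReaderException e) {
      // wraps the underlying IOException; load lazy values before closing,
      // or re-fetch the Document from a fresh reader
    }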
+ */ + public String stringValue() { + if (fieldsData == null) { + IndexInput localFieldsStream = (IndexInput) fieldsStreamTL.get(); + if (localFieldsStream == null) { + localFieldsStream = (IndexInput) fieldsStream.clone(); + fieldsStreamTL.set(localFieldsStream); + } + try { + localFieldsStream.seek(pointer); + //read in chars b/c we already know the length we need to read + if (chars == null || toRead > chars.length) + chars = new char[toRead]; + localFieldsStream.readChars(chars, 0, toRead); + fieldsData = new String(chars, 0, toRead);//fieldsStream.readString(); + } catch (IOException e) { + throw new FieldReaderException(e); + } + } + return fieldsData instanceof String ? (String) fieldsData : null; + } + + public long getPointer() { + return pointer; + } + + public void setPointer(long pointer) { + this.pointer = pointer; + } + + public int getToRead() { + return toRead; + } + + public void setToRead(int toRead) { + this.toRead = toRead; + } + } + private final byte[] uncompress(final byte[] input) - throws IOException - { - + throws IOException { + Inflater decompressor = new Inflater(); decompressor.setInput(input); - + // Create an expandable byte array to hold the decompressed data ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length); - + // Decompress the data byte[] buf = new byte[1024]; while (!decompressor.finished()) { diff --git a/src/java/org/apache/lucene/index/FilterIndexReader.java b/src/java/org/apache/lucene/index/FilterIndexReader.java index 4934d0adec0..c8bba18740a 100644 --- a/src/java/org/apache/lucene/index/FilterIndexReader.java +++ b/src/java/org/apache/lucene/index/FilterIndexReader.java @@ -17,6 +17,8 @@ package org.apache.lucene.index; */ import org.apache.lucene.document.Document; +import org.apache.lucene.document.FieldSelector; + import java.io.IOException; import java.util.Collection; @@ -100,7 +102,7 @@ public class FilterIndexReader extends IndexReader { public int numDocs() { return in.numDocs(); } public int maxDoc() { return in.maxDoc(); } - public Document document(int n) throws IOException { return in.document(n); } + public Document document(int n, FieldSelector fieldSelector) throws IOException { return in.document(n, fieldSelector); } public boolean isDeleted(int n) { return in.isDeleted(n); } public boolean hasDeletions() { return in.hasDeletions(); } @@ -133,7 +135,7 @@ public class FilterIndexReader extends IndexReader { protected void doCommit() throws IOException { in.commit(); } protected void doClose() throws IOException { in.close(); } - + public Collection getFieldNames(IndexReader.FieldOption fieldNames) { return in.getFieldNames(fieldNames); } diff --git a/src/java/org/apache/lucene/index/IndexModifier.java b/src/java/org/apache/lucene/index/IndexModifier.java index cfc52bbfd93..a13541a5eb0 100644 --- a/src/java/org/apache/lucene/index/IndexModifier.java +++ b/src/java/org/apache/lucene/index/IndexModifier.java @@ -273,7 +273,7 @@ public class IndexModifier { } } - + /** * Returns the number of documents currently in this index. * @see IndexWriter#docCount() @@ -407,7 +407,7 @@ public class IndexModifier { * the number of files open in a FSDirectory. * *
<p>
The default value is 10. - * + * * @see IndexWriter#setMaxBufferedDocs(int) * @throws IllegalStateException if the index is closed * @throws IllegalArgumentException if maxBufferedDocs is smaller than 2 @@ -500,8 +500,8 @@ public class IndexModifier { // create an index in /tmp/index, overwriting an existing one: IndexModifier indexModifier = new IndexModifier("/tmp/index", analyzer, true); Document doc = new Document(); - doc.add(new Field("id", "1", Field.Store.YES, Field.Index.UN_TOKENIZED)); - doc.add(new Field("body", "a simple test", Field.Store.YES, Field.Index.TOKENIZED)); + doc.add(new Fieldable("id", "1", Fieldable.Store.YES, Fieldable.Index.UN_TOKENIZED)); + doc.add(new Fieldable("body", "a simple test", Fieldable.Store.YES, Fieldable.Index.TOKENIZED)); indexModifier.addDocument(doc); int deleted = indexModifier.delete(new Term("id", "1")); System.out.println("Deleted " + deleted + " document"); diff --git a/src/java/org/apache/lucene/index/IndexReader.java b/src/java/org/apache/lucene/index/IndexReader.java index 07e39f0bc14..47f9d580543 100644 --- a/src/java/org/apache/lucene/index/IndexReader.java +++ b/src/java/org/apache/lucene/index/IndexReader.java @@ -17,7 +17,7 @@ package org.apache.lucene.index; */ import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldSelector; import org.apache.lucene.search.Similarity; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; @@ -42,7 +42,7 @@ import java.util.Collection; document in the index. These document numbers are ephemeral--they may change as documents are added to and deleted from an index. Clients should thus not rely on a given document having the same number between sessions. - +
<p>
An IndexReader can be opened on a directory for which an IndexWriter is opened already, but it cannot be used to delete documents from the index then. @@ -50,13 +50,13 @@ import java.util.Collection; @version $Id$ */ public abstract class IndexReader { - + public static final class FieldOption { private String option; private FieldOption() { } private FieldOption(String option) { this.option = option; - } + } public String toString() { return this.option; } @@ -79,7 +79,7 @@ public abstract class IndexReader { // all fields where termvectors with offset and position values set public static final FieldOption TERMVECTOR_WITH_POSITION_OFFSET = new FieldOption ("TERMVECTOR_WITH_POSITION_OFFSET"); } - + /** * Constructor used if IndexReader is not owner of its directory. * This is used for IndexReaders that are used within other IndexReaders that take care or locking directories. @@ -89,7 +89,7 @@ public abstract class IndexReader { protected IndexReader(Directory directory) { this.directory = directory; } - + /** * Constructor used if IndexReader is owner of its directory. * If IndexReader is owner of its directory, it locks its directory in case of write operations. @@ -117,7 +117,7 @@ public abstract class IndexReader { private Lock writeLock; private boolean stale; private boolean hasChanges; - + /** Returns an IndexReader reading the index in an FSDirectory in the named path. */ @@ -130,7 +130,7 @@ public abstract class IndexReader { public static IndexReader open(File path) throws IOException { return open(FSDirectory.getDirectory(path, false), true); } - + /** Returns an IndexReader reading the index in the given Directory. */ public static IndexReader open(final Directory directory) throws IOException { return open(directory, false); @@ -151,7 +151,7 @@ public abstract class IndexReader { for (int i = 0; i < infos.size(); i++) readers[i] = SegmentReader.get(infos.info(i)); return new MultiReader(directory, infos, closeDirectory, readers); - + } }.run(); } @@ -160,7 +160,7 @@ public abstract class IndexReader { /** Returns the directory this index resides in. */ public Directory directory() { return directory; } - /** + /** * Returns the time the index in the named directory was last modified. * Do not use this to check whether the reader is still up-to-date, use * {@link #isCurrent()} instead. @@ -169,7 +169,7 @@ public abstract class IndexReader { return lastModified(new File(directory)); } - /** + /** * Returns the time the index in the named directory was last modified. * Do not use this to check whether the reader is still up-to-date, use * {@link #isCurrent()} instead. @@ -178,7 +178,7 @@ public abstract class IndexReader { return FSDirectory.fileModified(directory, IndexFileNames.SEGMENTS); } - /** + /** * Returns the time the index in the named directory was last modified. * Do not use this to check whether the reader is still up-to-date, use * {@link #isCurrent()} instead. @@ -228,12 +228,12 @@ public abstract class IndexReader { public static long getCurrentVersion(Directory directory) throws IOException { synchronized (directory) { // in- & inter-process sync Lock commitLock=directory.makeLock(IndexWriter.COMMIT_LOCK_NAME); - + boolean locked=false; - + try { locked=commitLock.obtain(IndexWriter.COMMIT_LOCK_TIMEOUT); - + return SegmentInfos.readCurrentVersion(directory); } finally { if (locked) { @@ -242,7 +242,7 @@ public abstract class IndexReader { } } } - + /** * Version number when this IndexReader was opened. 
*/ @@ -260,12 +260,12 @@ public abstract class IndexReader { public boolean isCurrent() throws IOException { synchronized (directory) { // in- & inter-process sync Lock commitLock=directory.makeLock(IndexWriter.COMMIT_LOCK_NAME); - + boolean locked=false; - + try { locked=commitLock.obtain(IndexWriter.COMMIT_LOCK_TIMEOUT); - + return SegmentInfos.readCurrentVersion(directory) == segmentInfos.getVersion(); } finally { if (locked) { @@ -292,7 +292,7 @@ public abstract class IndexReader { abstract public TermFreqVector[] getTermFreqVectors(int docNumber) throws IOException; - + /** * Return a term frequency vector for the specified document and field. The * returned vector contains terms and frequencies for the terms in @@ -309,7 +309,7 @@ */ abstract public TermFreqVector getTermFreqVector(int docNumber, String field) throws IOException; - + /** * Returns true if an index exists at the specified directory. * If the directory does not exist or if there is no index in it. @@ -353,14 +353,40 @@ /** Returns the stored fields of the nth Document in this index. */ - public abstract Document document(int n) throws IOException; + public Document document(int n) throws IOException { + return document(n, null); + } + + /** + * Get the {@link org.apache.lucene.document.Document} at the nth position. The {@link org.apache.lucene.document.FieldSelector} + * may be used to determine what {@link org.apache.lucene.document.Field}s to load and how they should be loaded. + * + * NOTE: If this Reader (more specifically, the underlying {@link FieldsReader}) is closed before the lazy {@link org.apache.lucene.document.Field} is + * loaded, an exception may be thrown. If you want the value of a lazy {@link org.apache.lucene.document.Field} to be available after closing, you must + * explicitly load it or fetch the Document again with a new loader. + * + * @param n The position of the document to get + * @param fieldSelector The {@link org.apache.lucene.document.FieldSelector} to use to determine what Fields should be loaded on the Document. May be null, in which case all Fields will be loaded. + * @return The stored fields of the {@link org.apache.lucene.document.Document} at the nth position + * @throws IOException If there is a problem reading this document + * + * @see org.apache.lucene.document.Fieldable + * @see org.apache.lucene.document.FieldSelector + * @see org.apache.lucene.document.SetBasedFieldSelector + * @see org.apache.lucene.document.LoadFirstFieldSelector + */ + //When we convert to JDK 1.5 make this Set<String> + public abstract Document document(int n, FieldSelector fieldSelector) throws IOException; + + + /** Returns true if document n has been deleted */ public abstract boolean isDeleted(int n); /** Returns true if any documents have been deleted */ public abstract boolean hasDeletions(); - + /** Returns true if there are norms stored for this field. */ public boolean hasNorms(String field) throws IOException { // backward compatible implementation. @@ -371,21 +397,21 @@ /** Returns the byte-encoded normalization factor for the named field of * every document. This is used by the search code to score documents. * - * @see Field#setBoost(float) + * @see org.apache.lucene.document.Field#setBoost(float) */ public abstract byte[] norms(String field) throws IOException; /** Reads the byte-encoded normalization factor for the named field of every * document. 
This is used by the search code to score documents. * - * @see Field#setBoost(float) + * @see org.apache.lucene.document.Field#setBoost(float) */ public abstract void norms(String field, byte[] bytes, int offset) throws IOException; /** Expert: Resets the normalization factor for the named field of the named * document. The norm represents the product of the field's {@link - * Field#setBoost(float) boost} and its {@link Similarity#lengthNorm(String, + * Fieldable#setBoost(float) boost} and its {@link Similarity#lengthNorm(String, * int) length normalization}. Thus, to preserve the length normalization * values when resetting this, one should base the new value upon the old. * @@ -399,9 +425,9 @@ public abstract class IndexReader { doSetNorm(doc, field, value); hasChanges = true; } - + /** Implements setNorm in subclass.*/ - protected abstract void doSetNorm(int doc, String field, byte value) + protected abstract void doSetNorm(int doc, String field, byte value) throws IOException; /** Expert: Resets the normalization factor for the named field of the named @@ -554,7 +580,7 @@ public abstract class IndexReader { doUndeleteAll(); hasChanges = true; } - + /** Implements actual undeleteAll() in subclass. */ protected abstract void doUndeleteAll() throws IOException; @@ -586,10 +612,10 @@ public abstract class IndexReader { } hasChanges = false; } - + /** Implements commit. */ protected abstract void doCommit() throws IOException; - + /** * Closes files associated with this index. * Also saves any new deletions to disk. @@ -613,7 +639,7 @@ public abstract class IndexReader { } } - + /** * Get a list of unique field names that exist in this index and have the specified * field option information. @@ -659,7 +685,7 @@ public abstract class IndexReader { directory.makeLock(IndexWriter.WRITE_LOCK_NAME).release(); directory.makeLock(IndexWriter.COMMIT_LOCK_NAME).release(); } - + /** * Prints the filename and size of each file within a given compound file. * Add the -extract flag to extract files to the current working directory. 
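As a usage sketch (not part of the patch itself): the new document(int, FieldSelector) entry point is driven entirely by the selector handed to it. The index path and the "title"/"body" field names below are hypothetical; SetBasedFieldSelector takes the set of fields to load eagerly first and the set to load lazily second, and, per the NOTE on document(int, FieldSelector) above, a lazy field's value should be fetched before the reader is closed.

    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Fieldable;
    import org.apache.lucene.document.SetBasedFieldSelector;
    import org.apache.lucene.index.IndexReader;

    import java.io.File;
    import java.util.HashSet;
    import java.util.Set;

    public class FieldSelectorUsage {
      public static void main(String[] args) throws Exception {
        IndexReader reader = IndexReader.open(new File("/tmp/index"));
        Set eager = new HashSet();          // fields loaded up front
        eager.add("title");
        Set lazy = new HashSet();           // fields loaded on first access
        lazy.add("body");
        Document doc = reader.document(0, new SetBasedFieldSelector(eager, lazy));
        Fieldable body = doc.getField("body");   // lazy: no bytes read yet
        String text = body.stringValue();        // the value is actually read here
        reader.close();                          // close only after loading lazy values
        System.out.println(text);
      }
    }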
@@ -686,7 +712,7 @@ public abstract class IndexReader { Directory dir = null; CompoundFileReader cfr = null; - + try { File file = new File(filename); String dirname = file.getAbsoluteFile().getParent(); @@ -696,7 +722,7 @@ String [] files = cfr.list(); Arrays.sort(files); // sort the array of filenames so that the output is more readable - + for (int i = 0; i < files.length; ++i) { long len = cfr.fileLength(files[i]); @@ -705,7 +731,7 @@ IndexInput ii = cfr.openInput(files[i]); FileOutputStream f = new FileOutputStream(files[i]); - + // read and write with a small buffer, which is more effective than reading byte by byte byte[] buffer = new byte[1024]; int chunk = buffer.length; @@ -715,7 +741,7 @@ f.write(buffer, 0, bufLen); len -= bufLen; } - + f.close(); ii.close(); } diff --git a/src/java/org/apache/lucene/index/MultiReader.java b/src/java/org/apache/lucene/index/MultiReader.java index 5acde77185c..610a7018b7d 100644 --- a/src/java/org/apache/lucene/index/MultiReader.java +++ b/src/java/org/apache/lucene/index/MultiReader.java @@ -17,11 +17,14 @@ package org.apache.lucene.index; */ import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldSelector; import org.apache.lucene.store.Directory; import java.io.IOException; -import java.util.*; +import java.util.Collection; +import java.util.HashSet; +import java.util.Hashtable; +import java.util.Set; /** An IndexReader which reads multiple indexes, appending their content. * @@ -99,9 +102,9 @@ public class MultiReader extends IndexReader { return maxDoc; } - public Document document(int n) throws IOException { + public Document document(int n, FieldSelector fieldSelector) throws IOException { int i = readerIndex(n); // find segment num - return subReaders[i].document(n - starts[i]); // dispatch to segment reader + return subReaders[i].document(n - starts[i], fieldSelector); // dispatch to segment reader } public boolean isDeleted(int n) { diff --git a/src/java/org/apache/lucene/index/ParallelReader.java b/src/java/org/apache/lucene/index/ParallelReader.java index 7e5e72026ae..43470c5fd16 100644 --- a/src/java/org/apache/lucene/index/ParallelReader.java +++ b/src/java/org/apache/lucene/index/ParallelReader.java @@ -16,20 +16,24 @@ package org.apache.lucene.index; * limitations under the License. */ -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Enumeration; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.SortedMap; -import java.util.TreeMap; - import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.Fieldable; +import org.apache.lucene.document.FieldSelector; +import org.apache.lucene.document.FieldSelectorResult; + +import java.io.IOException; +import java.util.SortedMap; +import java.util.ArrayList; +import java.util.List; +import java.util.HashMap; +import java.util.Map; +import java.util.TreeMap; +import java.util.Collection; +import java.util.Iterator; +import java.util.Enumeration; +import java.util.Set; +import java.util.HashSet; + /** An IndexReader which reads multiple, parallel indexes. 
Each index added * must have the same number of documents, but typically each contains @@ -41,7 +45,7 @@ import org.apache.lucene.document.Field; * change rarely and small fields that change more frequently. The smaller * fields may be re-indexed in a new index and both indexes may be searched * together. - * + * *

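A hedged sketch of the intended use, mirroring the new TestParallelReader.testDocument later in this patch; dir1/dir2 are assumed Directory instances and "f1" an illustrative field name. Combined with a FieldSelector, a ParallelReader can skip entire sub-readers whose stored fields are all rejected by the selector:

    ParallelReader pr = new ParallelReader();
    pr.add(IndexReader.open(dir1));   // e.g. large, rarely changing fields
    pr.add(IndexReader.open(dir2));   // small, frequently re-indexed fields
    // only "f1" is materialized; readers storing none of the accepted fields are skipped
    Document doc = pr.document(0, new MapFieldSelector(new String[] {"f1"}));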
Warning: It is up to you to make sure all indexes * are created and modified the same way. For example, if you add * documents to one index, you need to add the same documents in the @@ -51,7 +55,8 @@ public class ParallelReader extends IndexReader { private List readers = new ArrayList(); private SortedMap fieldToReader = new TreeMap(); - private List storedFieldReaders = new ArrayList(); + private Map readerToFields = new HashMap(); + private List storedFieldReaders = new ArrayList(); private int maxDoc; private int numDocs; @@ -59,7 +64,7 @@ public class ParallelReader extends IndexReader { /** Construct a ParallelReader. */ public ParallelReader() throws IOException { super(null); } - + /** Add an IndexReader. */ public void add(IndexReader reader) throws IOException { add(reader, false); @@ -68,10 +73,10 @@ /** Add an IndexReader whose stored fields will not be returned. This can * accelerate search when stored fields are only needed from a subset of * the IndexReaders. - * - * @throws IllegalArgumentException if not all indexes contain the same number + * + * @throws IllegalArgumentException if not all indexes contain the same number * of documents - * @throws IllegalArgumentException if not all indexes have the same value + * @throws IllegalArgumentException if not all indexes have the same value * of {@link IndexReader#maxDoc()} */ public void add(IndexReader reader, boolean ignoreStoredFields) @@ -89,8 +94,10 @@ if (reader.numDocs() != numDocs) throw new IllegalArgumentException ("All readers must have same numDocs: "+numDocs+"!="+reader.numDocs()); - - Iterator i = reader.getFieldNames(IndexReader.FieldOption.ALL).iterator(); + + Collection fields = reader.getFieldNames(IndexReader.FieldOption.ALL); + readerToFields.put(reader, fields); + Iterator i = fields.iterator(); while (i.hasNext()) { // update fieldToReader map String field = (String)i.next(); if (fieldToReader.get(field) == null) @@ -132,13 +139,25 @@ } // append fields from storedFieldReaders - public Document document(int n) throws IOException { + public Document document(int n, FieldSelector fieldSelector) throws IOException { Document result = new Document(); for (int i = 0; i < storedFieldReaders.size(); i++) { IndexReader reader = (IndexReader)storedFieldReaders.get(i); - Enumeration fields = reader.document(n).fields(); - while (fields.hasMoreElements()) { - result.add((Field)fields.nextElement()); + + boolean include = (fieldSelector==null); + if (!include) { + Iterator it = ((Collection) readerToFields.get(reader)).iterator(); + while (it.hasNext()) + if (fieldSelector.accept((String)it.next())!=FieldSelectorResult.NO_LOAD) { + include = true; + break; + } + } + if (include) { + Enumeration fields = reader.document(n, fieldSelector).fields(); + while (fields.hasMoreElements()) { + result.add((Fieldable)fields.nextElement()); + } } } return result; diff --git a/src/java/org/apache/lucene/index/SegmentMerger.java b/src/java/org/apache/lucene/index/SegmentMerger.java index d4fd203db69..b4ec5549514 100644 --- a/src/java/org/apache/lucene/index/SegmentMerger.java +++ b/src/java/org/apache/lucene/index/SegmentMerger.java @@ -120,7 +120,7 @@ final class SegmentMerger { files.add(segment + "." 
+ IndexFileNames.COMPOUND_EXTENSIONS[i]); } // Field norm files for (int i = 0; i < fieldInfos.size(); i++) { FieldInfo fi = fieldInfos.fieldInfo(i); if (fi.isIndexed && !fi.omitNorms) { diff --git a/src/java/org/apache/lucene/index/SegmentReader.java b/src/java/org/apache/lucene/index/SegmentReader.java index 520e4a6bab3..782a731cff0 100644 --- a/src/java/org/apache/lucene/index/SegmentReader.java +++ b/src/java/org/apache/lucene/index/SegmentReader.java @@ -16,16 +16,16 @@ package org.apache.lucene.index; * limitations under the License. */ -import java.io.IOException; -import java.util.*; - import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldSelector; +import org.apache.lucene.search.DefaultSimilarity; +import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.store.Directory; import org.apache.lucene.util.BitVector; -import org.apache.lucene.search.DefaultSimilarity; + +import java.io.IOException; +import java.util.*; /** * @version $Id$ @@ -277,11 +277,11 @@ class SegmentReader extends IndexReader { return tis.terms(t); } - public synchronized Document document(int n) throws IOException { + public synchronized Document document(int n, FieldSelector fieldSelector) throws IOException { if (isDeleted(n)) throw new IllegalArgumentException ("attempt to access a deleted document"); - return fieldsReader.doc(n); + return fieldsReader.doc(n, fieldSelector); } public synchronized boolean isDeleted(int n) { diff --git a/src/java/org/apache/lucene/index/TermVectorsReader.java b/src/java/org/apache/lucene/index/TermVectorsReader.java index dc7005e6dc8..2d8b5cb9c17 100644 --- a/src/java/org/apache/lucene/index/TermVectorsReader.java +++ b/src/java/org/apache/lucene/index/TermVectorsReader.java @@ -127,7 +127,7 @@ class TermVectorsReader implements Cloneable { result = readTermVector(field, position); } else { //System.out.println("Field not found"); } } else { //System.out.println("No tvx file"); diff --git a/src/java/org/apache/lucene/index/TermVectorsWriter.java b/src/java/org/apache/lucene/index/TermVectorsWriter.java index 7e9f839c32c..80c0448240c 100644 --- a/src/java/org/apache/lucene/index/TermVectorsWriter.java +++ b/src/java/org/apache/lucene/index/TermVectorsWriter.java @@ -150,7 +150,7 @@ final class TermVectorsWriter { return currentField != null; } /** Add term to the field's term vector. Field must already be open. * Terms should be added in * increasing order of terms, one call per unique termNum. ProxPointer * is a pointer into the TermPosition file (prx). 
Freq is the number of @@ -268,7 +268,7 @@ final class TermVectorsWriter { private void writeField() throws IOException { // remember where this field is written currentField.tvfPointer = tvf.getFilePointer(); //System.out.println("Field Pointer: " + currentField.tvfPointer); final int size = terms.size(); tvf.writeVInt(size); diff --git a/src/java/org/apache/lucene/search/FieldCacheImpl.java b/src/java/org/apache/lucene/search/FieldCacheImpl.java index fb9fde2d79b..2accf016a45 100644 --- a/src/java/org/apache/lucene/search/FieldCacheImpl.java +++ b/src/java/org/apache/lucene/search/FieldCacheImpl.java @@ -20,7 +20,6 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermDocs; import org.apache.lucene.index.TermEnum; -import org.apache.lucene.search.FieldCache.StringIndex; // required by GCJ import java.io.IOException; import java.util.Locale; @@ -43,7 +42,7 @@ implements FieldCache { /** Expert: Every key in the internal cache is of this type. */ static class Entry { final String field; // which Field final int type; // which SortField type final Object custom; // which custom comparator final Locale locale; // the locale we're sorting (if string) diff --git a/src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java b/src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java index 59f1568b76d..f47e066e08e 100644 --- a/src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java +++ b/src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java @@ -45,7 +45,7 @@ extends PriorityQueue { /** * Creates a hit queue sorted by the given list of fields. * @param fields Field names, in priority order (highest priority first). * @param size The number of hits to retain. Must be greater than zero. */ FieldDocSortedHitQueue (SortField[] fields, int size) { diff --git a/src/java/org/apache/lucene/search/FieldSortedHitQueue.java b/src/java/org/apache/lucene/search/FieldSortedHitQueue.java index 528f6eb1e7a..67c4aafab0d 100644 --- a/src/java/org/apache/lucene/search/FieldSortedHitQueue.java +++ b/src/java/org/apache/lucene/search/FieldSortedHitQueue.java @@ -44,7 +44,7 @@ extends PriorityQueue { /** * Creates a hit queue sorted by the given list of fields. * @param reader Index to use. * @param fields Field names, in priority order (highest priority first). Cannot be null or empty. * @param size The number of hits to retain. Must be greater than zero. * @throws IOException */ @@ -212,7 +212,7 @@ extends PriorityQueue { /** * Returns a comparator for sorting hits according to a field containing integers. * @param reader Index to use. * @param fieldname Field containing integer values. * @return Comparator for sorting hits. * @throws IOException If an error occurs reading the index. */ @@ -243,7 +243,7 @@ extends PriorityQueue { /** * Returns a comparator for sorting hits according to a field containing floats. * @param reader Index to use. * @param fieldname Field containing float values. * @return Comparator for sorting hits. * @throws IOException If an error occurs reading the index. 
*/ @@ -274,7 +274,7 @@ extends PriorityQueue { /** * Returns a comparator for sorting hits according to a field containing strings. * @param reader Index to use. * @param fieldname Field containing string values. * @return Comparator for sorting hits. * @throws IOException If an error occurs reading the index. */ @@ -305,7 +305,7 @@ extends PriorityQueue { /** * Returns a comparator for sorting hits according to a field containing strings. * @param reader Index to use. * @param fieldname Field containing string values. * @return Comparator for sorting hits. * @throws IOException If an error occurs reading the index. */ @@ -336,7 +336,7 @@ extends PriorityQueue { * floats or strings. Once the type is determined, one of the other static methods * in this class is called to get the comparator. * @param reader Index to use. * @param fieldname Field containing values. * @return Comparator for sorting hits. * @throws IOException If an error occurs reading the index. */ diff --git a/src/java/org/apache/lucene/search/Similarity.java b/src/java/org/apache/lucene/search/Similarity.java index 4674dd2da0f..1e6e152bf55 100644 --- a/src/java/org/apache/lucene/search/Similarity.java +++ b/src/java/org/apache/lucene/search/Similarity.java @@ -16,19 +16,16 @@ package org.apache.lucene.search; * limitations under the License. */ +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.util.SmallFloat; + import java.io.IOException; import java.io.Serializable; - import java.util.Collection; import java.util.Iterator; -import org.apache.lucene.index.Term; - -import org.apache.lucene.index.IndexReader; // for javadoc -import org.apache.lucene.index.IndexWriter; // for javadoc -import org.apache.lucene.document.Field; // for javadoc -import org.apache.lucene.util.SmallFloat; - /** Expert: Scoring API. *

Subclasses implement search scoring. * @@ -44,7 +41,7 @@ import org.apache.lucene.util.SmallFloat; * ( {@link #tf(int) tf}(t in d) * * {@link #idf(Term,Searcher) idf}(t)^2 * * {@link Query#getBoost getBoost}(t in q) * - * {@link Field#getBoost getBoost}(t.field in d) * + * {@link org.apache.lucene.document.Field#getBoost getBoost}(t.field in d) * * {@link #lengthNorm(String,int) lengthNorm}(t.field in d) ) * *  * @@ -152,7 +149,7 @@ public abstract class Similarity implements Serializable { * fieldName of doc. * @return a normalization factor for hits on this field of this document * - * @see Field#setBoost(float) + * @see org.apache.lucene.document.Field#setBoost(float) */ public abstract float lengthNorm(String fieldName, int numTokens); @@ -179,7 +176,7 @@ public abstract class Similarity implements Serializable { * small to represent are rounded up to the smallest positive representable * value. * - * @see Field#setBoost(float) + * @see org.apache.lucene.document.Field#setBoost(float) * @see SmallFloat */ public static byte encodeNorm(float f) { diff --git a/src/java/org/apache/lucene/search/Sort.java b/src/java/org/apache/lucene/search/Sort.java index eea965e0baf..2168f42e772 100644 --- a/src/java/org/apache/lucene/search/Sort.java +++ b/src/java/org/apache/lucene/search/Sort.java @@ -29,7 +29,7 @@ import java.io.Serializable; * and does not need to be stored (unless you happen to want it back with the * rest of your document data). In other words: * - *

document.add (new Field ("byNumber", Integer.toString(x), Field.Store.NO, Field.Index.UN_TOKENIZED));

+ *

document.add (new Field ("byNumber", Integer.toString(x), Field.Store.NO, Field.Index.UN_TOKENIZED));

* * *

Valid Types of Values

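A hedged companion sketch (not part of the patch): once documents carry an untokenized "byNumber" field as in the example above, sorted search is a matter of passing a Sort to the searcher; the index path and query here are illustrative.

    Searcher searcher = new IndexSearcher("/tmp/index");
    Query query = new TermQuery(new Term("contents", "lucene"));
    Hits hits = searcher.search(query, new Sort("byNumber"));
    searcher.close();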
diff --git a/src/java/org/apache/lucene/search/SortComparatorSource.java b/src/java/org/apache/lucene/search/SortComparatorSource.java index c4508571a52..aecd5798bb3 100644 --- a/src/java/org/apache/lucene/search/SortComparatorSource.java +++ b/src/java/org/apache/lucene/search/SortComparatorSource.java @@ -19,7 +19,7 @@ extends Serializable { /** * Creates a comparator for the field in the given index. * @param reader Index to create comparator for. * @param fieldname Field to create comparator for. * @return Comparator of ScoreDoc objects. * @throws IOException If an error occurs reading the index. */ diff --git a/src/java/org/apache/lucene/store/IndexInput.java b/src/java/org/apache/lucene/store/IndexInput.java index 91376f5d0e5..bdd2ca96363 100644 --- a/src/java/org/apache/lucene/store/IndexInput.java +++ b/src/java/org/apache/lucene/store/IndexInput.java @@ -116,6 +116,32 @@ public abstract class IndexInput implements Cloneable { } } + /** + * Expert: similar to {@link #readChars(char[], int, int)} but does not do any conversion operations on the bytes it is reading in. It still + * has to invoke {@link #readByte()} just as {@link #readChars(char[], int, int)} does, but it does not need a buffer to store anything + * and it does not have to do any of the bitwise operations, since we don't actually care what is in the byte except to determine + * how many more bytes to read. + * @param length The number of chars to skip + */ + public void skipChars(int length) throws IOException { + for (int i = 0; i < length; i++) { + byte b = readByte(); + if ((b & 0x80) == 0) { + //do nothing, a single byte encodes the whole char + } + else if ((b & 0xE0) != 0xE0) { + readByte();//read one additional byte + } else { + //read two additional bytes + readByte(); + readByte(); + } + } + } + + 
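The skip loop above relies on the fact that in Lucene's modified UTF-8 the lead byte alone determines a char's encoded width: high bit clear means a single byte, a 110xxxxx lead byte (the (b & 0xE0) != 0xE0 case once the first test has failed) means one continuation byte, and 1110xxxx means two. For instance, '\u4e00' is stored as 0xE4 0xB8 0x80, so skipChars reads the 0xE4 lead byte and then consumes exactly two more. A minimal sketch of the intended call pattern, with an assumed Directory dir and a hypothetical file name:

    IndexInput in = dir.openInput("_1.fdt");  // hypothetical stored-fields file
    int length = in.readVInt();               // char count of the string that follows
    in.skipChars(length);                     // hop over it without allocating a char[]
    in.close();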
/** Closes the stream to further operations. */ public abstract void close() throws IOException; diff --git a/src/test/org/apache/lucene/document/TestBinaryDocument.java b/src/test/org/apache/lucene/document/TestBinaryDocument.java index 94b8712f991..fb57b89a0eb 100644 --- a/src/test/org/apache/lucene/document/TestBinaryDocument.java +++ b/src/test/org/apache/lucene/document/TestBinaryDocument.java @@ -38,10 +38,10 @@ public class TestBinaryDocument extends TestCase public void testBinaryFieldInIndex() throws Exception { - Field binaryFldStored = new Field("binaryStored", binaryValStored.getBytes(), Field.Store.YES); - Field binaryFldCompressed = new Field("binaryCompressed", binaryValCompressed.getBytes(), Field.Store.COMPRESS); - Field stringFldStored = new Field("stringStored", binaryValStored, Field.Store.YES, Field.Index.NO, Field.TermVector.NO); - Field stringFldCompressed = new Field("stringCompressed", binaryValCompressed, Field.Store.COMPRESS, Field.Index.NO, Field.TermVector.NO); + Fieldable binaryFldStored = new Field("binaryStored", binaryValStored.getBytes(), Field.Store.YES); + Fieldable binaryFldCompressed = new Field("binaryCompressed", binaryValCompressed.getBytes(), Field.Store.COMPRESS); + Fieldable stringFldStored = new Field("stringStored", binaryValStored, Field.Store.YES, Field.Index.NO, Field.TermVector.NO); + Fieldable stringFldCompressed = new Field("stringCompressed", binaryValCompressed, Field.Store.COMPRESS, Field.Index.NO, Field.TermVector.NO); try { // binary fields with store off are not allowed diff --git a/src/test/org/apache/lucene/document/TestDocument.java b/src/test/org/apache/lucene/document/TestDocument.java index 937c7fff4f2..4ef931b25c2 100644 --- a/src/test/org/apache/lucene/document/TestDocument.java +++ b/src/test/org/apache/lucene/document/TestDocument.java @@ -46,9 +46,9 @@ public class TestDocument extends TestCase throws Exception { Document doc = new Document(); - Field stringFld = new Field("string", binaryVal, Field.Store.YES, Field.Index.NO); - Field binaryFld = new Field("binary", binaryVal.getBytes(), Field.Store.YES); - Field binaryFld2 = new Field("binary", binaryVal2.getBytes(), Field.Store.YES); + Fieldable stringFld = new Field("string", binaryVal, Field.Store.YES, Field.Index.NO); + Fieldable binaryFld = new Field("binary", binaryVal.getBytes(), Field.Store.YES); + Fieldable binaryFld2 = new Field("binary", binaryVal2.getBytes(), Field.Store.YES); doc.add(stringFld); doc.add(binaryFld); diff --git a/src/test/org/apache/lucene/index/DocHelper.java b/src/test/org/apache/lucene/index/DocHelper.java index c6fafeaa484..51a8eb73491 100644 --- a/src/test/org/apache/lucene/index/DocHelper.java +++ b/src/test/org/apache/lucene/index/DocHelper.java @@ -18,12 +18,12 @@ package org.apache.lucene.index; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.WhitespaceAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.*; import org.apache.lucene.search.Similarity; import org.apache.lucene.store.Directory; import java.io.IOException; +import java.io.UnsupportedEncodingException; import java.util.HashMap; import java.util.Map; import java.util.Enumeration; @@ -39,6 +39,13 @@ class DocHelper { public static final int [] FIELD_2_FREQS = {3, 1, 1}; public static final String TEXT_FIELD_2_KEY = "textField2"; public static Field textField2 = new Field(TEXT_FIELD_2_KEY, FIELD_2_TEXT, Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS); + + public static final 
String FIELD_2_COMPRESSED_TEXT = "field field field two text"; + //Fields will be lexicographically sorted. So, the order is: field, text, two + public static final int [] COMPRESSED_FIELD_2_FREQS = {3, 1, 1}; + public static final String COMPRESSED_TEXT_FIELD_2_KEY = "compressedTextField2"; + public static Field compressedTextField2 = new Field(COMPRESSED_TEXT_FIELD_2_KEY, FIELD_2_COMPRESSED_TEXT, Field.Store.COMPRESS, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS); + public static final String FIELD_3_TEXT = "aaaNoNorms aaaNoNorms bbbNoNorms"; public static final String TEXT_FIELD_3_KEY = "textField3"; @@ -71,6 +78,34 @@ class DocHelper { public static Field unStoredField2 = new Field(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.YES); + public static final String LAZY_FIELD_BINARY_KEY = "lazyFieldBinary"; + public static byte [] LAZY_FIELD_BINARY_BYTES; + public static Field lazyFieldBinary; + + public static final String LAZY_FIELD_KEY = "lazyField"; + public static final String LAZY_FIELD_TEXT = "These are some field bytes"; + public static Field lazyField = new Field(LAZY_FIELD_KEY, LAZY_FIELD_TEXT, Field.Store.YES, Field.Index.TOKENIZED); + + public static final String LARGE_LAZY_FIELD_KEY = "largeLazyField"; + public static String LARGE_LAZY_FIELD_TEXT; + public static Field largeLazyField; + + //From Issue 509 + public static final String FIELD_UTF1_TEXT = "field one \u4e00text"; + public static final String TEXT_FIELD_UTF1_KEY = "textField1Utf8"; + public static Field textUtfField1 = new Field(TEXT_FIELD_UTF1_KEY, FIELD_UTF1_TEXT, + Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO); + + public static final String FIELD_UTF2_TEXT = "field field field \u4e00two text"; + //Fields will be lexicographically sorted. So, the order is: field, text, two + public static final int [] FIELD_UTF2_FREQS = {3, 1, 1}; + public static final String TEXT_FIELD_UTF2_KEY = "textField2Utf8"; + public static Field textUtfField2 = new Field(TEXT_FIELD_UTF2_KEY, FIELD_UTF2_TEXT, Field.Store.YES, + Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS); + + + + public static Map nameValues = null; // ordered list of all the fields... @@ -79,14 +114,20 @@ class DocHelper { textField1, textField2, textField3, + compressedTextField2, keyField, noNormsField, unIndField, unStoredField1, unStoredField2, + textUtfField1, + textUtfField2, + lazyField, + lazyFieldBinary,//placeholder for binary field, since this is null. It must be second to last. + largeLazyField//placeholder for large field, since this is null. 
It must always be last }; - // Map + // Map public static Map all=new HashMap(); public static Map indexed=new HashMap(); public static Map stored=new HashMap(); @@ -94,11 +135,28 @@ class DocHelper { public static Map unindexed=new HashMap(); public static Map termvector=new HashMap(); public static Map notermvector=new HashMap(); + public static Map lazy= new HashMap(); public static Map noNorms=new HashMap(); static { + //Initialize the large Lazy Field + StringBuffer buffer = new StringBuffer(); + for (int i = 0; i < 10000; i++) + { + buffer.append("Lazily loading lengths of language in lieu of laughing "); + } + + try { + LAZY_FIELD_BINARY_BYTES = "These are some binary field bytes".getBytes("UTF8"); + } catch (UnsupportedEncodingException e) { + } + lazyFieldBinary = new Field(LAZY_FIELD_BINARY_KEY, LAZY_FIELD_BINARY_BYTES, Field.Store.YES); + fields[fields.length - 2] = lazyFieldBinary; + LARGE_LAZY_FIELD_TEXT = buffer.toString(); + largeLazyField = new Field(LARGE_LAZY_FIELD_KEY, LARGE_LAZY_FIELD_TEXT, Field.Store.YES, Field.Index.TOKENIZED); + fields[fields.length - 1] = largeLazyField; for (int i=0; i + * Must test using a File based directory + * + * @throws Exception + */ + public void testLazyPerformance() throws Exception { + String tmpIODir = System.getProperty("java.io.tmpdir"); + String path = tmpIODir + File.separator + "lazyDir"; + File file = new File(path); + FSDirectory tmpDir = FSDirectory.getDirectory(file, true); + assertTrue(tmpDir != null); + DocumentWriter writer = new DocumentWriter(tmpDir, new WhitespaceAnalyzer(), + Similarity.getDefault(), 50); + assertTrue(writer != null); + writer.addDocument("test", testDoc); + assertTrue(fieldInfos != null); + FieldsReader reader; + long lazyTime = 0; + long regularTime = 0; + int length = 50; + Set lazyFieldNames = new HashSet(); + lazyFieldNames.add(DocHelper.LARGE_LAZY_FIELD_KEY); + SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(Collections.EMPTY_SET, lazyFieldNames); + + for (int i = 0; i < length; i++) { + reader = new FieldsReader(tmpDir, "test", fieldInfos); + assertTrue(reader != null); + assertTrue(reader.size() == 1); + + Document doc; + doc = reader.doc(0, null);//Load all of them + assertTrue("doc is null and it shouldn't be", doc != null); + Fieldable field = doc.getField(DocHelper.LARGE_LAZY_FIELD_KEY); + assertTrue("field is lazy", field.isLazy() == false); + String value; + long start; + long finish; + start = System.currentTimeMillis(); + //On my machine this was always 0ms. + value = field.stringValue(); + finish = System.currentTimeMillis(); + assertTrue("value is null and it shouldn't be", value != null); + assertTrue("field is null and it shouldn't be", field != null); + regularTime += (finish - start); + reader.close(); + reader = null; + doc = null; + //Hmmm, are we still in cache??? 
+ System.gc(); + reader = new FieldsReader(tmpDir, "test", fieldInfos); + doc = reader.doc(0, fieldSelector); + field = doc.getField(DocHelper.LARGE_LAZY_FIELD_KEY); + assertTrue("field is not lazy", field.isLazy() == true); + start = System.currentTimeMillis(); + //On my machine this took around 50 - 70ms + value = field.stringValue(); + finish = System.currentTimeMillis(); + assertTrue("value is null and it shouldn't be", value != null); + lazyTime += (finish - start); + reader.close(); + + } + System.out.println("Average Non-lazy time (should be very close to zero): " + regularTime / length + " ms for " + length + " reads"); + System.out.println("Average Lazy Time (should be greater than zero): " + lazyTime / length + " ms for " + length + " reads"); + } + + } diff --git a/src/test/org/apache/lucene/index/TestIndexInput.java b/src/test/org/apache/lucene/index/TestIndexInput.java index 062cf805efe..fcd8f5c0e49 100644 --- a/src/test/org/apache/lucene/index/TestIndexInput.java +++ b/src/test/org/apache/lucene/index/TestIndexInput.java @@ -22,16 +22,56 @@ import org.apache.lucene.store.IndexInput; import java.io.IOException; public class TestIndexInput extends TestCase { - public void testRead() throws IOException { - IndexInput is = new MockIndexInput(new byte[] { (byte) 0x80, 0x01, - (byte) 0xFF, 0x7F, - (byte) 0x80, (byte) 0x80, 0x01, - (byte) 0x81, (byte) 0x80, 0x01, - 0x06, 'L', 'u', 'c', 'e', 'n', 'e'}); - assertEquals(128,is.readVInt()); - assertEquals(16383,is.readVInt()); - assertEquals(16384,is.readVInt()); - assertEquals(16385,is.readVInt()); - assertEquals("Lucene",is.readString()); - } + public void testRead() throws IOException { + IndexInput is = new MockIndexInput(new byte[]{(byte) 0x80, 0x01, + (byte) 0xFF, 0x7F, + (byte) 0x80, (byte) 0x80, 0x01, + (byte) 0x81, (byte) 0x80, 0x01, + 0x06, 'L', 'u', 'c', 'e', 'n', 'e'}); + assertEquals(128, is.readVInt()); + assertEquals(16383, is.readVInt()); + assertEquals(16384, is.readVInt()); + assertEquals(16385, is.readVInt()); + assertEquals("Lucene", is.readString()); + } + + /** + * Expert + * + * @throws IOException + */ + public void testSkipChars() throws IOException { + byte[] bytes = new byte[]{(byte) 0x80, 0x01, + (byte) 0xFF, 0x7F, + (byte) 0x80, (byte) 0x80, 0x01, + (byte) 0x81, (byte) 0x80, 0x01, + 0x06, 'L', 'u', 'c', 'e', 'n', 'e', + }; + String utf8Str = "\u0634\u1ea1"; + byte [] utf8Bytes = utf8Str.getBytes("UTF-8"); + byte [] theBytes = new byte[bytes.length + 1 + utf8Bytes.length]; + System.arraycopy(bytes, 0, theBytes, 0, bytes.length); + theBytes[bytes.length] = (byte)utf8Str.length();//Add in the number of chars we are storing, which should fit in a byte for this test + System.arraycopy(utf8Bytes, 0, theBytes, bytes.length + 1, utf8Bytes.length); + IndexInput is = new MockIndexInput(theBytes); + assertEquals(128, is.readVInt()); + assertEquals(16383, is.readVInt()); + assertEquals(16384, is.readVInt()); + assertEquals(16385, is.readVInt()); + int charsToRead = is.readVInt();//number of chars in the Lucene string + assertTrue(0x06 + " does not equal: " + charsToRead, 0x06 == charsToRead); + is.skipChars(3); + char [] chars = new char[3];//3 of the string's 6 chars remain after the skip + is.readChars(chars, 0, 3); + String tmpStr = new String(chars); + assertTrue(tmpStr + " is not equal to " + "ene", tmpStr.equals("ene") == true); + //Now read the UTF8 stuff + charsToRead = is.readVInt() - 1;//since we are skipping one + is.skipChars(1); + assertTrue(utf8Str.length() - 1 
== charsToRead); + chars = new char[charsToRead]; + is.readChars(chars, 0, charsToRead); + tmpStr = new String(chars); + assertTrue(tmpStr + " is not equal to " + utf8Str.substring(1), tmpStr.equals(utf8Str.substring(1)) == true); + } } diff --git a/src/test/org/apache/lucene/index/TestIndexModifier.java b/src/test/org/apache/lucene/index/TestIndexModifier.java index 80a6e2084e5..30ca60e4203 100644 --- a/src/test/org/apache/lucene/index/TestIndexModifier.java +++ b/src/test/org/apache/lucene/index/TestIndexModifier.java @@ -22,6 +22,8 @@ import org.apache.lucene.analysis.SimpleAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.Field.Index; +import org.apache.lucene.document.Field.Store; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.RAMDirectory; @@ -270,9 +272,9 @@ class IndexThread extends Thread { id++; } // add random stuff: - doc.add(new Field("content", new Integer(random.nextInt(1000)).toString(), Field.Store.YES, + doc.add(new Field("content", new Integer(random.nextInt(1000)).toString(), Field.Store.YES, Field.Index.TOKENIZED)); - doc.add(new Field("content", new Integer(random.nextInt(1000)).toString(), Field.Store.YES, + doc.add(new Field("content", new Integer(random.nextInt(1000)).toString(), Field.Store.YES, Field.Index.TOKENIZED)); doc.add(new Field("all", "x", Field.Store.YES, Field.Index.TOKENIZED)); return doc; diff --git a/src/test/org/apache/lucene/index/TestParallelReader.java b/src/test/org/apache/lucene/index/TestParallelReader.java index 6b36318e08b..f33a4c87606 100644 --- a/src/test/org/apache/lucene/index/TestParallelReader.java +++ b/src/test/org/apache/lucene/index/TestParallelReader.java @@ -16,20 +16,25 @@ package org.apache.lucene.index; * limitations under the License. 
*/ -import java.io.IOException; -import java.util.Collection; - import junit.framework.TestCase; - import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.Term; +import org.apache.lucene.document.MapFieldSelector; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.Hits; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Searcher; +import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; -import org.apache.lucene.search.BooleanClause.Occur; -import org.apache.lucene.search.*; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; +import java.util.Enumeration; public class TestParallelReader extends TestCase { @@ -71,6 +76,35 @@ public class TestParallelReader extends TestCase { assertTrue(fieldNames.contains("f4")); } + public void testDocument() throws IOException { + Directory dir1 = getDir1(); + Directory dir2 = getDir2(); + ParallelReader pr = new ParallelReader(); + pr.add(IndexReader.open(dir1)); + pr.add(IndexReader.open(dir2)); + + Document doc11 = pr.document(0, new MapFieldSelector(new String[] {"f1"})); + Document doc24 = pr.document(1, new MapFieldSelector(Arrays.asList(new String[] {"f4"}))); + Document doc223 = pr.document(1, new MapFieldSelector(new String[] {"f2", "f3"})); + + assertEquals(1, numFields(doc11)); + assertEquals(1, numFields(doc24)); + assertEquals(2, numFields(doc223)); + + assertEquals("v1", doc11.get("f1")); + assertEquals("v2", doc24.get("f4")); + assertEquals("v2", doc223.get("f2")); + assertEquals("v2", doc223.get("f3")); + } + + private int numFields(Document doc) { + int num; + Enumeration e = doc.fields(); + for (num=0; e.hasMoreElements(); num++) + e.nextElement(); + return num; + } + public void testIncompatibleIndexes() throws IOException { // two documents: Directory dir1 = getDir1(); diff --git a/src/test/org/apache/lucene/index/TestSegmentMerger.java b/src/test/org/apache/lucene/index/TestSegmentMerger.java index c73e85ebf3a..d826df531f0 100644 --- a/src/test/org/apache/lucene/index/TestSegmentMerger.java +++ b/src/test/org/apache/lucene/index/TestSegmentMerger.java @@ -87,7 +87,7 @@ public class TestSegmentMerger extends TestCase { Collection stored = mergedReader.getFieldNames(IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR); assertTrue(stored != null); //System.out.println("stored size: " + stored.size()); - assertTrue(stored.size() == 2); + assertTrue("We do not have 4 fields that were indexed with term vector",stored.size() == 4); TermFreqVector vector = mergedReader.getTermFreqVector(0, DocHelper.TEXT_FIELD_2_KEY); assertTrue(vector != null); diff --git a/src/test/org/apache/lucene/index/TestSegmentReader.java b/src/test/org/apache/lucene/index/TestSegmentReader.java index 345f4f97a58..bdd55a317e8 100644 --- a/src/test/org/apache/lucene/index/TestSegmentReader.java +++ b/src/test/org/apache/lucene/index/TestSegmentReader.java @@ -19,7 +19,7 @@ package org.apache.lucene.index; import junit.framework.TestCase; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.Fieldable; import 
org.apache.lucene.search.DefaultSimilarity; import java.io.IOException; @@ -64,7 +64,7 @@ public class TestSegmentReader extends TestCase { Enumeration fields = result.fields(); while (fields.hasMoreElements()) { - Field field = (Field) fields.nextElement(); + Fieldable field = (Fieldable) fields.nextElement(); assertTrue(field != null); assertTrue(DocHelper.nameValues.containsKey(field.name())); } @@ -166,7 +166,7 @@ public class TestSegmentReader extends TestCase { public static void checkNorms(IndexReader reader) throws IOException { // test omit norms for (int i=0; i
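Taken together, these tests exercise the pattern client code is expected to follow once Fieldable is in place: iterate a document's fields as Fieldable and consult isLazy() before assuming a value is already in memory. A closing sketch (doc is an assumed Document fetched through any of the document(int, FieldSelector) overloads above):

    Enumeration fields = doc.fields();
    while (fields.hasMoreElements()) {
      Fieldable f = (Fieldable) fields.nextElement();
      System.out.println(f.name() + " lazy=" + f.isLazy());
    }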