Implementation of LUCENE-545. Introduces a new Fieldable interface (extracted from Field), which is now used wherever Field used to be; Field now implements Fieldable.

Added a new method to IndexReader and derived classes for working with the new FieldSelector interface. The FieldSelector interface defines a mechanism for, among other things, lazy loading of fields. Implemented lazy loading of fields in the FieldsReader class. Added a test case to TestFieldsReader.java.
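
A minimal end-to-end sketch of how the new pieces fit together (not part of the commit; the index contents and field names are illustrative):

import java.util.Collections;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.SetBasedFieldSelector;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.RAMDirectory;

public class LazyLoadingSketch {
  public static void main(String[] args) throws Exception {
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
    Document doc = new Document();
    doc.add(new Field("title", "hello", Field.Store.YES, Field.Index.TOKENIZED));
    doc.add(new Field("body", "a large stored value", Field.Store.YES, Field.Index.TOKENIZED));
    writer.addDocument(doc);
    writer.close();

    IndexReader reader = IndexReader.open(dir);
    // Load "title" eagerly and "body" lazily; any other field would be NO_LOAD.
    FieldSelector selector = new SetBasedFieldSelector(
        Collections.singleton("title"), Collections.singleton("body"));
    Document stored = reader.document(0, selector);
    Fieldable body = stored.getField("body"); // lazy: the value has not been read yet
    String text = body.stringValue();         // the value is read from the index here
    reader.close();                           // lazy values must be loaded before close
  }
}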

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@413201 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Grant Ingersoll 2006-06-10 01:23:22 +00:00
parent 2b9effb894
commit d4cc19d03e
48 changed files with 1592 additions and 560 deletions

View File

@ -9,6 +9,9 @@ New features
1. LUCENE-503: New ThaiAnalyzer and ThaiWordFilter in contrib/analyzers
(Samphan Raruenrom via Chris Hostetter)
2. LUCENE-545: New FieldSelector API and associated changes to IndexReader and implementations.
New Fieldable interface for use with the lazy field loading mechanism. (Grant Ingersoll and Chuck Williams via Grant Ingersoll)
API Changes
1. LUCENE-438: Remove "final" from Token, implement Cloneable, allow

View File

@ -16,20 +16,11 @@ package org.apache.lucene.index.memory;
* limitations under the License.
*/
import java.io.IOException;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
@ -43,6 +34,16 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.Similarity;
import java.io.IOException;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
/**
* High-performance single-document main memory Apache Lucene fulltext search index.
*
@ -1005,6 +1006,12 @@ public class MemoryIndex {
return new Document(); // there are no stored fields
}
//When we convert to JDK 1.5 make this Set<String>
public Document document(int n, FieldSelector fieldSelector) throws IOException {
if (DEBUG) System.err.println("MemoryIndexReader.document");
return new Document(); // there are no stored fields
}
public boolean isDeleted(int n) {
if (DEBUG) System.err.println("MemoryIndexReader.isDeleted");
return false;

View File

@ -22,6 +22,7 @@ import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Hits;
@ -190,7 +191,7 @@ public class ListSearcher extends AbstractListModel {
//table model row that we are mapping to
for (int t=0; t<hits.length(); t++){
Document document = hits.doc(t);
Field field = document.getField(ROW_NUMBER);
Fieldable field = document.getField(ROW_NUMBER);
rowToModelIndex.add(new Integer(field.stringValue()));
}
} catch (Exception e){

View File

@ -16,26 +16,23 @@ package org.apache.lucene.swing.models;
* limitations under the License.
*/
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Hits;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.store.RAMDirectory;
import java.awt.*;
import java.awt.event.*;
import java.util.*;
import java.util.List;
import javax.swing.*;
import javax.swing.event.TableModelEvent;
import javax.swing.event.TableModelListener;
import javax.swing.table.*;
import javax.swing.table.AbstractTableModel;
import javax.swing.table.TableModel;
import java.util.ArrayList;
/**
@ -275,7 +272,7 @@ public class TableSearcher extends AbstractTableModel {
//table model row that we are mapping to
for (int t=0; t<hits.length(); t++){
Document document = hits.doc(t);
Field field = document.getField(ROW_NUMBER);
Fieldable field = document.getField(ROW_NUMBER);
rowToModelIndex.add(new Integer(field.stringValue()));
}
} catch (Exception e){

View File

@ -38,16 +38,16 @@ public abstract class Analyzer {
/**
* Invoked before indexing a Field instance if
* Invoked before indexing a Fieldable instance if
* terms have already been added to that field. This allows custom
* analyzers to place an automatic position increment gap between
* Field instances using the same field name. The default value
* Fieldable instances using the same field name. The default value
* position increment gap is 0. With a 0 position increment gap and
* the typical default token position increment of 1, all terms in a field,
* including across Field instances, are in successive positions, allowing
* exact PhraseQuery matches, for instance, across Field instance boundaries.
* including across Fieldable instances, are in successive positions, allowing
* exact PhraseQuery matches, for instance, across Fieldable instance boundaries.
*
* @param fieldName Field name being indexed.
* @param fieldName Fieldable name being indexed.
* @return position increment gap, added to the next token emitted from {@link #tokenStream(String,Reader)}
*/
public int getPositionIncrementGap(String fieldName)
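
For illustration, a sketch of an analyzer overriding this hook so that phrase matches cannot cross Fieldable instance boundaries; the gap size of 100 is an arbitrary assumption, not part of the commit:

import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;

public class GapAnalyzer extends Analyzer {
  public TokenStream tokenStream(String fieldName, Reader reader) {
    return new WhitespaceTokenizer(reader);
  }
  // Leave 100 empty positions between successive instances of the same field,
  // so an exact PhraseQuery cannot match across instance boundaries.
  public int getPositionIncrementGap(String fieldName) {
    return 100;
  }
}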

View File

@ -0,0 +1,274 @@
package org.apache.lucene.document;
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Base class for {@link Fieldable} implementations, holding the common
* storage, indexing, term vector and boost settings.
**/
public abstract class AbstractField implements Fieldable {
protected String name = "body";
protected boolean storeTermVector = false;
protected boolean storeOffsetWithTermVector = false;
protected boolean storePositionWithTermVector = false;
protected boolean omitNorms = false;
protected boolean isStored = false;
protected boolean isIndexed = true;
protected boolean isTokenized = true;
protected boolean isBinary = false;
protected boolean isCompressed = false;
protected boolean lazy = false;
protected float boost = 1.0f;
// the one and only data object for all different kind of field values
protected Object fieldsData = null;
protected AbstractField()
{
}
protected AbstractField(String name, Field.Store store, Field.Index index, Field.TermVector termVector) {
if (name == null)
throw new NullPointerException("name cannot be null");
this.name = name.intern(); // field names are interned
if (store == Field.Store.YES){
this.isStored = true;
this.isCompressed = false;
}
else if (store == Field.Store.COMPRESS) {
this.isStored = true;
this.isCompressed = true;
}
else if (store == Field.Store.NO){
this.isStored = false;
this.isCompressed = false;
}
else
throw new IllegalArgumentException("unknown store parameter " + store);
if (index == Field.Index.NO) {
this.isIndexed = false;
this.isTokenized = false;
} else if (index == Field.Index.TOKENIZED) {
this.isIndexed = true;
this.isTokenized = true;
} else if (index == Field.Index.UN_TOKENIZED) {
this.isIndexed = true;
this.isTokenized = false;
} else if (index == Field.Index.NO_NORMS) {
this.isIndexed = true;
this.isTokenized = false;
this.omitNorms = true;
} else {
throw new IllegalArgumentException("unknown index parameter " + index);
}
this.isBinary = false;
setStoreTermVector(termVector);
}
/** Sets the boost factor for hits on this field. This value will be
* multiplied into the score of all hits on this field of this
* document.
*
* <p>The boost is multiplied by {@link org.apache.lucene.document.Document#getBoost()} of the document
* containing this field. If a document has multiple fields with the same
* name, all such values are multiplied together. This product is then
* multiplied by the value {@link org.apache.lucene.search.Similarity#lengthNorm(String,int)}, and
* rounded by {@link org.apache.lucene.search.Similarity#encodeNorm(float)} before it is stored in the
* index. One should attempt to ensure that this product does not overflow
* the range of that encoding.
*
* @see org.apache.lucene.document.Document#setBoost(float)
* @see org.apache.lucene.search.Similarity#lengthNorm(String, int)
* @see org.apache.lucene.search.Similarity#encodeNorm(float)
*/
public void setBoost(float boost) {
this.boost = boost;
}
/** Returns the boost factor for hits for this field.
*
* <p>The default value is 1.0.
*
* <p>Note: this value is not stored directly with the document in the index.
* Documents returned from {@link org.apache.lucene.index.IndexReader#document(int)} and
* {@link org.apache.lucene.search.Hits#doc(int)} may thus not have the same value present as when
* this field was indexed.
*
* @see #setBoost(float)
*/
public float getBoost() {
return boost;
}
/** Returns the name of the field as an interned string.
* For example "date", "title", "body", ...
*/
public String name() { return name; }
protected void setStoreTermVector(Field.TermVector termVector) {
if (termVector == Field.TermVector.NO) {
this.storeTermVector = false;
this.storePositionWithTermVector = false;
this.storeOffsetWithTermVector = false;
}
else if (termVector == Field.TermVector.YES) {
this.storeTermVector = true;
this.storePositionWithTermVector = false;
this.storeOffsetWithTermVector = false;
}
else if (termVector == Field.TermVector.WITH_POSITIONS) {
this.storeTermVector = true;
this.storePositionWithTermVector = true;
this.storeOffsetWithTermVector = false;
}
else if (termVector == Field.TermVector.WITH_OFFSETS) {
this.storeTermVector = true;
this.storePositionWithTermVector = false;
this.storeOffsetWithTermVector = true;
}
else if (termVector == Field.TermVector.WITH_POSITIONS_OFFSETS) {
this.storeTermVector = true;
this.storePositionWithTermVector = true;
this.storeOffsetWithTermVector = true;
}
else {
throw new IllegalArgumentException("unknown termVector parameter " + termVector);
}
}
/** True iff the value of the field is to be stored in the index for return
with search hits. It is an error for this to be true if a field is
Reader-valued. */
public final boolean isStored() { return isStored; }
/** True iff the value of the field is to be indexed, so that it may be
searched on. */
public final boolean isIndexed() { return isIndexed; }
/** True iff the value of the field should be tokenized as text prior to
indexing. Un-tokenized fields are indexed as a single word and may not be
Reader-valued. */
public final boolean isTokenized() { return isTokenized; }
/** True if the value of the field is stored and compressed within the index */
public final boolean isCompressed() { return isCompressed; }
/** True iff the term or terms used to index this field are stored as a term
* vector, available from {@link org.apache.lucene.index.IndexReader#getTermFreqVector(int,String)}.
* These methods do not provide access to the original content of the field,
* only to terms used to index it. If the original content must be
* preserved, use the <code>stored</code> attribute instead.
*
* @see org.apache.lucene.index.IndexReader#getTermFreqVector(int, String)
*/
public final boolean isTermVectorStored() { return storeTermVector; }
/**
* True iff terms are stored as term vector together with their offsets
* (start and end position in source text).
*/
public boolean isStoreOffsetWithTermVector(){
return storeOffsetWithTermVector;
}
/**
* True iff terms are stored as term vector together with their token positions.
*/
public boolean isStorePositionWithTermVector(){
return storePositionWithTermVector;
}
/** True iff the value of the field is stored as binary */
public final boolean isBinary() { return isBinary; }
/** True if norms are omitted for this indexed field */
public boolean getOmitNorms() { return omitNorms; }
/** Expert:
*
* If set, omit normalization factors associated with this indexed field.
* This effectively disables indexing boosts and length normalization for this field.
*/
public void setOmitNorms(boolean omitNorms) { this.omitNorms=omitNorms; }
public boolean isLazy() {
return lazy;
}
/** Prints a Field for human consumption. */
public final String toString() {
StringBuffer result = new StringBuffer();
if (isStored) {
result.append("stored");
if (isCompressed)
result.append("/compressed");
else
result.append("/uncompressed");
}
if (isIndexed) {
if (result.length() > 0)
result.append(",");
result.append("indexed");
}
if (isTokenized) {
if (result.length() > 0)
result.append(",");
result.append("tokenized");
}
if (storeTermVector) {
if (result.length() > 0)
result.append(",");
result.append("termVector");
}
if (storeOffsetWithTermVector) {
if (result.length() > 0)
result.append(",");
result.append("termVectorOffsets");
}
if (storePositionWithTermVector) {
if (result.length() > 0)
result.append(",");
result.append("termVectorPosition");
}
if (isBinary) {
if (result.length() > 0)
result.append(",");
result.append("binary");
}
if (omitNorms) {
result.append(",omitNorms");
}
if (lazy){
result.append(",lazy");
}
result.append('<');
result.append(name);
result.append(':');
if (fieldsData != null && lazy == false) {
result.append(fieldsData);
}
result.append('>');
return result.toString();
}
}
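
Since AbstractField carries all of the flag and boost plumbing, a custom Fieldable only has to supply the three value accessors. A hypothetical sketch (the class and its fixed settings are illustrative, not part of the commit):

import java.io.Reader;
import org.apache.lucene.document.AbstractField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;

public class StringOnlyField extends AbstractField implements Fieldable {
  public StringOnlyField(String name, String value) {
    super(name, Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.NO);
    fieldsData = value; // protected value slot inherited from AbstractField
  }
  public String stringValue() { return (String) fieldsData; }
  public Reader readerValue() { return null; }
  public byte[] binaryValue() { return null; }
}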

View File

@ -16,24 +16,21 @@ package org.apache.lucene.document;
* limitations under the License.
*/
import java.util.Enumeration;
import java.util.Iterator;
import java.util.List;
import java.util.ArrayList;
import java.util.Vector;
import org.apache.lucene.index.IndexReader; // for javadoc
import org.apache.lucene.search.Searcher; // for javadoc
import org.apache.lucene.search.Hits; // for javadoc
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.Searcher;
import java.util.*; // for javadoc
/** Documents are the unit of indexing and search.
*
* A Document is a set of fields. Each field has a name and a textual value.
* A field may be {@link Field#isStored() stored} with the document, in which
* A field may be {@link Fieldable#isStored() stored} with the document, in which
* case it is returned with search hits on the document. Thus each document
* should typically contain one or more stored fields which uniquely identify
* it.
*
* <p>Note that fields which are <i>not</i> {@link Field#isStored() stored} are
* <p>Note that fields which are <i>not</i> {@link Fieldable#isStored() stored} are
* <i>not</i> available in documents retrieved from the index, e.g. with {@link
* Hits#doc(int)}, {@link Searcher#doc(int)} or {@link
* IndexReader#document(int)}.
@ -50,11 +47,11 @@ public final class Document implements java.io.Serializable {
/** Sets a boost factor for hits on any field of this document. This value
* will be multiplied into the score of all hits on this document.
*
* <p>Values are multiplied into the value of {@link Field#getBoost()} of
* <p>Values are multiplied into the value of {@link Fieldable#getBoost()} of
* each field in this document. Thus, this method in effect sets a default
* boost for the fields of this document.
*
* @see Field#setBoost(float)
* @see Fieldable#setBoost(float)
*/
public void setBoost(float boost) {
this.boost = boost;
@ -85,7 +82,7 @@ public final class Document implements java.io.Serializable {
* a document has to be deleted from an index and a new changed version of that
* document has to be added.</p>
*/
public final void add(Field field) {
public final void add(Fieldable field) {
fields.add(field);
}
@ -102,7 +99,7 @@ public final class Document implements java.io.Serializable {
public final void removeField(String name) {
Iterator it = fields.iterator();
while (it.hasNext()) {
Field field = (Field)it.next();
Fieldable field = (Fieldable)it.next();
if (field.name().equals(name)) {
it.remove();
return;
@ -122,7 +119,7 @@ public final class Document implements java.io.Serializable {
public final void removeFields(String name) {
Iterator it = fields.iterator();
while (it.hasNext()) {
Field field = (Field)it.next();
Fieldable field = (Fieldable)it.next();
if (field.name().equals(name)) {
it.remove();
}
@ -133,9 +130,9 @@ public final class Document implements java.io.Serializable {
* null. If multiple fields exist with this name, this method returns the
* first value added.
*/
public final Field getField(String name) {
public final Fieldable getField(String name) {
for (int i = 0; i < fields.size(); i++) {
Field field = (Field)fields.get(i);
Fieldable field = (Fieldable)fields.get(i);
if (field.name().equals(name))
return field;
}
@ -149,7 +146,7 @@ public final class Document implements java.io.Serializable {
*/
public final String get(String name) {
for (int i = 0; i < fields.size(); i++) {
Field field = (Field)fields.get(i);
Fieldable field = (Fieldable)fields.get(i);
if (field.name().equals(name) && (!field.isBinary()))
return field.stringValue();
}
@ -162,16 +159,16 @@ public final class Document implements java.io.Serializable {
}
/**
* Returns an array of {@link Field}s with the given name.
* Returns an array of {@link Fieldable}s with the given name.
* This method can return <code>null</code>.
*
* @param name the name of the field
* @return a <code>Field[]</code> array
* @return a <code>Fieldable[]</code> array
*/
public final Field[] getFields(String name) {
public final Fieldable[] getFields(String name) {
List result = new ArrayList();
for (int i = 0; i < fields.size(); i++) {
Field field = (Field)fields.get(i);
Fieldable field = (Fieldable)fields.get(i);
if (field.name().equals(name)) {
result.add(field);
}
@ -180,7 +177,7 @@ public final class Document implements java.io.Serializable {
if (result.size() == 0)
return null;
return (Field[])result.toArray(new Field[result.size()]);
return (Fieldable[])result.toArray(new Fieldable[result.size()]);
}
/**
@ -193,7 +190,7 @@ public final class Document implements java.io.Serializable {
public final String[] getValues(String name) {
List result = new ArrayList();
for (int i = 0; i < fields.size(); i++) {
Field field = (Field)fields.get(i);
Fieldable field = (Fieldable)fields.get(i);
if (field.name().equals(name) && (!field.isBinary()))
result.add(field.stringValue());
}
@ -215,7 +212,7 @@ public final class Document implements java.io.Serializable {
public final byte[][] getBinaryValues(String name) {
List result = new ArrayList();
for (int i = 0; i < fields.size(); i++) {
Field field = (Field)fields.get(i);
Fieldable field = (Fieldable)fields.get(i);
if (field.name().equals(name) && (field.isBinary()))
result.add(field.binaryValue());
}
@ -237,7 +234,7 @@ public final class Document implements java.io.Serializable {
*/
public final byte[] getBinaryValue(String name) {
for (int i=0; i < fields.size(); i++) {
Field field = (Field)fields.get(i);
Fieldable field = (Fieldable)fields.get(i);
if (field.name().equals(name) && (field.isBinary()))
return field.binaryValue();
}
@ -249,7 +246,7 @@ public final class Document implements java.io.Serializable {
StringBuffer buffer = new StringBuffer();
buffer.append("Document<");
for (int i = 0; i < fields.size(); i++) {
Field field = (Field)fields.get(i);
Fieldable field = (Fieldable)fields.get(i);
buffer.append(field.toString());
if (i != fields.size()-1)
buffer.append(" ");

View File

@ -16,9 +16,6 @@ package org.apache.lucene.document;
* limitations under the License.
*/
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.util.Parameter;
import java.io.Reader;
@ -32,23 +29,7 @@ import java.io.Serializable;
index, so that they may be returned with hits on the document.
*/
public final class Field implements Serializable {
private String name = "body";
// the one and only data object for all different kind of field values
private Object fieldsData = null;
private boolean storeTermVector = false;
private boolean storeOffsetWithTermVector = false;
private boolean storePositionWithTermVector = false;
private boolean omitNorms = false;
private boolean isStored = false;
private boolean isIndexed = true;
private boolean isTokenized = true;
private boolean isBinary = false;
private boolean isCompressed = false;
private float boost = 1.0f;
public final class Field extends AbstractField implements Fieldable, Serializable {
/** Specifies whether and how a field should be stored. */
public static final class Store extends Parameter implements Serializable {
@ -146,44 +127,6 @@ public final class Field implements Serializable {
public static final TermVector WITH_POSITIONS_OFFSETS = new TermVector("WITH_POSITIONS_OFFSETS");
}
/** Sets the boost factor for hits on this field. This value will be
* multiplied into the score of all hits on this field of this
* document.
*
* <p>The boost is multiplied by {@link Document#getBoost()} of the document
* containing this field. If a document has multiple fields with the same
* name, all such values are multiplied together. This product is then
* multiplied by the value {@link Similarity#lengthNorm(String,int)}, and
* rounded by {@link Similarity#encodeNorm(float)} before it is stored in the
* index. One should attempt to ensure that this product does not overflow
* the range of that encoding.
*
* @see Document#setBoost(float)
* @see Similarity#lengthNorm(String, int)
* @see Similarity#encodeNorm(float)
*/
public void setBoost(float boost) {
this.boost = boost;
}
/** Returns the boost factor for hits for this field.
*
* <p>The default value is 1.0.
*
* <p>Note: this value is not stored directly with the document in the index.
* Documents returned from {@link IndexReader#document(int)} and
* {@link Hits#doc(int)} may thus not have the same value present as when
* this field was indexed.
*
* @see #setBoost(float)
*/
public float getBoost() {
return boost;
}
/** Returns the name of the field as an interned string.
* For example "date", "title", "body", ...
*/
public String name() { return name; }
/** The value of the field as a String, or null. If null, the Reader value
* or binary value is used. Exactly one of stringValue(), readerValue(), and
@ -366,145 +309,5 @@ public final class Field implements Serializable {
setStoreTermVector(TermVector.NO);
}
private void setStoreTermVector(TermVector termVector) {
if (termVector == TermVector.NO) {
this.storeTermVector = false;
this.storePositionWithTermVector = false;
this.storeOffsetWithTermVector = false;
}
else if (termVector == TermVector.YES) {
this.storeTermVector = true;
this.storePositionWithTermVector = false;
this.storeOffsetWithTermVector = false;
}
else if (termVector == TermVector.WITH_POSITIONS) {
this.storeTermVector = true;
this.storePositionWithTermVector = true;
this.storeOffsetWithTermVector = false;
}
else if (termVector == TermVector.WITH_OFFSETS) {
this.storeTermVector = true;
this.storePositionWithTermVector = false;
this.storeOffsetWithTermVector = true;
}
else if (termVector == TermVector.WITH_POSITIONS_OFFSETS) {
this.storeTermVector = true;
this.storePositionWithTermVector = true;
this.storeOffsetWithTermVector = true;
}
else {
throw new IllegalArgumentException("unknown termVector parameter " + termVector);
}
}
/** True iff the value of the field is to be stored in the index for return
with search hits. It is an error for this to be true if a field is
Reader-valued. */
public final boolean isStored() { return isStored; }
/** True iff the value of the field is to be indexed, so that it may be
searched on. */
public final boolean isIndexed() { return isIndexed; }
/** True iff the value of the field should be tokenized as text prior to
indexing. Un-tokenized fields are indexed as a single word and may not be
Reader-valued. */
public final boolean isTokenized() { return isTokenized; }
/** True if the value of the field is stored and compressed within the index */
public final boolean isCompressed() { return isCompressed; }
/** True iff the term or terms used to index this field are stored as a term
* vector, available from {@link IndexReader#getTermFreqVector(int,String)}.
* These methods do not provide access to the original content of the field,
* only to terms used to index it. If the original content must be
* preserved, use the <code>stored</code> attribute instead.
*
* @see IndexReader#getTermFreqVector(int, String)
*/
public final boolean isTermVectorStored() { return storeTermVector; }
/**
* True iff terms are stored as term vector together with their offsets
* (start and end position in source text).
*/
public boolean isStoreOffsetWithTermVector(){
return storeOffsetWithTermVector;
}
/**
* True iff terms are stored as term vector together with their token positions.
*/
public boolean isStorePositionWithTermVector(){
return storePositionWithTermVector;
}
/** True iff the value of the field is stored as binary */
public final boolean isBinary() { return isBinary; }
/** True if norms are omitted for this indexed field */
public boolean getOmitNorms() { return omitNorms; }
/** Expert:
*
* If set, omit normalization factors associated with this indexed field.
* This effectively disables indexing boosts and length normalization for this field.
*/
public void setOmitNorms(boolean omitNorms) { this.omitNorms=omitNorms; }
/** Prints a Field for human consumption. */
public final String toString() {
StringBuffer result = new StringBuffer();
if (isStored) {
result.append("stored");
if (isCompressed)
result.append("/compressed");
else
result.append("/uncompressed");
}
if (isIndexed) {
if (result.length() > 0)
result.append(",");
result.append("indexed");
}
if (isTokenized) {
if (result.length() > 0)
result.append(",");
result.append("tokenized");
}
if (storeTermVector) {
if (result.length() > 0)
result.append(",");
result.append("termVector");
}
if (storeOffsetWithTermVector) {
if (result.length() > 0)
result.append(",");
result.append("termVectorOffsets");
}
if (storePositionWithTermVector) {
if (result.length() > 0)
result.append(",");
result.append("termVectorPosition");
}
if (isBinary) {
if (result.length() > 0)
result.append(",");
result.append("binary");
}
if (omitNorms) {
result.append(",omitNorms");
}
result.append('<');
result.append(name);
result.append(':');
if (fieldsData != null) {
result.append(fieldsData);
}
result.append('>');
return result.toString();
}
}

View File

@ -0,0 +1,24 @@
package org.apache.lucene.document;
/**
* Created by IntelliJ IDEA.
* User: Grant Ingersoll
* Date: Apr 14, 2006
* Time: 5:29:26 PM
* $Id:$
* Copyright 2005. Center For Natural Language Processing
*/
/**
* Similar to a {@link java.io.FileFilter}, the FieldSelector allows one to make decisions about
* what Fields get loaded on a {@link Document} by {@link org.apache.lucene.index.IndexReader#document(int,org.apache.lucene.document.FieldSelector)}
*
**/
public interface FieldSelector {
/**
*
* @param fieldName the name of the {@link Field} under consideration
* @return a {@link FieldSelectorResult} indicating how the {@link Field} with <code>fieldName</code> should be loaded
*/
FieldSelectorResult accept(String fieldName);
}
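
A sketch of a hand-rolled selector against this interface; the policy of loading "id" eagerly and deferring everything else is an assumption for illustration:

import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.FieldSelectorResult;

public class IdEagerSelector implements FieldSelector {
  public FieldSelectorResult accept(String fieldName) {
    // Load the "id" field up front; defer everything else until asked for.
    return "id".equals(fieldName) ? FieldSelectorResult.LOAD
                                  : FieldSelectorResult.LAZY_LOAD;
  }
}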

View File

@ -0,0 +1,44 @@
package org.apache.lucene.document;
/**
* Created by IntelliJ IDEA.
* User: Grant Ingersoll
* Date: Apr 14, 2006
* Time: 5:40:17 PM
* $Id:$
* Copyright 2005. Center For Natural Language Processing
*/
/**
* Provides information about what should be done with this Field
*
**/
//Replace with an enumerated type in 1.5
public final class FieldSelectorResult {
public static final FieldSelectorResult LOAD = new FieldSelectorResult(0);
public static final FieldSelectorResult LAZY_LOAD = new FieldSelectorResult(1);
public static final FieldSelectorResult NO_LOAD = new FieldSelectorResult(2);
public static final FieldSelectorResult LOAD_AND_BREAK = new FieldSelectorResult(3);
private int id;
private FieldSelectorResult(int id)
{
this.id = id;
}
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
final FieldSelectorResult that = (FieldSelectorResult) o;
if (id != that.id) return false;
return true;
}
public int hashCode() {
return id;
}
}
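
For reference, the JDK 1.5 enumerated type that the comment above anticipates might look like this (a sketch, not part of the commit):

public enum FieldSelectorResult { LOAD, LAZY_LOAD, NO_LOAD, LOAD_AND_BREAK }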

View File

@ -0,0 +1,137 @@
package org.apache.lucene.document;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Reader;
import java.io.Serializable;
/**
* Synonymous with {@link Field}.
*
**/
public interface Fieldable extends Serializable {
/** Sets the boost factor for hits on this field. This value will be
* multiplied into the score of all hits on this field of this
* document.
*
* <p>The boost is multiplied by {@link org.apache.lucene.document.Document#getBoost()} of the document
* containing this field. If a document has multiple fields with the same
* name, all such values are multiplied together. This product is then
* multiplied by the value {@link org.apache.lucene.search.Similarity#lengthNorm(String,int)}, and
* rounded by {@link org.apache.lucene.search.Similarity#encodeNorm(float)} before it is stored in the
* index. One should attempt to ensure that this product does not overflow
* the range of that encoding.
*
* @see org.apache.lucene.document.Document#setBoost(float)
* @see org.apache.lucene.search.Similarity#lengthNorm(String, int)
* @see org.apache.lucene.search.Similarity#encodeNorm(float)
*/
void setBoost(float boost);
/** Returns the boost factor for hits for this field.
*
* <p>The default value is 1.0.
*
* <p>Note: this value is not stored directly with the document in the index.
* Documents returned from {@link org.apache.lucene.index.IndexReader#document(int)} and
* {@link org.apache.lucene.search.Hits#doc(int)} may thus not have the same value present as when
* this field was indexed.
*
* @see #setBoost(float)
*/
float getBoost();
/** Returns the name of the field as an interned string.
* For example "date", "title", "body", ...
*/
String name();
/** The value of the field as a String, or null. If null, the Reader value
* or binary value is used. Exactly one of stringValue(), readerValue(), and
* binaryValue() must be set. */
String stringValue();
/** The value of the field as a Reader, or null. If null, the String value
* or binary value is used. Exactly one of stringValue(), readerValue(),
* and binaryValue() must be set. */
Reader readerValue();
/** The value of the field in Binary, or null. If null, the Reader or
* String value is used. Exactly one of stringValue(), readerValue() and
* binaryValue() must be set. */
byte[] binaryValue();
/** True iff the value of the field is to be stored in the index for return
with search hits. It is an error for this to be true if a field is
Reader-valued. */
boolean isStored();
/** True iff the value of the field is to be indexed, so that it may be
searched on. */
boolean isIndexed();
/** True iff the value of the field should be tokenized as text prior to
indexing. Un-tokenized fields are indexed as a single word and may not be
Reader-valued. */
boolean isTokenized();
/** True if the value of the field is stored and compressed within the index */
boolean isCompressed();
/** True iff the term or terms used to index this field are stored as a term
* vector, available from {@link org.apache.lucene.index.IndexReader#getTermFreqVector(int,String)}.
* These methods do not provide access to the original content of the field,
* only to terms used to index it. If the original content must be
* preserved, use the <code>stored</code> attribute instead.
*
* @see org.apache.lucene.index.IndexReader#getTermFreqVector(int, String)
*/
boolean isTermVectorStored();
/**
* True iff terms are stored as term vector together with their offsets
* (start and end position in source text).
*/
boolean isStoreOffsetWithTermVector();
/**
* True iff terms are stored as term vector together with their token positions.
*/
boolean isStorePositionWithTermVector();
/** True iff the value of the field is stored as binary */
boolean isBinary();
/** True if norms are omitted for this indexed field */
boolean getOmitNorms();
/** Expert:
*
* If set, omit normalization factors associated with this indexed field.
* This effectively disables indexing boosts and length normalization for this field.
*/
void setOmitNorms(boolean omitNorms);
/**
* Indicates whether a Field is Lazy or not. The semantics of Lazy loading are such that if a Field is lazily loaded, retrieving
* its values via {@link #stringValue()} or {@link #binaryValue()} is only valid as long as the {@link org.apache.lucene.index.IndexReader} that
* retrieved the {@link Document} is still open.
*
* @return true if this field can be loaded lazily
*/
boolean isLazy();
}

View File

@ -0,0 +1,22 @@
package org.apache.lucene.document;
/**
* Created by IntelliJ IDEA.
* User: Grant Ingersoll
* Date: Apr 15, 2006
* Time: 10:13:07 AM
* $Id:$
* Copyright 2005. Center For Natural Language Processing
*/
/**
* Load the first field and break.
* <p/>
* See {@link FieldSelectorResult#LOAD_AND_BREAK}
*/
public class LoadFirstFieldSelector implements FieldSelector {
public FieldSelectorResult accept(String fieldName) {
return FieldSelectorResult.LOAD_AND_BREAK;
}
}
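
A sketch of how this selector might be used to pull back a single stored field per document; it assumes, purely for illustration, that an identifying field is always the first one stored:

import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.LoadFirstFieldSelector;
import org.apache.lucene.index.IndexReader;

public class FirstFieldSketch {
  public static Fieldable firstStoredField(IndexReader reader, int n) throws IOException {
    // Only the first stored field is loaded; the rest of the record is skipped.
    Document doc = reader.document(n, new LoadFirstFieldSelector());
    return (Fieldable) doc.fields().nextElement();
  }
}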

View File

@ -0,0 +1,57 @@
/*
* MapFieldSelector.java
*
* Created on May 2, 2006, 6:49 PM
*
*/
package org.apache.lucene.document;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* A FieldSelector based on a Map of field names to FieldSelectorResults
*
* @author Chuck Williams
*/
public class MapFieldSelector implements FieldSelector {
Map fieldSelections;
/** Create a MapFieldSelector
* @param fieldSelections maps from field names to FieldSelectorResults
*/
public MapFieldSelector(Map fieldSelections) {
this.fieldSelections = fieldSelections;
}
/** Create a MapFieldSelector
* @param fields fields to LOAD. All other fields are NO_LOAD.
*/
public MapFieldSelector(List fields) {
fieldSelections = new HashMap(fields.size()*5/3);
for (int i=0; i<fields.size(); i++)
fieldSelections.put(fields.get(i), FieldSelectorResult.LOAD);
}
/** Create a MapFieldSelector
* @param fields fields to LOAD. All other fields are NO_LOAD.
*/
public MapFieldSelector(String[] fields) {
fieldSelections = new HashMap(fields.length*5/3);
for (int i=0; i<fields.length; i++)
fieldSelections.put(fields[i], FieldSelectorResult.LOAD);
}
/** Load field according to its associated value in fieldSelections
* @param field a field name
* @return the fieldSelections value that field maps to or NO_LOAD if none.
*/
public FieldSelectorResult accept(String field) {
FieldSelectorResult selection = (FieldSelectorResult) fieldSelections.get(field);
return selection!=null ? selection : FieldSelectorResult.NO_LOAD;
}
}
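
A usage sketch mixing per-field policies through the Map constructor; the field names are illustrative assumptions:

import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.document.FieldSelectorResult;
import org.apache.lucene.document.MapFieldSelector;

public class MapSelectorSketch {
  public static MapFieldSelector make() {
    Map selections = new HashMap();
    selections.put("id", FieldSelectorResult.LOAD);        // read eagerly
    selections.put("body", FieldSelectorResult.LAZY_LOAD); // defer until asked for
    // Any field not in the map, e.g. "timestamp", maps to NO_LOAD.
    return new MapFieldSelector(selections);
  }
}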

View File

@ -0,0 +1,53 @@
package org.apache.lucene.document;
import java.util.Set;
/**
* Created by IntelliJ IDEA.
* User: Grant Ingersoll
* Date: Apr 14, 2006
* Time: 6:53:07 PM
* $Id:$
* Copyright 2005. Center For Natural Language Processing
*/
/**
* Declare what fields to load normally and what fields to load lazily
*
**/
public class SetBasedFieldSelector implements FieldSelector {
private Set fieldsToLoad;
private Set lazyFieldsToLoad;
/**
* Pass in the Set of {@link Field} names to load and the Set of {@link Field} names to load lazily. If both are null, the
* Document will not have any {@link Field} on it.
* @param fieldsToLoad A Set of {@link String} field names to load. May be empty, but not null
* @param lazyFieldsToLoad A Set of {@link String} field names to load lazily. May be empty, but not null
*/
public SetBasedFieldSelector(Set fieldsToLoad, Set lazyFieldsToLoad) {
this.fieldsToLoad = fieldsToLoad;
this.lazyFieldsToLoad = lazyFieldsToLoad;
}
/**
* Indicate whether to load the field with the given name or not. If the {@link Field#name()} is not in either of the
* initializing Sets, then {@link org.apache.lucene.document.FieldSelectorResult#NO_LOAD} is returned. If a Field name
* is in both <code>fieldsToLoad</code> and <code>lazyFieldsToLoad</code>, lazy has precedence.
*
* @param fieldName The {@link Field} name to check
* @return The {@link FieldSelectorResult}
*/
public FieldSelectorResult accept(String fieldName) {
FieldSelectorResult result = FieldSelectorResult.NO_LOAD;
if (fieldsToLoad.contains(fieldName) == true){
result = FieldSelectorResult.LOAD;
}
if (lazyFieldsToLoad.contains(fieldName) == true){
result = FieldSelectorResult.LAZY_LOAD;
}
return result;
}
}
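
A small sketch of the precedence rule described in the accept() javadoc above (field names are illustrative):

import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.document.FieldSelectorResult;
import org.apache.lucene.document.SetBasedFieldSelector;

public class SetSelectorSketch {
  public static void main(String[] args) {
    Set load = new HashSet();
    load.add("title");
    load.add("body");
    Set lazy = new HashSet();
    lazy.add("body");
    SetBasedFieldSelector selector = new SetBasedFieldSelector(load, lazy);
    // "body" appears in both sets, so lazy takes precedence.
    boolean ok = selector.accept("body") == FieldSelectorResult.LAZY_LOAD
              && selector.accept("title") == FieldSelectorResult.LOAD
              && selector.accept("other") == FieldSelectorResult.NO_LOAD;
    System.out.println(ok); // prints true
  }
}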

View File

@ -16,22 +16,22 @@ package org.apache.lucene.index;
* limitations under the License.
*/
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexOutput;
import java.io.IOException;
import java.io.PrintStream;
import java.io.Reader;
import java.io.StringReader;
import java.util.Hashtable;
import java.util.Enumeration;
import java.util.Arrays;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.search.Similarity;
import java.util.Enumeration;
import java.util.Hashtable;
final class DocumentWriter {
private Analyzer analyzer;
@ -129,7 +129,7 @@ final class DocumentWriter {
throws IOException {
Enumeration fields = doc.fields();
while (fields.hasMoreElements()) {
Field field = (Field) fields.nextElement();
Fieldable field = (Fieldable) fields.nextElement();
String fieldName = field.name();
int fieldNumber = fieldInfos.fieldNumber(fieldName);

View File

@ -16,18 +16,17 @@ package org.apache.lucene.index;
* limitations under the License.
*/
import java.util.*;
import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
/** Access to the Field Info file that describes document fields and whether or
* not they are indexed. Each segment has a separate Field Info file. Objects
import java.io.IOException;
import java.util.*;
/** Access to the Fieldable Info file that describes document fields and whether or
* not they are indexed. Each segment has a separate Fieldable Info file. Objects
* of this class are thread-safe for multiple readers, but only one thread can
* be adding documents at a time, with no other reader or writer threads
* accessing this object.
@ -65,7 +64,7 @@ final class FieldInfos {
public void add(Document doc) {
Enumeration fields = doc.fields();
while (fields.hasMoreElements()) {
Field field = (Field) fields.nextElement();
Fieldable field = (Fieldable) fields.nextElement();
add(field.name(), field.isIndexed(), field.isTermVectorStored(), field.isStorePositionWithTermVector(),
field.isStoreOffsetWithTermVector(), field.getOmitNorms());
}
@ -105,7 +104,7 @@ final class FieldInfos {
/**
* Calls 5 parameter add with false for all TermVector parameters.
*
* @param name The name of the Field
* @param name The name of the Fieldable
* @param isIndexed true if the field is indexed
* @see #add(String, boolean, boolean, boolean, boolean)
*/

View File

@ -0,0 +1,70 @@
package org.apache.lucene.index;
/**
* Created by IntelliJ IDEA.
* User: Grant Ingersoll
* Date: Jan 12, 2006
* Time: 9:37:43 AM
* $Id:$
* Copyright 2005. Center For Natural Language Processing
*/
/**
* A RuntimeException thrown when the value of a (lazily loaded) field cannot
* be read from the underlying index input.
**/
public class FieldReaderException extends RuntimeException{
/**
* Constructs a new runtime exception with <code>null</code> as its
* detail message. The cause is not initialized, and may subsequently be
* initialized by a call to {@link #initCause}.
*/
public FieldReaderException() {
}
/**
* Constructs a new runtime exception with the specified cause and a
* detail message of <tt>(cause==null ? null : cause.toString())</tt>
* (which typically contains the class and detail message of
* <tt>cause</tt>). This constructor is useful for runtime exceptions
* that are little more than wrappers for other throwables.
*
* @param cause the cause (which is saved for later retrieval by the
* {@link #getCause()} method). (A <tt>null</tt> value is
* permitted, and indicates that the cause is nonexistent or
* unknown.)
* @since 1.4
*/
public FieldReaderException(Throwable cause) {
super(cause);
}
/**
* Constructs a new runtime exception with the specified detail message.
* The cause is not initialized, and may subsequently be initialized by a
* call to {@link #initCause}.
*
* @param message the detail message. The detail message is saved for
* later retrieval by the {@link #getMessage()} method.
*/
public FieldReaderException(String message) {
super(message);
}
/**
* Constructs a new runtime exception with the specified detail message and
* cause. <p>Note that the detail message associated with
* <code>cause</code> is <i>not</i> automatically incorporated in
* this runtime exception's detail message.
*
* @param message the detail message (which is saved for later retrieval
* by the {@link #getMessage()} method).
* @param cause the cause (which is saved for later retrieval by the
* {@link #getCause()} method). (A <tt>null</tt> value is
* permitted, and indicates that the cause is nonexistent or
* unknown.)
* @since 1.4
*/
public FieldReaderException(String message, Throwable cause) {
super(message, cause);
}
}

View File

@ -16,19 +16,19 @@ package org.apache.lucene.index;
* limitations under the License.
*/
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.zip.DataFormatException;
import java.util.zip.Inflater;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.Reader;
import java.util.zip.DataFormatException;
import java.util.zip.Inflater;
/**
* Class responsible for access to stored document fields.
*
* <p/>
* It uses the &lt;segment&gt;.fdt and &lt;segment&gt;.fdx files.
*
* @version $Id$
@ -39,25 +39,37 @@ final class FieldsReader {
private IndexInput indexStream;
private int size;
private static ThreadLocal fieldsStreamTL = new ThreadLocal();
FieldsReader(Directory d, String segment, FieldInfos fn) throws IOException {
fieldInfos = fn;
fieldsStream = d.openInput(segment + ".fdt");
indexStream = d.openInput(segment + ".fdx");
size = (int) (indexStream.length() / 8);
}
/**
* Closes the underlying {@link org.apache.lucene.store.IndexInput} streams, including any associated with a
* lazy implementation of a Field. After closing, the Field values will no longer be accessible.
*
* @throws IOException
*/
final void close() throws IOException {
fieldsStream.close();
indexStream.close();
IndexInput localFieldsStream = (IndexInput) fieldsStreamTL.get();
if (localFieldsStream != null) {
localFieldsStream.close();
fieldsStreamTL.set(null);
}
}
final int size() {
return size;
}
final Document doc(int n) throws IOException {
final Document doc(int n, FieldSelector fieldSelector) throws IOException {
indexStream.seek(n * 8L);
long position = indexStream.readLong();
fieldsStream.seek(position);
@ -67,82 +79,270 @@ final class FieldsReader {
for (int i = 0; i < numFields; i++) {
int fieldNumber = fieldsStream.readVInt();
FieldInfo fi = fieldInfos.fieldInfo(fieldNumber);
FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.accept(fi.name);
boolean lazy = acceptField.equals(FieldSelectorResult.LAZY_LOAD) == true;
byte bits = fieldsStream.readByte();
boolean compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
boolean tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
if ((bits & FieldsWriter.FIELD_IS_BINARY) != 0) {
final byte[] b = new byte[fieldsStream.readVInt()];
fieldsStream.readBytes(b, 0, b.length);
if (compressed)
doc.add(new Field(fi.name, uncompress(b), Field.Store.COMPRESS));
else
doc.add(new Field(fi.name, b, Field.Store.YES));
boolean binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;
if (acceptField.equals(FieldSelectorResult.LOAD) == true) {
addField(doc, fi, binary, compressed, tokenize);
}
else if (acceptField.equals(FieldSelectorResult.LOAD_AND_BREAK) == true){
addField(doc, fi, binary, compressed, tokenize);
break;//Get out of this loop
}
else if (lazy == true){
addFieldLazy(doc, fi, binary, compressed, tokenize);
}
else {
Field.Index index;
Field.Store store = Field.Store.YES;
if (fi.isIndexed && tokenize)
index = Field.Index.TOKENIZED;
else if (fi.isIndexed && !tokenize)
index = Field.Index.UN_TOKENIZED;
else
index = Field.Index.NO;
Field.TermVector termVector = null;
if (fi.storeTermVector) {
if (fi.storeOffsetWithTermVector) {
if (fi.storePositionWithTermVector) {
termVector = Field.TermVector.WITH_POSITIONS_OFFSETS;
}
else {
termVector = Field.TermVector.WITH_OFFSETS;
}
}
else if (fi.storePositionWithTermVector) {
termVector = Field.TermVector.WITH_POSITIONS;
}
else {
termVector = Field.TermVector.YES;
}
}
else {
termVector = Field.TermVector.NO;
}
if (compressed) {
store = Field.Store.COMPRESS;
final byte[] b = new byte[fieldsStream.readVInt()];
fieldsStream.readBytes(b, 0, b.length);
Field f = new Field(fi.name, // field name
new String(uncompress(b), "UTF-8"), // uncompress the value and add as string
store,
index,
termVector);
f.setOmitNorms(fi.omitNorms);
doc.add(f);
}
else {
Field f = new Field(fi.name, // name
fieldsStream.readString(), // read value
store,
index,
termVector);
f.setOmitNorms(fi.omitNorms);
doc.add(f);
}
skipField(binary, compressed);
}
}
return doc;
}
/**
* Skip the field. We still have to read some of the information about the field, but can skip past the actual content.
* This will have the most payoff on large fields.
*/
private void skipField(boolean binary, boolean compressed) throws IOException {
int toRead = fieldsStream.readVInt();
if (binary || compressed) {
long pointer = fieldsStream.getFilePointer();
fieldsStream.seek(pointer + toRead);
} else {
//We need to skip chars. This will slow us down, but is still better than reading and discarding the value
fieldsStream.skipChars(toRead);
}
}
private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws IOException {
if (binary == true) {
int toRead = fieldsStream.readVInt();
long pointer = fieldsStream.getFilePointer();
if (compressed) {
//was: doc.add(new Field(fi.name, uncompress(b), Field.Store.COMPRESS));
doc.add(new LazyField(fi.name, Field.Store.COMPRESS, toRead, pointer));
} else {
//was: doc.add(new Field(fi.name, b, Field.Store.YES));
doc.add(new LazyField(fi.name, Field.Store.YES, toRead, pointer));
}
//Need to move the pointer ahead by toRead positions
fieldsStream.seek(pointer + toRead);
} else {
Field.Store store = Field.Store.YES;
Field.Index index = getIndexType(fi, tokenize);
Field.TermVector termVector = getTermVectorType(fi);
Fieldable f;
if (compressed) {
store = Field.Store.COMPRESS;
int toRead = fieldsStream.readVInt();
long pointer = fieldsStream.getFilePointer();
f = new LazyField(fi.name, store, toRead, pointer);
//skip over the part that we aren't loading
fieldsStream.seek(pointer + toRead);
f.setOmitNorms(fi.omitNorms);
} else {
int length = fieldsStream.readVInt();
long pointer = fieldsStream.getFilePointer();
//Skip ahead of where we are by the length of what is stored
fieldsStream.skipChars(length);
f = new LazyField(fi.name, store, index, termVector, length, pointer);
f.setOmitNorms(fi.omitNorms);
}
doc.add(f);
}
}
private void addField(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws IOException {
//we have a binary stored field, and it may be compressed
if (binary) {
int toRead = fieldsStream.readVInt();
final byte[] b = new byte[toRead];
fieldsStream.readBytes(b, 0, b.length);
if (compressed)
doc.add(new Field(fi.name, uncompress(b), Field.Store.COMPRESS));
else
doc.add(new Field(fi.name, b, Field.Store.YES));
} else {
Field.Store store = Field.Store.YES;
Field.Index index = getIndexType(fi, tokenize);
Field.TermVector termVector = getTermVectorType(fi);
Fieldable f;
if (compressed) {
store = Field.Store.COMPRESS;
int toRead = fieldsStream.readVInt();
final byte[] b = new byte[toRead];
fieldsStream.readBytes(b, 0, b.length);
f = new Field(fi.name, // field name
new String(uncompress(b), "UTF-8"), // uncompress the value and add as string
store,
index,
termVector);
f.setOmitNorms(fi.omitNorms);
} else {
f = new Field(fi.name, // name
fieldsStream.readString(), // read value
store,
index,
termVector);
f.setOmitNorms(fi.omitNorms);
}
doc.add(f);
}
}
private Field.TermVector getTermVectorType(FieldInfo fi) {
Field.TermVector termVector = null;
if (fi.storeTermVector) {
if (fi.storeOffsetWithTermVector) {
if (fi.storePositionWithTermVector) {
termVector = Field.TermVector.WITH_POSITIONS_OFFSETS;
} else {
termVector = Field.TermVector.WITH_OFFSETS;
}
} else if (fi.storePositionWithTermVector) {
termVector = Field.TermVector.WITH_POSITIONS;
} else {
termVector = Field.TermVector.YES;
}
} else {
termVector = Field.TermVector.NO;
}
return termVector;
}
private Field.Index getIndexType(FieldInfo fi, boolean tokenize) {
Field.Index index;
if (fi.isIndexed && tokenize)
index = Field.Index.TOKENIZED;
else if (fi.isIndexed && !tokenize)
index = Field.Index.UN_TOKENIZED;
else
index = Field.Index.NO;
return index;
}
/**
* A lazy implementation of Fieldable that defers loading of field values until they are asked for, instead of when the
* Document is loaded.
*/
private class LazyField extends AbstractField implements Fieldable {
private int toRead;
private long pointer;
//internal buffer
private char[] chars;
public LazyField(String name, Field.Store store, int toRead, long pointer) {
super(name, store, Field.Index.NO, Field.TermVector.NO);
this.toRead = toRead;
this.pointer = pointer;
lazy = true;
}
public LazyField(String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer) {
super(name, store, index, termVector);
this.toRead = toRead;
this.pointer = pointer;
lazy = true;
}
/**
* The value of the field in Binary, or null. If null, the Reader or
* String value is used. Exactly one of stringValue(), readerValue() and
* binaryValue() must be set.
*/
public byte[] binaryValue() {
if (fieldsData == null) {
final byte[] b = new byte[toRead];
IndexInput localFieldsStream = (IndexInput) fieldsStreamTL.get();
if (localFieldsStream == null) {
localFieldsStream = (IndexInput) fieldsStream.clone();
fieldsStreamTL.set(localFieldsStream);
}
//Rethrow this IOException as a runtime FieldReaderException; IndexReader.document throws IOException anyway,
//so callers are already handling an exception when getting the document
try {
localFieldsStream.seek(pointer);
localFieldsStream.readBytes(b, 0, b.length);
if (isCompressed == true) {
fieldsData = uncompress(b);
} else {
fieldsData = b;
}
} catch (IOException e) {
throw new FieldReaderException(e);
}
}
return fieldsData instanceof byte[] ? (byte[]) fieldsData : null;
}
/**
* The value of the field as a Reader, or null. If null, the String value
* or binary value is used. Exactly one of stringValue(), readerValue(),
* and binaryValue() must be set.
*/
public Reader readerValue() {
return fieldsData instanceof Reader ? (Reader) fieldsData : null;
}
/**
* The value of the field as a String, or null. If null, the Reader value
* or binary value is used. Exactly one of stringValue(), readerValue(), and
* binaryValue() must be set.
*/
public String stringValue() {
if (fieldsData == null) {
IndexInput localFieldsStream = (IndexInput) fieldsStreamTL.get();
if (localFieldsStream == null) {
localFieldsStream = (IndexInput) fieldsStream.clone();
fieldsStreamTL.set(localFieldsStream);
}
try {
localFieldsStream.seek(pointer);
//read in chars b/c we already know the length we need to read
if (chars == null || toRead > chars.length)
chars = new char[toRead];
localFieldsStream.readChars(chars, 0, toRead);
fieldsData = new String(chars, 0, toRead);//fieldsStream.readString();
} catch (IOException e) {
throw new FieldReaderException(e);
}
}
return fieldsData instanceof String ? (String) fieldsData : null;
}
public long getPointer() {
return pointer;
}
public void setPointer(long pointer) {
this.pointer = pointer;
}
public int getToRead() {
return toRead;
}
public void setToRead(int toRead) {
this.toRead = toRead;
}
}
private final byte[] uncompress(final byte[] input)
throws IOException
{
throws IOException {
Inflater decompressor = new Inflater();
decompressor.setInput(input);

View File

@ -17,6 +17,8 @@ package org.apache.lucene.index;
*/
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import java.io.IOException;
import java.util.Collection;
@ -100,7 +102,7 @@ public class FilterIndexReader extends IndexReader {
public int numDocs() { return in.numDocs(); }
public int maxDoc() { return in.maxDoc(); }
public Document document(int n) throws IOException { return in.document(n); }
public Document document(int n, FieldSelector fieldSelector) throws IOException { return in.document(n, fieldSelector); }
public boolean isDeleted(int n) { return in.isDeleted(n); }
public boolean hasDeletions() { return in.hasDeletions(); }

View File

@ -500,8 +500,8 @@ public class IndexModifier {
// create an index in /tmp/index, overwriting an existing one:
IndexModifier indexModifier = new IndexModifier("/tmp/index", analyzer, true);
Document doc = new Document();
doc.add(new Field("id", "1", Field.Store.YES, Field.Index.UN_TOKENIZED));
doc.add(new Field("body", "a simple test", Field.Store.YES, Field.Index.TOKENIZED));
doc.add(new Field("id", "1", Field.Store.YES, Field.Index.UN_TOKENIZED));
doc.add(new Field("body", "a simple test", Field.Store.YES, Field.Index.TOKENIZED));
indexModifier.addDocument(doc);
int deleted = indexModifier.delete(new Term("id", "1"));
System.out.println("Deleted " + deleted + " document");

View File

@ -17,7 +17,7 @@ package org.apache.lucene.index;
*/
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
@ -353,7 +353,33 @@ public abstract class IndexReader {
/** Returns the stored fields of the <code>n</code><sup>th</sup>
<code>Document</code> in this index. */
public abstract Document document(int n) throws IOException;
public Document document(int n) throws IOException{
return document(n, null);
}
/**
* Get the {@link org.apache.lucene.document.Document} at the <code>n</code><sup>th</sup> position. The {@link org.apache.lucene.document.FieldSelector}
* may be used to determine what {@link org.apache.lucene.document.Field}s to load and how they should be loaded.
*
* <b>NOTE:</b> If this Reader (more specifically, the underlying {@link FieldsReader}) is closed before the lazy {@link org.apache.lucene.document.Field} is
* loaded, an exception may be thrown. If you want the value of a lazy {@link org.apache.lucene.document.Field} to be available after closing, you must
* explicitly load it or fetch the Document again with a new loader.
*
*
* @param n Get the document at the <code>n</code><sup>th</sup> position
* @param fieldSelector The {@link org.apache.lucene.document.FieldSelector} to use to determine what Fields should be loaded on the Document. May be null, in which case all Fields will be loaded.
* @return The stored fields of the {@link org.apache.lucene.document.Document} at the nth position
* @throws IOException If there is a problem reading this document
*
* @see org.apache.lucene.document.Fieldable
* @see org.apache.lucene.document.FieldSelector
* @see org.apache.lucene.document.SetBasedFieldSelector
* @see org.apache.lucene.document.LoadFirstFieldSelector
*/
//When we convert to JDK 1.5 make this Set<String>
public abstract Document document(int n, FieldSelector fieldSelector) throws IOException;
/** Returns true if document <i>n</i> has been deleted */
public abstract boolean isDeleted(int n);
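
For reference, a minimal usage sketch (not part of the patch): passing null keeps the old load-everything behaviour, while a selector such as the MapFieldSelector added elsewhere in this change restricts what gets populated. The index path and field name below are assumptions for illustration:

  IndexReader reader = IndexReader.open("/tmp/index");
  Document full = reader.document(0);   // same as document(0, null): all fields loaded
  Document slim = reader.document(0,
      new MapFieldSelector(new String[] {"title"}));  // only "title" is populated
  reader.close();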
@ -371,21 +397,21 @@ public abstract class IndexReader {
/** Returns the byte-encoded normalization factor for the named field of
* every document. This is used by the search code to score documents.
*
* @see Field#setBoost(float)
* @see org.apache.lucene.document.Field#setBoost(float)
*/
public abstract byte[] norms(String field) throws IOException;
/** Reads the byte-encoded normalization factor for the named field of every
* document. This is used by the search code to score documents.
*
* @see Field#setBoost(float)
* @see org.apache.lucene.document.Field#setBoost(float)
*/
public abstract void norms(String field, byte[] bytes, int offset)
throws IOException;
/** Expert: Resets the normalization factor for the named field of the named
* document. The norm represents the product of the field's {@link
* Field#setBoost(float) boost} and its {@link Similarity#lengthNorm(String,
* Fieldable#setBoost(float) boost} and its {@link Similarity#lengthNorm(String,
* int) length normalization}. Thus, to preserve the length normalization
* values when resetting this, one should base the new value upon the old.
*
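
As a hedged illustration of the boost/norm relationship described above (field name and factor invented; uses the public encodeNorm/decodeNorm helpers):

  // Double the effective boost of "body" for document 5, preserving length
  // normalization by basing the new norm on the old one.
  byte[] norms = reader.norms("body");
  float current = Similarity.decodeNorm(norms[5]);
  reader.setNorm(5, "body", Similarity.encodeNorm(current * 2.0f));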

View File

@ -17,11 +17,14 @@ package org.apache.lucene.index;
*/
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.store.Directory;
import java.io.IOException;
import java.util.*;
import java.util.Collection;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Set;
/** An IndexReader which reads multiple indexes, appending their content.
*
@ -99,9 +102,9 @@ public class MultiReader extends IndexReader {
return maxDoc;
}
public Document document(int n) throws IOException {
public Document document(int n, FieldSelector fieldSelector) throws IOException {
int i = readerIndex(n); // find segment num
return subReaders[i].document(n - starts[i]); // dispatch to segment reader
return subReaders[i].document(n - starts[i], fieldSelector); // dispatch to segment reader
}
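
A worked example of the dispatch above, with invented numbers:

  // Suppose subReaders[0].maxDoc() == 10 and subReaders[1].maxDoc() == 5,
  // so starts == {0, 10, 15}. Then document(12, fieldSelector) resolves to
  // readerIndex(12) == 1 and local id 12 - 10 == 2, i.e. the call becomes
  // subReaders[1].document(2, fieldSelector).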
public boolean isDeleted(int n) {

View File

@ -16,20 +16,24 @@ package org.apache.lucene.index;
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.FieldSelectorResult;
import java.io.IOException;
import java.util.SortedMap;
import java.util.ArrayList;
import java.util.List;
import java.util.HashMap;
import java.util.Map;
import java.util.TreeMap;
import java.util.Collection;
import java.util.Iterator;
import java.util.Enumeration;
import java.util.Set;
import java.util.HashSet;
/** An IndexReader which reads multiple, parallel indexes. Each index added
* must have the same number of documents, but typically each contains
@ -51,6 +55,7 @@ import org.apache.lucene.document.Field;
public class ParallelReader extends IndexReader {
private List readers = new ArrayList();
private SortedMap fieldToReader = new TreeMap();
private Map readerToFields = new HashMap();
private List storedFieldReaders = new ArrayList();
private int maxDoc;
@ -90,7 +95,9 @@ public class ParallelReader extends IndexReader {
throw new IllegalArgumentException
("All readers must have same numDocs: "+numDocs+"!="+reader.numDocs());
Iterator i = reader.getFieldNames(IndexReader.FieldOption.ALL).iterator();
Collection fields = reader.getFieldNames(IndexReader.FieldOption.ALL);
readerToFields.put(reader, fields);
Iterator i = fields.iterator();
while (i.hasNext()) { // update fieldToReader map
String field = (String)i.next();
if (fieldToReader.get(field) == null)
@ -132,13 +139,25 @@ public class ParallelReader extends IndexReader {
}
// append fields from storedFieldReaders
public Document document(int n) throws IOException {
public Document document(int n, FieldSelector fieldSelector) throws IOException {
Document result = new Document();
for (int i = 0; i < storedFieldReaders.size(); i++) {
IndexReader reader = (IndexReader)storedFieldReaders.get(i);
Enumeration fields = reader.document(n).fields();
boolean include = (fieldSelector==null);
if (!include) {
Iterator it = ((Collection) readerToFields.get(reader)).iterator();
while (it.hasNext()) {
  if (fieldSelector.accept((String) it.next()) != FieldSelectorResult.NO_LOAD) {
    include = true;
    break;
  }
}
}
if (include) {
Enumeration fields = reader.document(n, fieldSelector).fields();
while (fields.hasMoreElements()) {
result.add((Field)fields.nextElement());
result.add((Fieldable)fields.nextElement());
}
}
}
return result;
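
The include check above means a stored-field reader is consulted only if the selector would load at least one of its fields. A sketch of a hand-rolled selector (field names invented; LOAD, LAZY_LOAD and NO_LOAD are assumed here to be the FieldSelectorResult constants this patch introduces):

  FieldSelector selector = new FieldSelector() {
    public FieldSelectorResult accept(String fieldName) {
      if ("contents".equals(fieldName))
        return FieldSelectorResult.LAZY_LOAD;  // defer reading the large field
      if ("debugLog".equals(fieldName))
        return FieldSelectorResult.NO_LOAD;    // never load this one
      return FieldSelectorResult.LOAD;         // load everything else eagerly
    }
  };
  Document doc = parallelReader.document(42, selector);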

View File

@ -120,7 +120,7 @@ final class SegmentMerger {
files.add(segment + "." + IndexFileNames.COMPOUND_EXTENSIONS[i]);
}
// Field norm files
// Fieldable norm files
for (int i = 0; i < fieldInfos.size(); i++) {
FieldInfo fi = fieldInfos.fieldInfo(i);
if (fi.isIndexed && !fi.omitNorms) {

View File

@ -16,16 +16,16 @@ package org.apache.lucene.index;
* limitations under the License.
*/
import java.io.IOException;
import java.util.*;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BitVector;
import org.apache.lucene.search.DefaultSimilarity;
import java.io.IOException;
import java.util.*;
/**
* @version $Id$
@ -277,11 +277,11 @@ class SegmentReader extends IndexReader {
return tis.terms(t);
}
public synchronized Document document(int n) throws IOException {
public synchronized Document document(int n, FieldSelector fieldSelector) throws IOException {
if (isDeleted(n))
throw new IllegalArgumentException
("attempt to access a deleted document");
return fieldsReader.doc(n);
return fieldsReader.doc(n, fieldSelector);
}
public synchronized boolean isDeleted(int n) {

View File

@ -127,7 +127,7 @@ class TermVectorsReader implements Cloneable {
result = readTermVector(field, position);
} else {
//System.out.println("Field not found");
//System.out.println("Fieldable not found");
}
} else {
//System.out.println("No tvx file");

View File

@ -150,7 +150,7 @@ final class TermVectorsWriter {
return currentField != null;
}
/** Add term to the field's term vector. Field must already be open.
/** Add term to the field's term vector. Fieldable must already be open.
* Terms should be added in
* increasing order of terms, one call per unique termNum. ProxPointer
* is a pointer into the TermPosition file (prx). Freq is the number of
@ -268,7 +268,7 @@ final class TermVectorsWriter {
private void writeField() throws IOException {
// remember where this field is written
currentField.tvfPointer = tvf.getFilePointer();
//System.out.println("Field Pointer: " + currentField.tvfPointer);
//System.out.println("Fieldable Pointer: " + currentField.tvfPointer);
final int size = terms.size();
tvf.writeVInt(size);

View File

@ -20,7 +20,6 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.search.FieldCache.StringIndex; // required by GCJ
import java.io.IOException;
import java.util.Locale;
@ -43,7 +42,7 @@ implements FieldCache {
/** Expert: Every key in the internal cache is of this type. */
static class Entry {
final String field; // which Field
final String field; // which Fieldable
final int type; // which SortField type
final Object custom; // which custom comparator
final Locale locale; // the locale we're sorting (if string)

View File

@ -45,7 +45,7 @@ extends PriorityQueue {
/**
* Creates a hit queue sorted by the given list of fields.
* @param fields Field names, in priority order (highest priority first).
* @param fields Fieldable names, in priority order (highest priority first).
* @param size The number of hits to retain. Must be greater than zero.
*/
FieldDocSortedHitQueue (SortField[] fields, int size) {

View File

@ -44,7 +44,7 @@ extends PriorityQueue {
/**
* Creates a hit queue sorted by the given list of fields.
* @param reader Index to use.
* @param fields Field names, in priority order (highest priority first). Cannot be <code>null</code> or empty.
* @param fields Fieldable names, in priority order (highest priority first). Cannot be <code>null</code> or empty.
* @param size The number of hits to retain. Must be greater than zero.
* @throws IOException
*/
@ -212,7 +212,7 @@ extends PriorityQueue {
/**
* Returns a comparator for sorting hits according to a field containing integers.
* @param reader Index to use.
* @param fieldname Field containing integer values.
* @param fieldname Fieldable containing integer values.
* @return Comparator for sorting hits.
* @throws IOException If an error occurs reading the index.
*/
@ -243,7 +243,7 @@ extends PriorityQueue {
/**
* Returns a comparator for sorting hits according to a field containing floats.
* @param reader Index to use.
* @param fieldname Field containing float values.
* @param fieldname Fieldable containing float values.
* @return Comparator for sorting hits.
* @throws IOException If an error occurs reading the index.
*/
@ -274,7 +274,7 @@ extends PriorityQueue {
/**
* Returns a comparator for sorting hits according to a field containing strings.
* @param reader Index to use.
* @param fieldname Field containing string values.
* @param fieldname Fieldable containing string values.
* @return Comparator for sorting hits.
* @throws IOException If an error occurs reading the index.
*/
@ -305,7 +305,7 @@ extends PriorityQueue {
/**
* Returns a comparator for sorting hits according to a field containing strings.
* @param reader Index to use.
* @param fieldname Field containing string values.
* @param fieldname Fieldable containing string values.
* @return Comparator for sorting hits.
* @throws IOException If an error occurs reading the index.
*/
@ -336,7 +336,7 @@ extends PriorityQueue {
* floats or strings. Once the type is determined, one of the other static methods
* in this class is called to get the comparator.
* @param reader Index to use.
* @param fieldname Field containing values.
* @param fieldname Fieldable containing values.
* @return Comparator for sorting hits.
* @throws IOException If an error occurs reading the index.
*/

View File

@ -16,19 +16,16 @@ package org.apache.lucene.search;
* limitations under the License.
*/
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.SmallFloat;
import java.io.IOException;
import java.io.Serializable;
import java.util.Collection;
import java.util.Iterator;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexReader; // for javadoc
import org.apache.lucene.index.IndexWriter; // for javadoc
import org.apache.lucene.document.Field; // for javadoc
import org.apache.lucene.util.SmallFloat;
/** Expert: Scoring API.
* <p>Subclasses implement search scoring.
*
@ -44,7 +41,7 @@ import org.apache.lucene.util.SmallFloat;
* ( {@link #tf(int) tf}(t in d) *
* {@link #idf(Term,Searcher) idf}(t)^2 *
* {@link Query#getBoost getBoost}(t in q) *
* {@link Field#getBoost getBoost}(t.field in d) *
* {@link org.apache.lucene.document.Field#getBoost getBoost}(t.field in d) *
* {@link #lengthNorm(String,int) lengthNorm}(t.field in d) )
* </small></td>
* <td valign="middle" rowspan="2">&nbsp;*
@ -152,7 +149,7 @@ public abstract class Similarity implements Serializable {
* <i>fieldName</i> of <i>doc</i>.
* @return a normalization factor for hits on this field of this document
*
* @see Field#setBoost(float)
* @see org.apache.lucene.document.Field#setBoost(float)
*/
public abstract float lengthNorm(String fieldName, int numTokens);
@ -179,7 +176,7 @@ public abstract class Similarity implements Serializable {
* small to represent are rounded up to the smallest positive representable
* value.
*
* @see Field#setBoost(float)
* @see org.apache.lucene.document.Field#setBoost(float)
* @see SmallFloat
*/
public static byte encodeNorm(float f) {

View File

@ -29,7 +29,7 @@ import java.io.Serializable;
* and does not need to be stored (unless you happen to want it back with the
* rest of your document data). In other words:
*
* <p><code>document.add (new Field ("byNumber", Integer.toString(x), Field.Store.NO, Field.Index.UN_TOKENIZED));</code></p>
* <p><code>document.add (new Field ("byNumber", Integer.toString(x), Field.Store.NO, Field.Index.UN_TOKENIZED));</code></p>
*
*
* <p><h3>Valid Types of Values</h3>

View File

@ -19,7 +19,7 @@ extends Serializable {
/**
* Creates a comparator for the field in the given index.
* @param reader Index to create comparator for.
* @param fieldname Field to create comparator for.
* @param fieldname Fieldable to create comparator for.
* @return Comparator of ScoreDoc objects.
* @throws IOException If an error occurs reading the index.
*/

View File

@ -116,6 +116,32 @@ public abstract class IndexInput implements Cloneable {
}
}
/**
 * Expert:
 *
 * Similar to {@link #readChars(char[], int, int)}, but does not perform any conversion on the bytes it reads. It still
 * has to invoke {@link #readByte()} just as {@link #readChars(char[], int, int)} does, but it needs no buffer to store the chars
 * and performs none of the bitwise operations, since all we care about in each lead byte is
 * how many more bytes to read.
 * @param length The number of chars to skip
 */
public void skipChars(int length) throws IOException {
  for (int i = 0; i < length; i++) {
    byte b = readByte();
    if ((b & 0x80) == 0) {
      // single-byte char (0xxxxxxx): nothing more to read
    } else if ((b & 0xE0) != 0xE0) {
      // two-byte char (110xxxxx): read one additional byte
      readByte();
    } else {
      // three-byte char (1110xxxx): read two additional bytes
      readByte();
      readByte();
    }
  }
}
/** Closes the stream to further operations. */
public abstract void close() throws IOException;
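
For reference, the length rule skipChars relies on can be stated as a tiny helper (illustrative only, not part of the patch):

  // Bytes occupied by the modified-UTF-8 char whose lead byte is b:
  // 0xxxxxxx -> 1, 110xxxxx -> 2, 1110xxxx -> 3.
  static int charWidth(byte b) {
    if ((b & 0x80) == 0) return 1;
    if ((b & 0xE0) != 0xE0) return 2;
    return 3;
  }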

View File

@ -38,10 +38,10 @@ public class TestBinaryDocument extends TestCase
public void testBinaryFieldInIndex()
throws Exception
{
Field binaryFldStored = new Field("binaryStored", binaryValStored.getBytes(), Field.Store.YES);
Field binaryFldCompressed = new Field("binaryCompressed", binaryValCompressed.getBytes(), Field.Store.COMPRESS);
Field stringFldStored = new Field("stringStored", binaryValStored, Field.Store.YES, Field.Index.NO, Field.TermVector.NO);
Field stringFldCompressed = new Field("stringCompressed", binaryValCompressed, Field.Store.COMPRESS, Field.Index.NO, Field.TermVector.NO);
Fieldable binaryFldStored = new Field("binaryStored", binaryValStored.getBytes(), Field.Store.YES);
Fieldable binaryFldCompressed = new Field("binaryCompressed", binaryValCompressed.getBytes(), Field.Store.COMPRESS);
Fieldable stringFldStored = new Field("stringStored", binaryValStored, Field.Store.YES, Field.Index.NO, Field.TermVector.NO);
Fieldable stringFldCompressed = new Field("stringCompressed", binaryValCompressed, Field.Store.COMPRESS, Field.Index.NO, Field.TermVector.NO);
try {
// binary fields with store off are not allowed

View File

@ -46,9 +46,9 @@ public class TestDocument extends TestCase
throws Exception
{
Document doc = new Document();
Field stringFld = new Field("string", binaryVal, Field.Store.YES, Field.Index.NO);
Field binaryFld = new Field("binary", binaryVal.getBytes(), Field.Store.YES);
Field binaryFld2 = new Field("binary", binaryVal2.getBytes(), Field.Store.YES);
Fieldable stringFld = new Field("string", binaryVal, Field.Store.YES, Field.Index.NO);
Fieldable binaryFld = new Field("binary", binaryVal.getBytes(), Field.Store.YES);
Fieldable binaryFld2 = new Field("binary", binaryVal2.getBytes(), Field.Store.YES);
doc.add(stringFld);
doc.add(binaryFld);

View File

@ -18,12 +18,12 @@ package org.apache.lucene.index;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.*;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.Directory;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.HashMap;
import java.util.Map;
import java.util.Enumeration;
@ -40,6 +40,13 @@ class DocHelper {
public static final String TEXT_FIELD_2_KEY = "textField2";
public static Field textField2 = new Field(TEXT_FIELD_2_KEY, FIELD_2_TEXT, Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
public static final String FIELD_2_COMPRESSED_TEXT = "field field field two text";
//Fields will be lexicographically sorted. So, the order is: field, text, two
public static final int [] COMPRESSED_FIELD_2_FREQS = {3, 1, 1};
public static final String COMPRESSED_TEXT_FIELD_2_KEY = "compressedTextField2";
public static Field compressedTextField2 = new Field(COMPRESSED_TEXT_FIELD_2_KEY, FIELD_2_COMPRESSED_TEXT, Field.Store.COMPRESS, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
public static final String FIELD_3_TEXT = "aaaNoNorms aaaNoNorms bbbNoNorms";
public static final String TEXT_FIELD_3_KEY = "textField3";
public static Field textField3 = new Field(TEXT_FIELD_3_KEY, FIELD_3_TEXT, Field.Store.YES, Field.Index.TOKENIZED);
@ -71,6 +78,34 @@ class DocHelper {
public static Field unStoredField2 = new Field(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT,
Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.YES);
public static final String LAZY_FIELD_BINARY_KEY = "lazyFieldBinary";
public static byte [] LAZY_FIELD_BINARY_BYTES;
public static Field lazyFieldBinary;
public static final String LAZY_FIELD_KEY = "lazyField";
public static final String LAZY_FIELD_TEXT = "These are some field bytes";
public static Field lazyField = new Field(LAZY_FIELD_KEY, LAZY_FIELD_TEXT, Field.Store.YES, Field.Index.TOKENIZED);
public static final String LARGE_LAZY_FIELD_KEY = "largeLazyField";
public static String LARGE_LAZY_FIELD_TEXT;
public static Field largeLazyField;
//From Issue 509
public static final String FIELD_UTF1_TEXT = "field one \u4e00text";
public static final String TEXT_FIELD_UTF1_KEY = "textField1Utf8";
public static Field textUtfField1 = new Field(TEXT_FIELD_UTF1_KEY, FIELD_UTF1_TEXT,
Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO);
public static final String FIELD_UTF2_TEXT = "field field field \u4e00two text";
//Fields will be lexicographically sorted. So, the order is: field, text, two
public static final int [] FIELD_UTF2_FREQS = {3, 1, 1};
public static final String TEXT_FIELD_UTF2_KEY = "textField2Utf8";
public static Field textUtfField2 = new Field(TEXT_FIELD_UTF2_KEY, FIELD_UTF2_TEXT, Field.Store.YES,
Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
public static Map nameValues = null;
// ordered list of all the fields...
@ -79,14 +114,20 @@ class DocHelper {
textField1,
textField2,
textField3,
compressedTextField2,
keyField,
noNormsField,
unIndField,
unStoredField1,
unStoredField2,
textUtfField1,
textUtfField2,
lazyField,
lazyFieldBinary,//placeholder for binary field, since this is null. It must be second to last.
largeLazyField//placeholder for large field, since this is null. It must always be last
};
// Map<String fieldName, Field field>
// Map<String fieldName, Fieldable field>
public static Map all=new HashMap();
public static Map indexed=new HashMap();
public static Map stored=new HashMap();
@ -94,11 +135,28 @@ class DocHelper {
public static Map unindexed=new HashMap();
public static Map termvector=new HashMap();
public static Map notermvector=new HashMap();
public static Map lazy= new HashMap();
public static Map noNorms=new HashMap();
static {
//Initialize the large Lazy Field
StringBuffer buffer = new StringBuffer();
for (int i = 0; i < 10000; i++)
{
buffer.append("Lazily loading lengths of language in lieu of laughing ");
}
try {
LAZY_FIELD_BINARY_BYTES = "These are some binary field bytes".getBytes("UTF8");
} catch (UnsupportedEncodingException e) {
  // cannot happen: every JVM is required to support UTF-8
}
lazyFieldBinary = new Field(LAZY_FIELD_BINARY_KEY, LAZY_FIELD_BINARY_BYTES, Field.Store.YES);
fields[fields.length - 2] = lazyFieldBinary;
LARGE_LAZY_FIELD_TEXT = buffer.toString();
largeLazyField = new Field(LARGE_LAZY_FIELD_KEY, LARGE_LAZY_FIELD_TEXT, Field.Store.YES, Field.Index.TOKENIZED);
fields[fields.length - 1] = largeLazyField;
for (int i=0; i<fields.length; i++) {
Field f = fields[i];
Fieldable f = fields[i];
add(all,f);
if (f.isIndexed()) add(indexed,f);
else add(unindexed,f);
@ -107,11 +165,12 @@ class DocHelper {
if (f.isStored()) add(stored,f);
else add(unstored,f);
if (f.getOmitNorms()) add(noNorms,f);
if (f.isLazy()) add(lazy, f);
}
}
private static void add(Map map, Field field) {
private static void add(Map map, Fieldable field) {
map.put(field.name(), field);
}
@ -121,12 +180,18 @@ class DocHelper {
nameValues = new HashMap();
nameValues.put(TEXT_FIELD_1_KEY, FIELD_1_TEXT);
nameValues.put(TEXT_FIELD_2_KEY, FIELD_2_TEXT);
nameValues.put(COMPRESSED_TEXT_FIELD_2_KEY, FIELD_2_COMPRESSED_TEXT);
nameValues.put(TEXT_FIELD_3_KEY, FIELD_3_TEXT);
nameValues.put(KEYWORD_FIELD_KEY, KEYWORD_TEXT);
nameValues.put(NO_NORMS_KEY, NO_NORMS_TEXT);
nameValues.put(UNINDEXED_FIELD_KEY, UNINDEXED_FIELD_TEXT);
nameValues.put(UNSTORED_FIELD_1_KEY, UNSTORED_1_FIELD_TEXT);
nameValues.put(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT);
nameValues.put(LAZY_FIELD_KEY, LAZY_FIELD_TEXT);
nameValues.put(LAZY_FIELD_BINARY_KEY, LAZY_FIELD_BINARY_BYTES);
nameValues.put(LARGE_LAZY_FIELD_KEY, LARGE_LAZY_FIELD_TEXT);
nameValues.put(TEXT_FIELD_UTF1_KEY, FIELD_UTF1_TEXT);
nameValues.put(TEXT_FIELD_UTF2_KEY, FIELD_UTF2_TEXT);
}
/**

View File

@ -21,8 +21,7 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.*;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.RAMDirectory;
@ -64,7 +63,7 @@ public class TestDocumentWriter extends TestCase {
assertTrue(doc != null);
//System.out.println("Document: " + doc);
Field [] fields = doc.getFields("textField2");
Fieldable [] fields = doc.getFields("textField2");
assertTrue(fields != null && fields.length == 1);
assertTrue(fields[0].stringValue().equals(DocHelper.FIELD_2_TEXT));
assertTrue(fields[0].isTermVectorStored());

View File

@ -17,13 +17,18 @@ package org.apache.lucene.index;
*/
import junit.framework.TestCase;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import java.io.File;
import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import java.util.Enumeration;
public class TestFieldsReader extends TestCase {
private RAMDirectory dir = new RAMDirectory();
@ -50,11 +55,11 @@ public class TestFieldsReader extends TestCase {
FieldsReader reader = new FieldsReader(dir, "test", fieldInfos);
assertTrue(reader != null);
assertTrue(reader.size() == 1);
Document doc = reader.doc(0);
Document doc = reader.doc(0, null);
assertTrue(doc != null);
assertTrue(doc.getField("textField1") != null);
assertTrue(doc.getField(DocHelper.TEXT_FIELD_1_KEY) != null);
Field field = doc.getField("textField2");
Fieldable field = doc.getField(DocHelper.TEXT_FIELD_2_KEY);
assertTrue(field != null);
assertTrue(field.isTermVectorStored() == true);
@ -62,7 +67,7 @@ public class TestFieldsReader extends TestCase {
assertTrue(field.isStorePositionWithTermVector() == true);
assertTrue(field.getOmitNorms() == false);
field = doc.getField("textField3");
field = doc.getField(DocHelper.TEXT_FIELD_3_KEY);
assertTrue(field != null);
assertTrue(field.isTermVectorStored() == false);
assertTrue(field.isStoreOffsetWithTermVector() == false);
@ -72,4 +77,144 @@ public class TestFieldsReader extends TestCase {
reader.close();
}
public void testLazyFields() throws Exception {
assertTrue(dir != null);
assertTrue(fieldInfos != null);
FieldsReader reader = new FieldsReader(dir, "test", fieldInfos);
assertTrue(reader != null);
assertTrue(reader.size() == 1);
Set loadFieldNames = new HashSet();
loadFieldNames.add(DocHelper.TEXT_FIELD_1_KEY);
loadFieldNames.add(DocHelper.TEXT_FIELD_UTF1_KEY);
Set lazyFieldNames = new HashSet();
//new String[]{DocHelper.LARGE_LAZY_FIELD_KEY, DocHelper.LAZY_FIELD_KEY, DocHelper.LAZY_FIELD_BINARY_KEY};
lazyFieldNames.add(DocHelper.LARGE_LAZY_FIELD_KEY);
lazyFieldNames.add(DocHelper.LAZY_FIELD_KEY);
lazyFieldNames.add(DocHelper.LAZY_FIELD_BINARY_KEY);
lazyFieldNames.add(DocHelper.TEXT_FIELD_UTF2_KEY);
SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(loadFieldNames, lazyFieldNames);
Document doc = reader.doc(0, fieldSelector);
assertTrue("doc is null and it shouldn't be", doc != null);
Fieldable field = doc.getField(DocHelper.LAZY_FIELD_KEY);
assertTrue("field is null and it shouldn't be", field != null);
assertTrue("field is not lazy and it should be", field.isLazy());
String value = field.stringValue();
assertTrue("value is null and it shouldn't be", value != null);
assertTrue(value + " is not equal to " + DocHelper.LAZY_FIELD_TEXT, value.equals(DocHelper.LAZY_FIELD_TEXT) == true);
field = doc.getField(DocHelper.TEXT_FIELD_1_KEY);
assertTrue("field is null and it shouldn't be", field != null);
assertTrue("Field is lazy and it should not be", field.isLazy() == false);
field = doc.getField(DocHelper.TEXT_FIELD_UTF1_KEY);
assertTrue("field is null and it shouldn't be", field != null);
assertTrue("Field is lazy and it should not be", field.isLazy() == false);
assertTrue(field.stringValue() + " is not equal to " + DocHelper.FIELD_UTF1_TEXT, field.stringValue().equals(DocHelper.FIELD_UTF1_TEXT) == true);
field = doc.getField(DocHelper.TEXT_FIELD_UTF2_KEY);
assertTrue("field is null and it shouldn't be", field != null);
assertTrue("Field is lazy and it should not be", field.isLazy() == true);
assertTrue(field.stringValue() + " is not equal to " + DocHelper.FIELD_UTF2_TEXT, field.stringValue().equals(DocHelper.FIELD_UTF2_TEXT) == true);
field = doc.getField(DocHelper.LAZY_FIELD_BINARY_KEY);
assertTrue("field is null and it shouldn't be", field != null);
byte [] bytes = field.binaryValue();
assertTrue("bytes is null and it shouldn't be", bytes != null);
assertTrue("", DocHelper.LAZY_FIELD_BINARY_BYTES.length == bytes.length);
for (int i = 0; i < bytes.length; i++) {
assertTrue("byte[" + i + "] is mismatched", bytes[i] == DocHelper.LAZY_FIELD_BINARY_BYTES[i]);
}
}
public void testLoadFirst() throws Exception {
assertTrue(dir != null);
assertTrue(fieldInfos != null);
FieldsReader reader = new FieldsReader(dir, "test", fieldInfos);
assertTrue(reader != null);
assertTrue(reader.size() == 1);
LoadFirstFieldSelector fieldSelector = new LoadFirstFieldSelector();
Document doc = reader.doc(0, fieldSelector);
assertTrue("doc is null and it shouldn't be", doc != null);
int count = 0;
Enumeration enumeration = doc.fields();
while (enumeration.hasMoreElements()) {
Field field = (Field) enumeration.nextElement();
assertTrue("field is null and it shouldn't be", field != null);
String sv = field.stringValue();
assertTrue("sv is null and it shouldn't be", sv != null);
count++;
}
assertTrue(count + " does not equal: " + 1, count == 1);
}
/**
* Not really a test per se, but we should have some way of assessing whether this is worthwhile.
* <p/>
* Must be tested using a File-based directory
*
* @throws Exception
*/
public void testLazyPerformance() throws Exception {
String tmpIODir = System.getProperty("java.io.tmpdir");
String path = tmpIODir + File.separator + "lazyDir";
File file = new File(path);
FSDirectory tmpDir = FSDirectory.getDirectory(file, true);
assertTrue(tmpDir != null);
DocumentWriter writer = new DocumentWriter(tmpDir, new WhitespaceAnalyzer(),
Similarity.getDefault(), 50);
assertTrue(writer != null);
writer.addDocument("test", testDoc);
assertTrue(fieldInfos != null);
FieldsReader reader;
long lazyTime = 0;
long regularTime = 0;
int length = 50;
Set lazyFieldNames = new HashSet();
lazyFieldNames.add(DocHelper.LARGE_LAZY_FIELD_KEY);
SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(Collections.EMPTY_SET, lazyFieldNames);
for (int i = 0; i < length; i++) {
reader = new FieldsReader(tmpDir, "test", fieldInfos);
assertTrue(reader != null);
assertTrue(reader.size() == 1);
Document doc;
doc = reader.doc(0, null);//Load all of them
assertTrue("doc is null and it shouldn't be", doc != null);
Fieldable field = doc.getField(DocHelper.LARGE_LAZY_FIELD_KEY);
assertTrue("field is lazy", field.isLazy() == false);
String value;
long start;
long finish;
start = System.currentTimeMillis();
//On my machine this was always 0ms.
value = field.stringValue();
finish = System.currentTimeMillis();
assertTrue("value is null and it shouldn't be", value != null);
assertTrue("field is null and it shouldn't be", field != null);
regularTime += (finish - start);
reader.close();
reader = null;
doc = null;
//Hmmm, are we still in cache???
System.gc();
reader = new FieldsReader(tmpDir, "test", fieldInfos);
doc = reader.doc(0, fieldSelector);
field = doc.getField(DocHelper.LARGE_LAZY_FIELD_KEY);
assertTrue("field is not lazy", field.isLazy() == true);
start = System.currentTimeMillis();
//On my machine this took around 50 - 70ms
value = field.stringValue();
finish = System.currentTimeMillis();
assertTrue("value is null and it shouldn't be", value != null);
lazyTime += (finish - start);
reader.close();
}
System.out.println("Average Non-lazy time (should be very close to zero): " + regularTime / length + " ms for " + length + " reads");
System.out.println("Average Lazy Time (should be greater than zero): " + lazyTime / length + " ms for " + length + " reads");
}
}

View File

@ -34,4 +34,44 @@ public class TestIndexInput extends TestCase {
assertEquals(16385, is.readVInt());
assertEquals("Lucene", is.readString());
}
/**
* Expert
*
* @throws IOException
*/
public void testSkipChars() throws IOException {
byte[] bytes = new byte[]{(byte) 0x80, 0x01,
(byte) 0xFF, 0x7F,
(byte) 0x80, (byte) 0x80, 0x01,
(byte) 0x81, (byte) 0x80, 0x01,
0x06, 'L', 'u', 'c', 'e', 'n', 'e',
};
String utf8Str = "\u0634\u1ea1";
byte [] utf8Bytes = utf8Str.getBytes("UTF-8");
byte [] theBytes = new byte[bytes.length + 1 + utf8Bytes.length];
System.arraycopy(bytes, 0, theBytes, 0, bytes.length);
theBytes[bytes.length] = (byte)utf8Str.length();//Add in the number of chars we are storing, which should fit in a byte for this test
System.arraycopy(utf8Bytes, 0, theBytes, bytes.length + 1, utf8Bytes.length);
IndexInput is = new MockIndexInput(theBytes);
assertEquals(128, is.readVInt());
assertEquals(16383, is.readVInt());
assertEquals(16384, is.readVInt());
assertEquals(16385, is.readVInt());
int charsToRead = is.readVInt();//number of chars in the Lucene string
assertTrue(0x06 + " does not equal: " + charsToRead, 0x06 == charsToRead);
is.skipChars(3);
char [] chars = new char[3];//there should be 6 chars remaining
is.readChars(chars, 0, 3);
String tmpStr = new String(chars);
assertTrue(tmpStr + " is not equal to " + "ene", tmpStr.equals("ene" ) == true);
//Now read the UTF8 stuff
charsToRead = is.readVInt() - 1;//since we are skipping one
is.skipChars(1);
assertTrue(utf8Str.length() - 1 + " does not equal: " + charsToRead, utf8Str.length() - 1 == charsToRead);
chars = new char[charsToRead];
is.readChars(chars, 0, charsToRead);
tmpStr = new String(chars);
assertTrue(tmpStr + " is not equal to " + utf8Str.substring(1), tmpStr.equals(utf8Str.substring(1)) == true);
}
}

View File

@ -22,6 +22,8 @@ import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;

View File

@ -16,20 +16,25 @@ package org.apache.lucene.index;
* limitations under the License.
*/
import java.io.IOException;
import java.util.Collection;
import junit.framework.TestCase;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.document.MapFieldSelector;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.*;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Enumeration;
public class TestParallelReader extends TestCase {
@ -71,6 +76,35 @@ public class TestParallelReader extends TestCase {
assertTrue(fieldNames.contains("f4"));
}
public void testDocument() throws IOException {
Directory dir1 = getDir1();
Directory dir2 = getDir2();
ParallelReader pr = new ParallelReader();
pr.add(IndexReader.open(dir1));
pr.add(IndexReader.open(dir2));
Document doc11 = pr.document(0, new MapFieldSelector(new String[] {"f1"}));
Document doc24 = pr.document(1, new MapFieldSelector(Arrays.asList(new String[] {"f4"})));
Document doc223 = pr.document(1, new MapFieldSelector(new String[] {"f2", "f3"}));
assertEquals(1, numFields(doc11));
assertEquals(1, numFields(doc24));
assertEquals(2, numFields(doc223));
assertEquals("v1", doc11.get("f1"));
assertEquals("v2", doc24.get("f4"));
assertEquals("v2", doc223.get("f2"));
assertEquals("v2", doc223.get("f3"));
}
private int numFields(Document doc) {
int num;
Enumeration e = doc.fields();
for (num=0; e.hasMoreElements(); num++)
e.nextElement();
return num;
}
public void testIncompatibleIndexes() throws IOException {
// two documents:
Directory dir1 = getDir1();

View File

@ -87,7 +87,7 @@ public class TestSegmentMerger extends TestCase {
Collection stored = mergedReader.getFieldNames(IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR);
assertTrue(stored != null);
//System.out.println("stored size: " + stored.size());
assertTrue(stored.size() == 2);
assertTrue("We do not have 4 fields that were indexed with term vector",stored.size() == 4);
TermFreqVector vector = mergedReader.getTermFreqVector(0, DocHelper.TEXT_FIELD_2_KEY);
assertTrue(vector != null);

View File

@ -19,7 +19,7 @@ package org.apache.lucene.index;
import junit.framework.TestCase;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.search.DefaultSimilarity;
import java.io.IOException;
@ -64,7 +64,7 @@ public class TestSegmentReader extends TestCase {
Enumeration fields = result.fields();
while (fields.hasMoreElements()) {
Field field = (Field) fields.nextElement();
Fieldable field = (Fieldable) fields.nextElement();
assertTrue(field != null);
assertTrue(DocHelper.nameValues.containsKey(field.name()));
}
@ -166,7 +166,7 @@ public class TestSegmentReader extends TestCase {
public static void checkNorms(IndexReader reader) throws IOException {
// test omit norms
for (int i=0; i<DocHelper.fields.length; i++) {
Field f = DocHelper.fields[i];
Fieldable f = DocHelper.fields[i];
if (f.isIndexed()) {
assertEquals(reader.hasNorms(f.name()), !f.getOmitNorms());
assertEquals(reader.hasNorms(f.name()), !DocHelper.noNorms.containsKey(f.name()));
@ -202,7 +202,7 @@ public class TestSegmentReader extends TestCase {
TermFreqVector [] results = reader.getTermFreqVectors(0);
assertTrue(results != null);
assertTrue(results.length == 2);
assertTrue("We do not have 4 term freq vectors, we have: " + results.length, results.length == 4);
}
}

View File

@ -17,14 +17,11 @@ package org.apache.lucene.search;
*/
import junit.framework.TestCase;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.*;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.RAMDirectory;
/** Document boost unit test.
*
@ -40,8 +37,8 @@ public class TestDocBoost extends TestCase {
RAMDirectory store = new RAMDirectory();
IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true);
Field f1 = new Field("field", "word", Field.Store.YES, Field.Index.TOKENIZED);
Field f2 = new Field("field", "word", Field.Store.YES, Field.Index.TOKENIZED);
Fieldable f1 = new Field("field", "word", Field.Store.YES, Field.Index.TOKENIZED);
Fieldable f2 = new Field("field", "word", Field.Store.YES, Field.Index.TOKENIZED);
f2.setBoost(2.0f);
Document d1 = new Document();

View File

@ -16,19 +16,17 @@ package org.apache.lucene.search;
* limitations under the License.
*/
import java.io.IOException;
import junit.framework.TestCase;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.*;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.English;
import java.io.IOException;
/**
* @author Bernhard Messer
* @version $rcs = ' $Id$ ' ;
@ -49,7 +47,7 @@ public class TestMultiThreadTermVectors extends TestCase {
//writer.infoStream = System.out;
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
Field fld = new Field("field", English.intToEnglish(i), Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.YES);
Fieldable fld = new Field("field", English.intToEnglish(i), Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.YES);
doc.add(fld);
writer.addDocument(doc);
}

View File

@ -16,22 +16,17 @@ package org.apache.lucene.search;
* limitations under the License.
*/
import java.io.IOException;
import java.io.Reader;
import junit.framework.TestCase;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.analysis.*;
import org.apache.lucene.document.*;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import java.io.IOException;
import java.io.Reader;
/**
* Tests {@link PhraseQuery}.
*
@ -59,7 +54,7 @@ public class TestPhraseQuery extends TestCase {
Document doc = new Document();
doc.add(new Field("field", "one two three four five", Field.Store.YES, Field.Index.TOKENIZED));
doc.add(new Field("repeated", "this is a repeated field - first part", Field.Store.YES, Field.Index.TOKENIZED));
Field repeatedField = new Field("repeated", "second part of a repeated field", Field.Store.YES, Field.Index.TOKENIZED);
Fieldable repeatedField = new Field("repeated", "second part of a repeated field", Field.Store.YES, Field.Index.TOKENIZED);
doc.add(repeatedField);
writer.addDocument(doc);

View File

@ -17,15 +17,12 @@ package org.apache.lucene.search;
*/
import junit.framework.TestCase;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.*;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.RAMDirectory;
/** Document boost unit test.
*
@ -42,7 +39,7 @@ public class TestSetNorm extends TestCase {
IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true);
// add the same document four times
Field f1 = new Field("field", "word", Field.Store.YES, Field.Index.TOKENIZED);
Fieldable f1 = new Field("field", "word", Field.Store.YES, Field.Index.TOKENIZED);
Document d1 = new Document();
d1.add(f1);
writer.addDocument(d1);