mirror of https://github.com/apache/lucene.git
Implementation of issue LUCENE-545: introduces a new Fieldable interface (extracted from Field), which is now used wherever Field was previously used; Field now implements Fieldable.
Adds a new method to IndexReader and derived classes for working with the new FieldSelector interface. FieldSelector defines a mechanism for, among other things, lazy loading of fields. Lazy loading of fields is implemented in FieldsReader, with a test case added to TestFieldsReader.java. git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@413201 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent 2b9effb894
commit d4cc19d03e
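For orientation before the diff itself, a minimal sketch of how the FieldSelector API introduced by this commit is meant to be used. This is not part of the commit; the index path and the field names "title" and "body" are hypothetical.

// Hedged usage sketch of the new IndexReader.document(int, FieldSelector).
import java.util.HashSet;
import java.util.Set;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.SetBasedFieldSelector;
import org.apache.lucene.index.IndexReader;

public class FieldSelectorUsage {
  public static void main(String[] args) throws Exception {
    IndexReader reader = IndexReader.open("/path/to/index");
    Set fieldsToLoad = new HashSet();
    fieldsToLoad.add("title");        // load eagerly
    Set lazyFieldsToLoad = new HashSet();
    lazyFieldsToLoad.add("body");     // load only when the value is asked for
    // New in this commit: document(int, FieldSelector)
    Document doc = reader.document(0,
        new SetBasedFieldSelector(fieldsToLoad, lazyFieldsToLoad));
    Fieldable body = doc.getField("body");
    // Per the Fieldable.isLazy() contract, this value is only valid
    // while the IndexReader that produced the Document is still open.
    String text = body.stringValue();
    reader.close();
  }
}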
@@ -9,6 +9,9 @@ New features

 1. LUCENE-503: New ThaiAnalyzer and ThaiWordFilter in contrib/analyzers
    (Samphan Raruenrom via Chris Hostetter)

 2. LUCENE-545: New FieldSelector API and associated changes to IndexReader and implementations.
    New Fieldable interface for use with the lazy field loading mechanism. (Grant Ingersoll and Chuck Williams via Grant Ingersoll)

API Changes

 1. LUCENE-438: Remove "final" from Token, implement Cloneable, allow
@@ -16,20 +16,11 @@ package org.apache.lucene.index.memory;
 * limitations under the License.
 */

import java.io.IOException;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;

@@ -43,6 +34,16 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.Similarity;

import java.io.IOException;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

/**
 * High-performance single-document main memory Apache Lucene fulltext search index.
 *

@@ -1004,8 +1005,14 @@ public class MemoryIndex {
      if (DEBUG) System.err.println("MemoryIndexReader.document");
      return new Document(); // there are no stored fields
    }

    public boolean isDeleted(int n) {

    //When we convert to JDK 1.5 make this Set<String>
    public Document document(int n, FieldSelector fieldSelector) throws IOException {
      if (DEBUG) System.err.println("MemoryIndexReader.document");
      return new Document(); // there are no stored fields
    }

    public boolean isDeleted(int n) {
      if (DEBUG) System.err.println("MemoryIndexReader.isDeleted");
      return false;
    }
@@ -22,6 +22,7 @@ import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Hits;

@@ -190,7 +191,7 @@ public class ListSearcher extends AbstractListModel {
            //table model row that we are mapping to
            for (int t=0; t<hits.length(); t++){
                Document document = hits.doc(t);
                Field field = document.getField(ROW_NUMBER);
                Fieldable field = document.getField(ROW_NUMBER);
                rowToModelIndex.add(new Integer(field.stringValue()));
            }
        } catch (Exception e){
@@ -16,26 +16,23 @@ package org.apache.lucene.swing.models;
 * limitations under the License.
 */

import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Hits;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.store.RAMDirectory;

import java.awt.*;
import java.awt.event.*;
import java.util.*;
import java.util.List;

import javax.swing.*;
import javax.swing.event.TableModelEvent;
import javax.swing.event.TableModelListener;
import javax.swing.table.*;
import javax.swing.table.AbstractTableModel;
import javax.swing.table.TableModel;
import java.util.ArrayList;


/**

@@ -275,7 +272,7 @@ public class TableSearcher extends AbstractTableModel {
            //table model row that we are mapping to
            for (int t=0; t<hits.length(); t++){
                Document document = hits.doc(t);
                Field field = document.getField(ROW_NUMBER);
                Fieldable field = document.getField(ROW_NUMBER);
                rowToModelIndex.add(new Integer(field.stringValue()));
            }
        } catch (Exception e){
@@ -38,16 +38,16 @@ public abstract class Analyzer {

  /**
   * Invoked before indexing a Field instance if
   * Invoked before indexing a Fieldable instance if
   * terms have already been added to that field. This allows custom
   * analyzers to place an automatic position increment gap between
   * Field instances using the same field name. The default value
   * Fieldable instances using the same field name. The default value
   * position increment gap is 0. With a 0 position increment gap and
   * the typical default token position increment of 1, all terms in a field,
   * including across Field instances, are in successive positions, allowing
   * exact PhraseQuery matches, for instance, across Field instance boundaries.
   * including across Fieldable instances, are in successive positions, allowing
   * exact PhraseQuery matches, for instance, across Fieldable instance boundaries.
   *
   * @param fieldName Field name being indexed.
   * @param fieldName Fieldable name being indexed.
   * @return position increment gap, added to the next token emitted from {@link #tokenStream(String,Reader)}
   */
  public int getPositionIncrementGap(String fieldName)
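The javadoc above explains the mechanism: with a gap of 0, terms of successive Fieldable instances with the same name occupy consecutive positions, so PhraseQuery can match across instance boundaries. A hedged sketch of an Analyzer that prevents that by returning a nonzero gap; the delegate analyzer and the gap size of 100 are illustrative choices, not values from this commit.

import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceAnalyzer;

public class GapAnalyzer extends Analyzer {
  private final Analyzer delegate = new WhitespaceAnalyzer();

  public TokenStream tokenStream(String fieldName, Reader reader) {
    return delegate.tokenStream(fieldName, reader);
  }

  // A gap > 0 keeps terms of separate instances of the same field apart,
  // so exact phrase matches cannot span instance boundaries.
  public int getPositionIncrementGap(String fieldName) {
    return 100;
  }
}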
@@ -0,0 +1,274 @@
package org.apache.lucene.document;
/**
 * Copyright 2006 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


/**
 * Base class for concrete Fieldable implementations, holding the flags and
 * field data they share.
 **/
public abstract class AbstractField implements Fieldable {

  protected String name = "body";
  protected boolean storeTermVector = false;
  protected boolean storeOffsetWithTermVector = false;
  protected boolean storePositionWithTermVector = false;
  protected boolean omitNorms = false;
  protected boolean isStored = false;
  protected boolean isIndexed = true;
  protected boolean isTokenized = true;
  protected boolean isBinary = false;
  protected boolean isCompressed = false;
  protected boolean lazy = false;
  protected float boost = 1.0f;
  // the one and only data object for all different kinds of field values
  protected Object fieldsData = null;

  protected AbstractField()
  {

  }

  protected AbstractField(String name, Field.Store store, Field.Index index, Field.TermVector termVector) {
    if (name == null)
      throw new NullPointerException("name cannot be null");
    this.name = name.intern(); // field names are interned

    if (store == Field.Store.YES){
      this.isStored = true;
      this.isCompressed = false;
    }
    else if (store == Field.Store.COMPRESS) {
      this.isStored = true;
      this.isCompressed = true;
    }
    else if (store == Field.Store.NO){
      this.isStored = false;
      this.isCompressed = false;
    }
    else
      throw new IllegalArgumentException("unknown store parameter " + store);

    if (index == Field.Index.NO) {
      this.isIndexed = false;
      this.isTokenized = false;
    } else if (index == Field.Index.TOKENIZED) {
      this.isIndexed = true;
      this.isTokenized = true;
    } else if (index == Field.Index.UN_TOKENIZED) {
      this.isIndexed = true;
      this.isTokenized = false;
    } else if (index == Field.Index.NO_NORMS) {
      this.isIndexed = true;
      this.isTokenized = false;
      this.omitNorms = true;
    } else {
      throw new IllegalArgumentException("unknown index parameter " + index);
    }

    this.isBinary = false;

    setStoreTermVector(termVector);
  }

  /** Sets the boost factor for hits on this field. This value will be
   * multiplied into the score of all hits on this field of this
   * document.
   *
   * <p>The boost is multiplied by {@link org.apache.lucene.document.Document#getBoost()} of the document
   * containing this field. If a document has multiple fields with the same
   * name, all such values are multiplied together. This product is then
   * multiplied by the value {@link org.apache.lucene.search.Similarity#lengthNorm(String,int)}, and
   * rounded by {@link org.apache.lucene.search.Similarity#encodeNorm(float)} before it is stored in the
   * index. One should attempt to ensure that this product does not overflow
   * the range of that encoding.
   *
   * @see org.apache.lucene.document.Document#setBoost(float)
   * @see org.apache.lucene.search.Similarity#lengthNorm(String, int)
   * @see org.apache.lucene.search.Similarity#encodeNorm(float)
   */
  public void setBoost(float boost) {
    this.boost = boost;
  }

  /** Returns the boost factor for hits for this field.
   *
   * <p>The default value is 1.0.
   *
   * <p>Note: this value is not stored directly with the document in the index.
   * Documents returned from {@link org.apache.lucene.index.IndexReader#document(int)} and
   * {@link org.apache.lucene.search.Hits#doc(int)} may thus not have the same value present as when
   * this field was indexed.
   *
   * @see #setBoost(float)
   */
  public float getBoost() {
    return boost;
  }

  /** Returns the name of the field as an interned string.
   * For example "date", "title", "body", ...
   */
  public String name() { return name; }

  protected void setStoreTermVector(Field.TermVector termVector) {
    if (termVector == Field.TermVector.NO) {
      this.storeTermVector = false;
      this.storePositionWithTermVector = false;
      this.storeOffsetWithTermVector = false;
    }
    else if (termVector == Field.TermVector.YES) {
      this.storeTermVector = true;
      this.storePositionWithTermVector = false;
      this.storeOffsetWithTermVector = false;
    }
    else if (termVector == Field.TermVector.WITH_POSITIONS) {
      this.storeTermVector = true;
      this.storePositionWithTermVector = true;
      this.storeOffsetWithTermVector = false;
    }
    else if (termVector == Field.TermVector.WITH_OFFSETS) {
      this.storeTermVector = true;
      this.storePositionWithTermVector = false;
      this.storeOffsetWithTermVector = true;
    }
    else if (termVector == Field.TermVector.WITH_POSITIONS_OFFSETS) {
      this.storeTermVector = true;
      this.storePositionWithTermVector = true;
      this.storeOffsetWithTermVector = true;
    }
    else {
      throw new IllegalArgumentException("unknown termVector parameter " + termVector);
    }
  }

  /** True iff the value of the field is to be stored in the index for return
      with search hits. It is an error for this to be true if a field is
      Reader-valued. */
  public final boolean isStored() { return isStored; }

  /** True iff the value of the field is to be indexed, so that it may be
      searched on. */
  public final boolean isIndexed() { return isIndexed; }

  /** True iff the value of the field should be tokenized as text prior to
      indexing. Un-tokenized fields are indexed as a single word and may not be
      Reader-valued. */
  public final boolean isTokenized() { return isTokenized; }

  /** True if the value of the field is stored and compressed within the index */
  public final boolean isCompressed() { return isCompressed; }

  /** True iff the term or terms used to index this field are stored as a term
   * vector, available from {@link org.apache.lucene.index.IndexReader#getTermFreqVector(int,String)}.
   * These methods do not provide access to the original content of the field,
   * only to terms used to index it. If the original content must be
   * preserved, use the <code>stored</code> attribute instead.
   *
   * @see org.apache.lucene.index.IndexReader#getTermFreqVector(int, String)
   */
  public final boolean isTermVectorStored() { return storeTermVector; }

  /**
   * True iff terms are stored as term vector together with their offsets
   * (start and end position in source text).
   */
  public boolean isStoreOffsetWithTermVector(){
    return storeOffsetWithTermVector;
  }

  /**
   * True iff terms are stored as term vector together with their token positions.
   */
  public boolean isStorePositionWithTermVector(){
    return storePositionWithTermVector;
  }

  /** True iff the value of the field is stored as binary */
  public final boolean isBinary() { return isBinary; }

  /** True if norms are omitted for this indexed field */
  public boolean getOmitNorms() { return omitNorms; }

  /** Expert:
   *
   * If set, omit normalization factors associated with this indexed field.
   * This effectively disables indexing boosts and length normalization for this field.
   */
  public void setOmitNorms(boolean omitNorms) { this.omitNorms=omitNorms; }

  public boolean isLazy() {
    return lazy;
  }

  /** Prints a Field for human consumption. */
  public final String toString() {
    StringBuffer result = new StringBuffer();
    if (isStored) {
      result.append("stored");
      if (isCompressed)
        result.append("/compressed");
      else
        result.append("/uncompressed");
    }
    if (isIndexed) {
      if (result.length() > 0)
        result.append(",");
      result.append("indexed");
    }
    if (isTokenized) {
      if (result.length() > 0)
        result.append(",");
      result.append("tokenized");
    }
    if (storeTermVector) {
      if (result.length() > 0)
        result.append(",");
      result.append("termVector");
    }
    if (storeOffsetWithTermVector) {
      if (result.length() > 0)
        result.append(",");
      result.append("termVectorOffsets");
    }
    if (storePositionWithTermVector) {
      if (result.length() > 0)
        result.append(",");
      result.append("termVectorPosition");
    }
    if (isBinary) {
      if (result.length() > 0)
        result.append(",");
      result.append("binary");
    }
    if (omitNorms) {
      result.append(",omitNorms");
    }
    if (lazy){
      result.append(",lazy");
    }
    result.append('<');
    result.append(name);
    result.append(':');

    if (fieldsData != null && lazy == false) {
      result.append(fieldsData);
    }

    result.append('>');
    return result.toString();
  }
}
@@ -16,24 +16,21 @@ package org.apache.lucene.document;
 * limitations under the License.
 */

import java.util.Enumeration;
import java.util.Iterator;
import java.util.List;
import java.util.ArrayList;
import java.util.Vector;
import org.apache.lucene.index.IndexReader; // for javadoc
import org.apache.lucene.search.Searcher; // for javadoc
import org.apache.lucene.search.Hits; // for javadoc
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.Searcher;

import java.util.*; // for javadoc

/** Documents are the unit of indexing and search.
 *
 * A Document is a set of fields. Each field has a name and a textual value.
 * A field may be {@link Field#isStored() stored} with the document, in which
 * A field may be {@link Fieldable#isStored() stored} with the document, in which
 * case it is returned with search hits on the document. Thus each document
 * should typically contain one or more stored fields which uniquely identify
 * it.
 *
 * <p>Note that fields which are <i>not</i> {@link Field#isStored() stored} are
 * <p>Note that fields which are <i>not</i> {@link Fieldable#isStored() stored} are
 * <i>not</i> available in documents retrieved from the index, e.g. with {@link
 * Hits#doc(int)}, {@link Searcher#doc(int)} or {@link
 * IndexReader#document(int)}.

@@ -50,11 +47,11 @@ public final class Document implements java.io.Serializable {
  /** Sets a boost factor for hits on any field of this document. This value
   * will be multiplied into the score of all hits on this document.
   *
   * <p>Values are multiplied into the value of {@link Field#getBoost()} of
   * <p>Values are multiplied into the value of {@link Fieldable#getBoost()} of
   * each field in this document. Thus, this method in effect sets a default
   * boost for the fields of this document.
   *
   * @see Field#setBoost(float)
   * @see Fieldable#setBoost(float)
   */
  public void setBoost(float boost) {
    this.boost = boost;

@@ -85,7 +82,7 @@ public final class Document implements java.io.Serializable {
   * a document has to be deleted from an index and a new changed version of that
   * document has to be added.</p>
   */
  public final void add(Field field) {
  public final void add(Fieldable field) {
    fields.add(field);
  }


@@ -102,7 +99,7 @@ public final class Document implements java.io.Serializable {
  public final void removeField(String name) {
    Iterator it = fields.iterator();
    while (it.hasNext()) {
      Field field = (Field)it.next();
      Fieldable field = (Fieldable)it.next();
      if (field.name().equals(name)) {
        it.remove();
        return;

@@ -122,7 +119,7 @@ public final class Document implements java.io.Serializable {
  public final void removeFields(String name) {
    Iterator it = fields.iterator();
    while (it.hasNext()) {
      Field field = (Field)it.next();
      Fieldable field = (Fieldable)it.next();
      if (field.name().equals(name)) {
        it.remove();
      }

@@ -133,9 +130,9 @@ public final class Document implements java.io.Serializable {
   * null. If multiple fields exist with this name, this method returns the
   * first value added.
   */
  public final Field getField(String name) {
  public final Fieldable getField(String name) {
    for (int i = 0; i < fields.size(); i++) {
      Field field = (Field)fields.get(i);
      Fieldable field = (Fieldable)fields.get(i);
      if (field.name().equals(name))
        return field;
    }

@@ -149,7 +146,7 @@ public final class Document implements java.io.Serializable {
   */
  public final String get(String name) {
    for (int i = 0; i < fields.size(); i++) {
      Field field = (Field)fields.get(i);
      Fieldable field = (Fieldable)fields.get(i);
      if (field.name().equals(name) && (!field.isBinary()))
        return field.stringValue();
    }

@@ -162,16 +159,16 @@ public final class Document implements java.io.Serializable {
  }

  /**
   * Returns an array of {@link Field}s with the given name.
   * Returns an array of {@link Fieldable}s with the given name.
   * This method can return <code>null</code>.
   *
   * @param name the name of the field
   * @return a <code>Field[]</code> array
   * @return a <code>Fieldable[]</code> array
   */
  public final Field[] getFields(String name) {
  public final Fieldable[] getFields(String name) {
    List result = new ArrayList();
    for (int i = 0; i < fields.size(); i++) {
      Field field = (Field)fields.get(i);
      Fieldable field = (Fieldable)fields.get(i);
      if (field.name().equals(name)) {
        result.add(field);
      }

@@ -180,7 +177,7 @@ public final class Document implements java.io.Serializable {
    if (result.size() == 0)
      return null;

    return (Field[])result.toArray(new Field[result.size()]);
    return (Fieldable[])result.toArray(new Fieldable[result.size()]);
  }

  /**

@@ -193,7 +190,7 @@ public final class Document implements java.io.Serializable {
  public final String[] getValues(String name) {
    List result = new ArrayList();
    for (int i = 0; i < fields.size(); i++) {
      Field field = (Field)fields.get(i);
      Fieldable field = (Fieldable)fields.get(i);
      if (field.name().equals(name) && (!field.isBinary()))
        result.add(field.stringValue());
    }

@@ -215,7 +212,7 @@ public final class Document implements java.io.Serializable {
  public final byte[][] getBinaryValues(String name) {
    List result = new ArrayList();
    for (int i = 0; i < fields.size(); i++) {
      Field field = (Field)fields.get(i);
      Fieldable field = (Fieldable)fields.get(i);
      if (field.name().equals(name) && (field.isBinary()))
        result.add(field.binaryValue());
    }

@@ -237,7 +234,7 @@ public final class Document implements java.io.Serializable {
   */
  public final byte[] getBinaryValue(String name) {
    for (int i=0; i < fields.size(); i++) {
      Field field = (Field)fields.get(i);
      Fieldable field = (Fieldable)fields.get(i);
      if (field.name().equals(name) && (field.isBinary()))
        return field.binaryValue();
    }

@@ -249,7 +246,7 @@ public final class Document implements java.io.Serializable {
    StringBuffer buffer = new StringBuffer();
    buffer.append("Document<");
    for (int i = 0; i < fields.size(); i++) {
      Field field = (Field)fields.get(i);
      Fieldable field = (Fieldable)fields.get(i);
      buffer.append(field.toString());
      if (i != fields.size()-1)
        buffer.append(" ");
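To summarize the Document changes above: every accessor that previously traded in Field now returns or accepts Fieldable. A small hedged sketch, not from the diff, with a hypothetical "id" field:

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;

public class DocumentAccessors {
  public static void main(String[] args) {
    Document doc = new Document();
    // Field construction is unchanged; Field is now one Fieldable among others.
    doc.add(new Field("id", "42", Field.Store.YES, Field.Index.UN_TOKENIZED));
    // getField and getFields now return Fieldable / Fieldable[].
    Fieldable id = doc.getField("id");
    Fieldable[] all = doc.getFields("id");
    System.out.println(id.stringValue() + ", " + all.length);
  }
}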
@@ -16,9 +16,6 @@ package org.apache.lucene.document;
 * limitations under the License.
 */

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.util.Parameter;

import java.io.Reader;

@@ -32,23 +29,7 @@ import java.io.Serializable;
  index, so that they may be returned with hits on the document.
*/

public final class Field implements Serializable {
  private String name = "body";

  // the one and only data object for all different kinds of field values
  private Object fieldsData = null;

  private boolean storeTermVector = false;
  private boolean storeOffsetWithTermVector = false;
  private boolean storePositionWithTermVector = false;
  private boolean omitNorms = false;
  private boolean isStored = false;
  private boolean isIndexed = true;
  private boolean isTokenized = true;
  private boolean isBinary = false;
  private boolean isCompressed = false;

  private float boost = 1.0f;
public final class Field extends AbstractField implements Fieldable, Serializable {

  /** Specifies whether and how a field should be stored. */
  public static final class Store extends Parameter implements Serializable {

@@ -146,45 +127,7 @@ public final class Field implements Serializable {
    public static final TermVector WITH_POSITIONS_OFFSETS = new TermVector("WITH_POSITIONS_OFFSETS");
  }

  /** Sets the boost factor for hits on this field. This value will be
   * multiplied into the score of all hits on this field of this
   * document.
   *
   * <p>The boost is multiplied by {@link Document#getBoost()} of the document
   * containing this field. If a document has multiple fields with the same
   * name, all such values are multiplied together. This product is then
   * multiplied by the value {@link Similarity#lengthNorm(String,int)}, and
   * rounded by {@link Similarity#encodeNorm(float)} before it is stored in the
   * index. One should attempt to ensure that this product does not overflow
   * the range of that encoding.
   *
   * @see Document#setBoost(float)
   * @see Similarity#lengthNorm(String, int)
   * @see Similarity#encodeNorm(float)
   */
  public void setBoost(float boost) {
    this.boost = boost;
  }

  /** Returns the boost factor for hits for this field.
   *
   * <p>The default value is 1.0.
   *
   * <p>Note: this value is not stored directly with the document in the index.
   * Documents returned from {@link IndexReader#document(int)} and
   * {@link Hits#doc(int)} may thus not have the same value present as when
   * this field was indexed.
   *
   * @see #setBoost(float)
   */
  public float getBoost() {
    return boost;
  }
  /** Returns the name of the field as an interned string.
   * For example "date", "title", "body", ...
   */
  public String name() { return name; }


  /** The value of the field as a String, or null. If null, the Reader value
   * or binary value is used. Exactly one of stringValue(), readerValue(), and
   * binaryValue() must be set. */

@@ -365,146 +308,6 @@ public final class Field implements Serializable {

    setStoreTermVector(TermVector.NO);
  }

  private void setStoreTermVector(TermVector termVector) {
    if (termVector == TermVector.NO) {
      this.storeTermVector = false;
      this.storePositionWithTermVector = false;
      this.storeOffsetWithTermVector = false;
    }
    else if (termVector == TermVector.YES) {
      this.storeTermVector = true;
      this.storePositionWithTermVector = false;
      this.storeOffsetWithTermVector = false;
    }
    else if (termVector == TermVector.WITH_POSITIONS) {
      this.storeTermVector = true;
      this.storePositionWithTermVector = true;
      this.storeOffsetWithTermVector = false;
    }
    else if (termVector == TermVector.WITH_OFFSETS) {
      this.storeTermVector = true;
      this.storePositionWithTermVector = false;
      this.storeOffsetWithTermVector = true;
    }
    else if (termVector == TermVector.WITH_POSITIONS_OFFSETS) {
      this.storeTermVector = true;
      this.storePositionWithTermVector = true;
      this.storeOffsetWithTermVector = true;
    }
    else {
      throw new IllegalArgumentException("unknown termVector parameter " + termVector);
    }
  }

  /** True iff the value of the field is to be stored in the index for return
      with search hits. It is an error for this to be true if a field is
      Reader-valued. */
  public final boolean isStored() { return isStored; }

  /** True iff the value of the field is to be indexed, so that it may be
      searched on. */
  public final boolean isIndexed() { return isIndexed; }

  /** True iff the value of the field should be tokenized as text prior to
      indexing. Un-tokenized fields are indexed as a single word and may not be
      Reader-valued. */
  public final boolean isTokenized() { return isTokenized; }

  /** True if the value of the field is stored and compressed within the index */
  public final boolean isCompressed() { return isCompressed; }

  /** True iff the term or terms used to index this field are stored as a term
   * vector, available from {@link IndexReader#getTermFreqVector(int,String)}.
   * These methods do not provide access to the original content of the field,
   * only to terms used to index it. If the original content must be
   * preserved, use the <code>stored</code> attribute instead.
   *
   * @see IndexReader#getTermFreqVector(int, String)
   */
  public final boolean isTermVectorStored() { return storeTermVector; }

  /**
   * True iff terms are stored as term vector together with their offsets
   * (start and end position in source text).
   */
  public boolean isStoreOffsetWithTermVector(){
    return storeOffsetWithTermVector;
  }

  /**
   * True iff terms are stored as term vector together with their token positions.
   */
  public boolean isStorePositionWithTermVector(){
    return storePositionWithTermVector;
  }

  /** True iff the value of the field is stored as binary */
  public final boolean isBinary() { return isBinary; }

  /** True if norms are omitted for this indexed field */
  public boolean getOmitNorms() { return omitNorms; }

  /** Expert:
   *
   * If set, omit normalization factors associated with this indexed field.
   * This effectively disables indexing boosts and length normalization for this field.
   */
  public void setOmitNorms(boolean omitNorms) { this.omitNorms=omitNorms; }

  /** Prints a Field for human consumption. */
  public final String toString() {
    StringBuffer result = new StringBuffer();
    if (isStored) {
      result.append("stored");
      if (isCompressed)
        result.append("/compressed");
      else
        result.append("/uncompressed");
    }
    if (isIndexed) {
      if (result.length() > 0)
        result.append(",");
      result.append("indexed");
    }
    if (isTokenized) {
      if (result.length() > 0)
        result.append(",");
      result.append("tokenized");
    }
    if (storeTermVector) {
      if (result.length() > 0)
        result.append(",");
      result.append("termVector");
    }
    if (storeOffsetWithTermVector) {
      if (result.length() > 0)
        result.append(",");
      result.append("termVectorOffsets");
    }
    if (storePositionWithTermVector) {
      if (result.length() > 0)
        result.append(",");
      result.append("termVectorPosition");
    }
    if (isBinary) {
      if (result.length() > 0)
        result.append(",");
      result.append("binary");
    }
    if (omitNorms) {
      result.append(",omitNorms");
    }
    result.append('<');
    result.append(name);
    result.append(':');

    if (fieldsData != null) {
      result.append(fieldsData);
    }

    result.append('>');
    return result.toString();
  }

}
@@ -0,0 +1,24 @@
package org.apache.lucene.document;
/**
 * Created by IntelliJ IDEA.
 * User: Grant Ingersoll
 * Date: Apr 14, 2006
 * Time: 5:29:26 PM
 * $Id:$
 * Copyright 2005. Center For Natural Language Processing
 */

/**
 * Similar to a {@link java.io.FileFilter}, the FieldSelector allows one to make decisions about
 * what Fields get loaded on a {@link Document} by {@link org.apache.lucene.index.IndexReader#document(int,org.apache.lucene.document.FieldSelector)}
 *
 **/
public interface FieldSelector {

  /**
   *
   * @param fieldName the field name to consider
   * @return a {@link FieldSelectorResult} indicating whether the {@link Field} with <code>fieldName</code> should be loaded, loaded lazily, or skipped
   */
  FieldSelectorResult accept(String fieldName);
}
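A hedged sketch of a custom selector built on this interface; the "contents" field name is illustrative. It lazily loads one presumably large field and eagerly loads everything else:

import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.FieldSelectorResult;

public class LazyContentsSelector implements FieldSelector {
  // Defer reading the large "contents" field until it is actually needed.
  public FieldSelectorResult accept(String fieldName) {
    if ("contents".equals(fieldName)) {
      return FieldSelectorResult.LAZY_LOAD;
    }
    return FieldSelectorResult.LOAD;
  }
}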
@@ -0,0 +1,44 @@
package org.apache.lucene.document;
/**
 * Created by IntelliJ IDEA.
 * User: Grant Ingersoll
 * Date: Apr 14, 2006
 * Time: 5:40:17 PM
 * $Id:$
 * Copyright 2005. Center For Natural Language Processing
 */

/**
 * Provides information about what should be done with this Field
 *
 **/
//Replace with an enumerated type in 1.5
public final class FieldSelectorResult {

  public static final FieldSelectorResult LOAD = new FieldSelectorResult(0);
  public static final FieldSelectorResult LAZY_LOAD = new FieldSelectorResult(1);
  public static final FieldSelectorResult NO_LOAD = new FieldSelectorResult(2);
  public static final FieldSelectorResult LOAD_AND_BREAK = new FieldSelectorResult(3);

  private int id;

  private FieldSelectorResult(int id)
  {
    this.id = id;
  }

  public boolean equals(Object o) {
    if (this == o) return true;
    if (o == null || getClass() != o.getClass()) return false;

    final FieldSelectorResult that = (FieldSelectorResult) o;

    if (id != that.id) return false;

    return true;
  }

  public int hashCode() {
    return id;
  }
}
@@ -0,0 +1,137 @@
package org.apache.lucene.document;

/**
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.Reader;
import java.io.Serializable;

/**
 * Synonymous with {@link Field}.
 *
 **/
public interface Fieldable extends Serializable {
  /** Sets the boost factor for hits on this field. This value will be
   * multiplied into the score of all hits on this field of this
   * document.
   *
   * <p>The boost is multiplied by {@link org.apache.lucene.document.Document#getBoost()} of the document
   * containing this field. If a document has multiple fields with the same
   * name, all such values are multiplied together. This product is then
   * multiplied by the value {@link org.apache.lucene.search.Similarity#lengthNorm(String,int)}, and
   * rounded by {@link org.apache.lucene.search.Similarity#encodeNorm(float)} before it is stored in the
   * index. One should attempt to ensure that this product does not overflow
   * the range of that encoding.
   *
   * @see org.apache.lucene.document.Document#setBoost(float)
   * @see org.apache.lucene.search.Similarity#lengthNorm(String, int)
   * @see org.apache.lucene.search.Similarity#encodeNorm(float)
   */
  void setBoost(float boost);

  /** Returns the boost factor for hits for this field.
   *
   * <p>The default value is 1.0.
   *
   * <p>Note: this value is not stored directly with the document in the index.
   * Documents returned from {@link org.apache.lucene.index.IndexReader#document(int)} and
   * {@link org.apache.lucene.search.Hits#doc(int)} may thus not have the same value present as when
   * this field was indexed.
   *
   * @see #setBoost(float)
   */
  float getBoost();

  /** Returns the name of the field as an interned string.
   * For example "date", "title", "body", ...
   */
  String name();

  /** The value of the field as a String, or null. If null, the Reader value
   * or binary value is used. Exactly one of stringValue(), readerValue(), and
   * binaryValue() must be set. */
  String stringValue();

  /** The value of the field as a Reader, or null. If null, the String value
   * or binary value is used. Exactly one of stringValue(), readerValue(),
   * and binaryValue() must be set. */
  Reader readerValue();

  /** The value of the field in Binary, or null. If null, the Reader or
   * String value is used. Exactly one of stringValue(), readerValue() and
   * binaryValue() must be set. */
  byte[] binaryValue();

  /** True iff the value of the field is to be stored in the index for return
      with search hits. It is an error for this to be true if a field is
      Reader-valued. */
  boolean isStored();

  /** True iff the value of the field is to be indexed, so that it may be
      searched on. */
  boolean isIndexed();

  /** True iff the value of the field should be tokenized as text prior to
      indexing. Un-tokenized fields are indexed as a single word and may not be
      Reader-valued. */
  boolean isTokenized();

  /** True if the value of the field is stored and compressed within the index */
  boolean isCompressed();

  /** True iff the term or terms used to index this field are stored as a term
   * vector, available from {@link org.apache.lucene.index.IndexReader#getTermFreqVector(int,String)}.
   * These methods do not provide access to the original content of the field,
   * only to terms used to index it. If the original content must be
   * preserved, use the <code>stored</code> attribute instead.
   *
   * @see org.apache.lucene.index.IndexReader#getTermFreqVector(int, String)
   */
  boolean isTermVectorStored();

  /**
   * True iff terms are stored as term vector together with their offsets
   * (start and end position in source text).
   */
  boolean isStoreOffsetWithTermVector();

  /**
   * True iff terms are stored as term vector together with their token positions.
   */
  boolean isStorePositionWithTermVector();

  /** True iff the value of the field is stored as binary */
  boolean isBinary();

  /** True if norms are omitted for this indexed field */
  boolean getOmitNorms();

  /** Expert:
   *
   * If set, omit normalization factors associated with this indexed field.
   * This effectively disables indexing boosts and length normalization for this field.
   */
  void setOmitNorms(boolean omitNorms);

  /**
   * Indicates whether a Field is Lazy or not. The semantics of Lazy loading are such that if a Field is lazily loaded, retrieving
   * its values via {@link #stringValue()} or {@link #binaryValue()} is only valid as long as the {@link org.apache.lucene.index.IndexReader} that
   * retrieved the {@link Document} is still open.
   *
   * @return true if this field can be loaded lazily
   */
  boolean isLazy();
}
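Because Fieldable is a wide interface, a custom implementation would normally extend AbstractField (added earlier in this commit), which supplies everything except the three value accessors. A hedged sketch, with a purely illustrative constant value:

import java.io.Reader;

import org.apache.lucene.document.AbstractField;
import org.apache.lucene.document.Field;

public class ConstantField extends AbstractField {
  public ConstantField(String name) {
    super(name, Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.NO);
  }

  // Exactly one of the three value accessors returns non-null.
  public String stringValue() { return "constant"; }
  public Reader readerValue() { return null; }
  public byte[] binaryValue() { return null; }
}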
@@ -0,0 +1,22 @@
package org.apache.lucene.document;
/**
 * Created by IntelliJ IDEA.
 * User: Grant Ingersoll
 * Date: Apr 15, 2006
 * Time: 10:13:07 AM
 * $Id:$
 * Copyright 2005. Center For Natural Language Processing
 */


/**
 * Load the first field and break.
 * <p/>
 * See {@link FieldSelectorResult#LOAD_AND_BREAK}
 */
public class LoadFirstFieldSelector implements FieldSelector {

  public FieldSelectorResult accept(String fieldName) {
    return FieldSelectorResult.LOAD_AND_BREAK;
  }
}
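Since LOAD_AND_BREAK makes FieldsReader stop after the first stored field (see the FieldsReader hunks below), this selector suits documents whose first stored field is a small key field. A hedged sketch, assuming the caller supplies an open reader and a valid document number:

import java.util.Enumeration;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.LoadFirstFieldSelector;
import org.apache.lucene.index.IndexReader;

public class FirstFieldOnly {
  // Returns the value of the first stored field of document n, skipping
  // any larger fields stored after it.
  static String firstStoredValue(IndexReader reader, int n) throws Exception {
    Document doc = reader.document(n, new LoadFirstFieldSelector());
    Enumeration fields = doc.fields();
    return fields.hasMoreElements()
        ? ((Fieldable) fields.nextElement()).stringValue() : null;
  }
}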
@@ -0,0 +1,57 @@
/*
 * MapFieldSelector.java
 *
 * Created on May 2, 2006, 6:49 PM
 *
 */

package org.apache.lucene.document;

import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * A FieldSelector based on a Map of field names to FieldSelectorResults
 *
 * @author Chuck Williams
 */
public class MapFieldSelector implements FieldSelector {

  Map fieldSelections;

  /** Create a MapFieldSelector
   * @param fieldSelections maps from field names to FieldSelectorResults
   */
  public MapFieldSelector(Map fieldSelections) {
    this.fieldSelections = fieldSelections;
  }

  /** Create a MapFieldSelector
   * @param fields fields to LOAD. All other fields are NO_LOAD.
   */
  public MapFieldSelector(List fields) {
    fieldSelections = new HashMap(fields.size()*5/3);
    for (int i=0; i<fields.size(); i++)
      fieldSelections.put(fields.get(i), FieldSelectorResult.LOAD);
  }

  /** Create a MapFieldSelector
   * @param fields fields to LOAD. All other fields are NO_LOAD.
   */
  public MapFieldSelector(String[] fields) {
    fieldSelections = new HashMap(fields.length*5/3);
    for (int i=0; i<fields.length; i++)
      fieldSelections.put(fields[i], FieldSelectorResult.LOAD);
  }

  /** Load field according to its associated value in fieldSelections
   * @param field a field name
   * @return the fieldSelections value that field maps to or NO_LOAD if none.
   */
  public FieldSelectorResult accept(String field) {
    FieldSelectorResult selection = (FieldSelectorResult) fieldSelections.get(field);
    return selection!=null ? selection : FieldSelectorResult.NO_LOAD;
  }

}
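A hedged usage sketch for the Map-based constructor, mixing eager, lazy, and (by default) skipped fields; the field names "title" and "body" are hypothetical:

import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelectorResult;
import org.apache.lucene.document.MapFieldSelector;
import org.apache.lucene.index.IndexReader;

public class MapSelectorUsage {
  // Load "title" now and "body" lazily; any unmapped field is NO_LOAD.
  static Document titleAndLazyBody(IndexReader reader, int n) throws Exception {
    Map selections = new HashMap();
    selections.put("title", FieldSelectorResult.LOAD);
    selections.put("body", FieldSelectorResult.LAZY_LOAD);
    return reader.document(n, new MapFieldSelector(selections));
  }
}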
@@ -0,0 +1,53 @@
package org.apache.lucene.document;

import java.util.Set;
/**
 * Created by IntelliJ IDEA.
 * User: Grant Ingersoll
 * Date: Apr 14, 2006
 * Time: 6:53:07 PM
 * $Id:$
 * Copyright 2005. Center For Natural Language Processing
 */

/**
 * Declare what fields to load normally and what fields to load lazily
 *
 **/
public class SetBasedFieldSelector implements FieldSelector {

  private Set fieldsToLoad;
  private Set lazyFieldsToLoad;



  /**
   * Pass in the Set of {@link Field} names to load and the Set of {@link Field} names to load lazily. If both are null, the
   * Document will not have any {@link Field} on it.
   * @param fieldsToLoad A Set of {@link String} field names to load. May be empty, but not null
   * @param lazyFieldsToLoad A Set of {@link String} field names to load lazily. May be empty, but not null
   */
  public SetBasedFieldSelector(Set fieldsToLoad, Set lazyFieldsToLoad) {
    this.fieldsToLoad = fieldsToLoad;
    this.lazyFieldsToLoad = lazyFieldsToLoad;
  }

  /**
   * Indicate whether to load the field with the given name or not. If the {@link Field#name()} is not in either of the
   * initializing Sets, then {@link org.apache.lucene.document.FieldSelectorResult#NO_LOAD} is returned. If a Field name
   * is in both <code>fieldsToLoad</code> and <code>lazyFieldsToLoad</code>, lazy has precedence.
   *
   * @param fieldName The {@link Field} name to check
   * @return The {@link FieldSelectorResult}
   */
  public FieldSelectorResult accept(String fieldName) {
    FieldSelectorResult result = FieldSelectorResult.NO_LOAD;
    if (fieldsToLoad.contains(fieldName) == true){
      result = FieldSelectorResult.LOAD;
    }
    if (lazyFieldsToLoad.contains(fieldName) == true){
      result = FieldSelectorResult.LAZY_LOAD;
    }
    return result;
  }
}
@@ -16,22 +16,22 @@ package org.apache.lucene.index;
 * limitations under the License.
 */

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexOutput;

import java.io.IOException;
import java.io.PrintStream;
import java.io.Reader;
import java.io.StringReader;
import java.util.Hashtable;
import java.util.Enumeration;
import java.util.Arrays;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.search.Similarity;
import java.util.Enumeration;
import java.util.Hashtable;

final class DocumentWriter {
  private Analyzer analyzer;

@@ -129,7 +129,7 @@ final class DocumentWriter {
      throws IOException {
    Enumeration fields = doc.fields();
    while (fields.hasMoreElements()) {
      Field field = (Field) fields.nextElement();
      Fieldable field = (Fieldable) fields.nextElement();
      String fieldName = field.name();
      int fieldNumber = fieldInfos.fieldNumber(fieldName);
@@ -16,18 +16,17 @@ package org.apache.lucene.index;
 * limitations under the License.
 */

import java.util.*;
import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;

import org.apache.lucene.document.Fieldable;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;

/** Access to the Field Info file that describes document fields and whether or
 *  not they are indexed. Each segment has a separate Field Info file. Objects
import java.io.IOException;
import java.util.*;

/** Access to the Fieldable Info file that describes document fields and whether or
 *  not they are indexed. Each segment has a separate Fieldable Info file. Objects
 *  of this class are thread-safe for multiple readers, but only one thread can
 *  be adding documents at a time, with no other reader or writer threads
 *  accessing this object.

@@ -65,7 +64,7 @@ final class FieldInfos {
  public void add(Document doc) {
    Enumeration fields = doc.fields();
    while (fields.hasMoreElements()) {
      Field field = (Field) fields.nextElement();
      Fieldable field = (Fieldable) fields.nextElement();
      add(field.name(), field.isIndexed(), field.isTermVectorStored(), field.isStorePositionWithTermVector(),
          field.isStoreOffsetWithTermVector(), field.getOmitNorms());
    }

@@ -105,7 +104,7 @@ final class FieldInfos {
  /**
   * Calls 5 parameter add with false for all TermVector parameters.
   *
   * @param name The name of the Field
   * @param name The name of the Fieldable
   * @param isIndexed true if the field is indexed
   * @see #add(String, boolean, boolean, boolean, boolean)
   */
@@ -0,0 +1,70 @@
package org.apache.lucene.index;
/**
 * Created by IntelliJ IDEA.
 * User: Grant Ingersoll
 * Date: Jan 12, 2006
 * Time: 9:37:43 AM
 * $Id:$
 * Copyright 2005. Center For Natural Language Processing
 */

/**
 * Runtime exception for errors encountered while reading field values.
 **/
public class FieldReaderException extends RuntimeException{
  /**
   * Constructs a new runtime exception with <code>null</code> as its
   * detail message. The cause is not initialized, and may subsequently be
   * initialized by a call to {@link #initCause}.
   */
  public FieldReaderException() {
  }

  /**
   * Constructs a new runtime exception with the specified cause and a
   * detail message of <tt>(cause==null ? null : cause.toString())</tt>
   * (which typically contains the class and detail message of
   * <tt>cause</tt>). This constructor is useful for runtime exceptions
   * that are little more than wrappers for other throwables.
   *
   * @param cause the cause (which is saved for later retrieval by the
   *        {@link #getCause()} method). (A <tt>null</tt> value is
   *        permitted, and indicates that the cause is nonexistent or
   *        unknown.)
   * @since 1.4
   */
  public FieldReaderException(Throwable cause) {
    super(cause);
  }

  /**
   * Constructs a new runtime exception with the specified detail message.
   * The cause is not initialized, and may subsequently be initialized by a
   * call to {@link #initCause}.
   *
   * @param message the detail message. The detail message is saved for
   *        later retrieval by the {@link #getMessage()} method.
   */
  public FieldReaderException(String message) {
    super(message);
  }

  /**
   * Constructs a new runtime exception with the specified detail message and
   * cause. <p>Note that the detail message associated with
   * <code>cause</code> is <i>not</i> automatically incorporated in
   * this runtime exception's detail message.
   *
   * @param message the detail message (which is saved for later retrieval
   *        by the {@link #getMessage()} method).
   * @param cause the cause (which is saved for later retrieval by the
   *        {@link #getCause()} method). (A <tt>null</tt> value is
   *        permitted, and indicates that the cause is nonexistent or
   *        unknown.)
   * @since 1.4
   */
  public FieldReaderException(String message, Throwable cause) {
    super(message, cause);
  }
}
@ -16,19 +16,19 @@ package org.apache.lucene.index;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.zip.DataFormatException;
|
||||
import java.util.zip.Inflater;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.*;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.util.zip.DataFormatException;
|
||||
import java.util.zip.Inflater;
|
||||
|
||||
/**
|
||||
* Class responsible for access to stored document fields.
|
||||
*
|
||||
* <p/>
|
||||
* It uses <segment>.fdt and <segment>.fdx; files.
|
||||
*
|
||||
* @version $Id$
|
||||
|
@ -39,25 +39,37 @@ final class FieldsReader {
|
|||
private IndexInput indexStream;
|
||||
private int size;
|
||||
|
||||
private static ThreadLocal fieldsStreamTL = new ThreadLocal();
|
||||
|
||||
FieldsReader(Directory d, String segment, FieldInfos fn) throws IOException {
|
||||
fieldInfos = fn;
|
||||
|
||||
fieldsStream = d.openInput(segment + ".fdt");
|
||||
indexStream = d.openInput(segment + ".fdx");
|
||||
|
||||
size = (int)(indexStream.length() / 8);
|
||||
size = (int) (indexStream.length() / 8);
|
||||
}
|
||||
|
||||
/**
|
||||
* Cloeses the underlying {@link org.apache.lucene.store.IndexInput} streams, including any ones associated with a
|
||||
* lazy implementation of a Field. This means that the Fields values will not be accessible.
|
||||
*
|
||||
* @throws IOException
|
||||
*/
|
||||
final void close() throws IOException {
|
||||
fieldsStream.close();
|
||||
indexStream.close();
|
||||
IndexInput localFieldsStream = (IndexInput) fieldsStreamTL.get();
|
||||
if (localFieldsStream != null) {
|
||||
localFieldsStream.close();
|
||||
fieldsStreamTL.set(null);
|
||||
}
|
||||
}
|
||||
|
||||
final int size() {
|
||||
return size;
|
||||
}
|
||||
|
||||
final Document doc(int n) throws IOException {
|
||||
final Document doc(int n, FieldSelector fieldSelector) throws IOException {
|
||||
indexStream.seek(n * 8L);
|
||||
long position = indexStream.readLong();
|
||||
fieldsStream.seek(position);
|
||||
|
@ -67,89 +79,277 @@ final class FieldsReader {
|
|||
    for (int i = 0; i < numFields; i++) {
      int fieldNumber = fieldsStream.readVInt();
      FieldInfo fi = fieldInfos.fieldInfo(fieldNumber);

      FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.accept(fi.name);
      boolean lazy = acceptField.equals(FieldSelectorResult.LAZY_LOAD);

      byte bits = fieldsStream.readByte();
      boolean compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
      boolean tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
      boolean binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;

      if (acceptField.equals(FieldSelectorResult.LOAD)) {
        addField(doc, fi, binary, compressed, tokenize);
      }
      else if (acceptField.equals(FieldSelectorResult.LOAD_AND_BREAK)) {
        addField(doc, fi, binary, compressed, tokenize);
        break; //Get out of this loop
      }
      else if (lazy) {
        addFieldLazy(doc, fi, binary, compressed, tokenize);
      }
      else {
        skipField(binary, compressed);
      }
    }

    return doc;
  }

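The branch order above means LOAD_AND_BREAK wins over laziness, and any field that is neither loaded nor lazy is skipped outright. As a rough caller-side illustration (not part of this commit; the class name and field name are invented), a selector that eagerly loads one field and defers everything else could look like:

import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.FieldSelectorResult;

public class TitleEagerSelector implements FieldSelector {
  // "title" is loaded up front; every other stored field becomes a lazy field.
  public FieldSelectorResult accept(String fieldName) {
    if ("title".equals(fieldName))
      return FieldSelectorResult.LOAD;
    return FieldSelectorResult.LAZY_LOAD;
  }
}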
  /**
   * Skip the field. We still have to read some of the information about the field, but can skip past the actual content.
   * This will have the most payoff on large fields.
   */
  private void skipField(boolean binary, boolean compressed) throws IOException {
    int toRead = fieldsStream.readVInt();

    if (binary || compressed) {
      //the stored length is a byte count, so a single seek suffices
      long pointer = fieldsStream.getFilePointer();
      fieldsStream.seek(pointer + toRead);
    } else {
      //We need to skip chars. This will slow us down, but still better than reading the content.
      fieldsStream.skipChars(toRead);
    }
  }

  private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws IOException {
    if (binary == true) {
      int toRead = fieldsStream.readVInt();
      long pointer = fieldsStream.getFilePointer();
      if (compressed) {
        //was: doc.add(new Fieldable(fi.name, uncompress(b), Fieldable.Store.COMPRESS));
        doc.add(new LazyField(fi.name, Field.Store.COMPRESS, toRead, pointer));
      } else {
        //was: doc.add(new Fieldable(fi.name, b, Fieldable.Store.YES));
        doc.add(new LazyField(fi.name, Field.Store.YES, toRead, pointer));
      }
      //Need to move the pointer ahead by toRead positions
      fieldsStream.seek(pointer + toRead);
    } else {
      Field.Store store = Field.Store.YES;
      Field.Index index = getIndexType(fi, tokenize);
      Field.TermVector termVector = getTermVectorType(fi);

      Fieldable f;
      if (compressed) {
        store = Field.Store.COMPRESS;
        int toRead = fieldsStream.readVInt();
        long pointer = fieldsStream.getFilePointer();
        f = new LazyField(fi.name, store, toRead, pointer);
        //skip over the part that we aren't loading
        fieldsStream.seek(pointer + toRead);
        f.setOmitNorms(fi.omitNorms);
      } else {
        int length = fieldsStream.readVInt();
        long pointer = fieldsStream.getFilePointer();
        //Skip ahead of where we are by the length of what is stored
        fieldsStream.skipChars(length);
        f = new LazyField(fi.name, store, index, termVector, length, pointer);
        f.setOmitNorms(fi.omitNorms);
      }
      doc.add(f);
    }
  }

  private void addField(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws IOException {

    //we have a binary stored field, and it may be compressed
    if (binary) {
      int toRead = fieldsStream.readVInt();
      final byte[] b = new byte[toRead];
      fieldsStream.readBytes(b, 0, b.length);
      if (compressed)
        doc.add(new Field(fi.name, uncompress(b), Field.Store.COMPRESS));
      else
        doc.add(new Field(fi.name, b, Field.Store.YES));

    } else {
      Field.Store store = Field.Store.YES;
      Field.Index index = getIndexType(fi, tokenize);
      Field.TermVector termVector = getTermVectorType(fi);

      Fieldable f;
      if (compressed) {
        store = Field.Store.COMPRESS;
        int toRead = fieldsStream.readVInt();

        final byte[] b = new byte[toRead];
        fieldsStream.readBytes(b, 0, b.length);
        f = new Field(fi.name,      // field name
                new String(uncompress(b), "UTF-8"), // uncompress the value and add as string
                store,
                index,
                termVector);
        f.setOmitNorms(fi.omitNorms);
      } else {
        f = new Field(fi.name,     // name
                fieldsStream.readString(), // read value
                store,
                index,
                termVector);
        f.setOmitNorms(fi.omitNorms);
      }
      doc.add(f);
    }
  }

  private Field.TermVector getTermVectorType(FieldInfo fi) {
    Field.TermVector termVector = null;
    if (fi.storeTermVector) {
      if (fi.storeOffsetWithTermVector) {
        if (fi.storePositionWithTermVector) {
          termVector = Field.TermVector.WITH_POSITIONS_OFFSETS;
        } else {
          termVector = Field.TermVector.WITH_OFFSETS;
        }
      } else if (fi.storePositionWithTermVector) {
        termVector = Field.TermVector.WITH_POSITIONS;
      } else {
        termVector = Field.TermVector.YES;
      }
    } else {
      termVector = Field.TermVector.NO;
    }
    return termVector;
  }

  private Field.Index getIndexType(FieldInfo fi, boolean tokenize) {
    Field.Index index;
    if (fi.isIndexed && tokenize)
      index = Field.Index.TOKENIZED;
    else if (fi.isIndexed && !tokenize)
      index = Field.Index.UN_TOKENIZED;
    else
      index = Field.Index.NO;
    return index;
  }

  /**
   * A Lazy implementation of Fieldable that defers loading of fields until asked for, instead of when the Document is
   * loaded.
   */
  private class LazyField extends AbstractField implements Fieldable {
    private int toRead;
    private long pointer;
    //internal buffer
    private char[] chars;

    public LazyField(String name, Field.Store store, int toRead, long pointer) {
      super(name, store, Field.Index.NO, Field.TermVector.NO);
      this.toRead = toRead;
      this.pointer = pointer;
      lazy = true;
    }

    public LazyField(String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer) {
      super(name, store, index, termVector);
      this.toRead = toRead;
      this.pointer = pointer;
      lazy = true;
    }

    /**
     * The value of the field in Binary, or null. If null, the Reader or
     * String value is used. Exactly one of stringValue(), readerValue() and
     * binaryValue() must be set.
     */
    public byte[] binaryValue() {
      if (fieldsData == null) {
        final byte[] b = new byte[toRead];
        IndexInput localFieldsStream = (IndexInput) fieldsStreamTL.get();
        if (localFieldsStream == null) {
          localFieldsStream = (IndexInput) fieldsStream.clone();
          fieldsStreamTL.set(localFieldsStream);
        }
        //Throw this IOException since IndexReader.document does so anyway, so probably not that big of a change for people
        //since they are already handling this exception when getting the document
        try {
          localFieldsStream.seek(pointer);
          localFieldsStream.readBytes(b, 0, b.length);
          if (isCompressed == true) {
            fieldsData = uncompress(b);
          } else {
            fieldsData = b;
          }
        } catch (IOException e) {
          throw new FieldReaderException(e);
        }
      }
      return fieldsData instanceof byte[] ? (byte[]) fieldsData : null;
    }

    /**
     * The value of the field as a Reader, or null. If null, the String value
     * or binary value is used. Exactly one of stringValue(), readerValue(),
     * and binaryValue() must be set.
     */
    public Reader readerValue() {
      return fieldsData instanceof Reader ? (Reader) fieldsData : null;
    }

    /**
     * The value of the field as a String, or null. If null, the Reader value
     * or binary value is used. Exactly one of stringValue(), readerValue(), and
     * binaryValue() must be set.
     */
    public String stringValue() {
      if (fieldsData == null) {
        IndexInput localFieldsStream = (IndexInput) fieldsStreamTL.get();
        if (localFieldsStream == null) {
          localFieldsStream = (IndexInput) fieldsStream.clone();
          fieldsStreamTL.set(localFieldsStream);
        }
        try {
          localFieldsStream.seek(pointer);
          //read in chars b/c we already know the length we need to read
          if (chars == null || toRead > chars.length)
            chars = new char[toRead];
          localFieldsStream.readChars(chars, 0, toRead);
          fieldsData = new String(chars, 0, toRead); //fieldsStream.readString();
        } catch (IOException e) {
          throw new FieldReaderException(e);
        }
      }
      return fieldsData instanceof String ? (String) fieldsData : null;
    }

    public long getPointer() {
      return pointer;
    }

    public void setPointer(long pointer) {
      this.pointer = pointer;
    }

    public int getToRead() {
      return toRead;
    }

    public void setToRead(int toRead) {
      this.toRead = toRead;
    }
  }

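A usage note on the class above (an illustrative sketch, not code from this commit; the selector variable and the "body" field name are assumed): a LazyField holds only a pointer and a length into the .fdt file, so the first call to stringValue() or binaryValue() reads through a thread-local clone of fieldsStream and requires the reader to still be open:

Document doc = reader.doc(0, selector);   // selector marked "body" as LAZY_LOAD
Fieldable body = doc.getField("body");    // cheap: nothing read from disk yet
String text = body.stringValue();         // first access seeks to pointer and reads toRead chars
// once FieldsReader.close() has run, a first-time value access would hit a closed stream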
  private final byte[] uncompress(final byte[] input)
          throws IOException {

    Inflater decompressor = new Inflater();
    decompressor.setInput(input);

    // Create an expandable byte array to hold the decompressed data
    ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length);

    // Decompress the data
    byte[] buf = new byte[1024];
    while (!decompressor.finished()) {
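The hunk ends mid-loop at the diff context boundary; for orientation, the usual java.util.zip pattern this loop sets up is sketched below as comments (an assumption about the elided body, not the committed code):

      //   try {
      //     int count = decompressor.inflate(buf);   // inflate into the 1024-byte buffer
      //     bos.write(buf, 0, count);                // append the decompressed chunk
      //   } catch (DataFormatException e) {
      //     throw new IOException("field data are in wrong format: " + e.toString());
      //   }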

@@ -17,6 +17,8 @@ package org.apache.lucene.index;
 */

import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;

import java.io.IOException;
import java.util.Collection;
@@ -100,7 +102,7 @@ public class FilterIndexReader extends IndexReader {
  public int numDocs() { return in.numDocs(); }
  public int maxDoc() { return in.maxDoc(); }

  public Document document(int n, FieldSelector fieldSelector) throws IOException { return in.document(n, fieldSelector); }

  public boolean isDeleted(int n) { return in.isDeleted(n); }
  public boolean hasDeletions() { return in.hasDeletions(); }

@@ -133,7 +135,7 @@ public class FilterIndexReader extends IndexReader {
  protected void doCommit() throws IOException { in.commit(); }
  protected void doClose() throws IOException { in.close(); }

  public Collection getFieldNames(IndexReader.FieldOption fieldNames) {
    return in.getFieldNames(fieldNames);
  }
@@ -273,7 +273,7 @@ public class IndexModifier {
    }
  }

  /**
   * Returns the number of documents currently in this index.
   * @see IndexWriter#docCount()

@@ -407,7 +407,7 @@ public class IndexModifier {
   * the number of files open in a FSDirectory.
   *
   * <p>The default value is 10.
   *
   * @see IndexWriter#setMaxBufferedDocs(int)
   * @throws IllegalStateException if the index is closed
   * @throws IllegalArgumentException if maxBufferedDocs is smaller than 2

@@ -500,8 +500,8 @@ public class IndexModifier {
    // create an index in /tmp/index, overwriting an existing one:
    IndexModifier indexModifier = new IndexModifier("/tmp/index", analyzer, true);
    Document doc = new Document();
    doc.add(new Fieldable("id", "1", Fieldable.Store.YES, Fieldable.Index.UN_TOKENIZED));
    doc.add(new Fieldable("body", "a simple test", Fieldable.Store.YES, Fieldable.Index.TOKENIZED));
    indexModifier.addDocument(doc);
    int deleted = indexModifier.delete(new Term("id", "1"));
    System.out.println("Deleted " + deleted + " document");
@@ -17,7 +17,7 @@ package org.apache.lucene.index;
 */

import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

@@ -42,7 +42,7 @@ import java.util.Collection;
 document in the index.  These document numbers are ephemeral--they may change
 as documents are added to and deleted from an index.  Clients should thus not
 rely on a given document having the same number between sessions.

 <p> An IndexReader can be opened on a directory for which an IndexWriter is
 opened already, but it cannot be used to delete documents from the index then.

@@ -50,13 +50,13 @@ import java.util.Collection;
 @version $Id$
*/
public abstract class IndexReader {

  public static final class FieldOption {
    private String option;
    private FieldOption() { }
    private FieldOption(String option) {
      this.option = option;
    }
    public String toString() {
      return this.option;
    }
@@ -79,7 +79,7 @@ public abstract class IndexReader {
  // all fields where termvectors with offset and position values set
  public static final FieldOption TERMVECTOR_WITH_POSITION_OFFSET = new FieldOption("TERMVECTOR_WITH_POSITION_OFFSET");
  }

  /**
   * Constructor used if IndexReader is not owner of its directory.
   * This is used for IndexReaders that are used within other IndexReaders that take care of locking directories.

@@ -89,7 +89,7 @@ public abstract class IndexReader {
  protected IndexReader(Directory directory) {
    this.directory = directory;
  }

  /**
   * Constructor used if IndexReader is owner of its directory.
   * If IndexReader is owner of its directory, it locks its directory in case of write operations.

@@ -117,7 +117,7 @@ public abstract class IndexReader {
  private Lock writeLock;
  private boolean stale;
  private boolean hasChanges;

  /** Returns an IndexReader reading the index in an FSDirectory in the named
   path. */

@@ -130,7 +130,7 @@ public abstract class IndexReader {
  public static IndexReader open(File path) throws IOException {
    return open(FSDirectory.getDirectory(path, false), true);
  }

  /** Returns an IndexReader reading the index in the given Directory. */
  public static IndexReader open(final Directory directory) throws IOException {
    return open(directory, false);

@@ -151,7 +151,7 @@ public abstract class IndexReader {
        for (int i = 0; i < infos.size(); i++)
          readers[i] = SegmentReader.get(infos.info(i));
        return new MultiReader(directory, infos, closeDirectory, readers);
      }
    }.run();
  }
@@ -160,7 +160,7 @@ public abstract class IndexReader {
  /** Returns the directory this index resides in. */
  public Directory directory() { return directory; }

  /**
   * Returns the time the index in the named directory was last modified.
   * Do not use this to check whether the reader is still up-to-date, use
   * {@link #isCurrent()} instead.

@@ -169,7 +169,7 @@ public abstract class IndexReader {
    return lastModified(new File(directory));
  }

  /**
   * Returns the time the index in the named directory was last modified.
   * Do not use this to check whether the reader is still up-to-date, use
   * {@link #isCurrent()} instead.

@@ -178,7 +178,7 @@ public abstract class IndexReader {
    return FSDirectory.fileModified(directory, IndexFileNames.SEGMENTS);
  }

  /**
   * Returns the time the index in the named directory was last modified.
   * Do not use this to check whether the reader is still up-to-date, use
   * {@link #isCurrent()} instead.
@@ -228,12 +228,12 @@ public abstract class IndexReader {
  public static long getCurrentVersion(Directory directory) throws IOException {
    synchronized (directory) {                 // in- & inter-process sync
      Lock commitLock = directory.makeLock(IndexWriter.COMMIT_LOCK_NAME);

      boolean locked = false;

      try {
        locked = commitLock.obtain(IndexWriter.COMMIT_LOCK_TIMEOUT);

        return SegmentInfos.readCurrentVersion(directory);
      } finally {
        if (locked) {

@@ -242,7 +242,7 @@ public abstract class IndexReader {
      }
    }
  }

  /**
   * Version number when this IndexReader was opened.
   */
@@ -260,12 +260,12 @@ public abstract class IndexReader {
  public boolean isCurrent() throws IOException {
    synchronized (directory) {                 // in- & inter-process sync
      Lock commitLock = directory.makeLock(IndexWriter.COMMIT_LOCK_NAME);

      boolean locked = false;

      try {
        locked = commitLock.obtain(IndexWriter.COMMIT_LOCK_TIMEOUT);

        return SegmentInfos.readCurrentVersion(directory) == segmentInfos.getVersion();
      } finally {
        if (locked) {

@@ -292,7 +292,7 @@ public abstract class IndexReader {
  abstract public TermFreqVector[] getTermFreqVectors(int docNumber)
          throws IOException;

  /**
   * Return a term frequency vector for the specified document and field. The
   * returned vector contains terms and frequencies for the terms in

@@ -309,7 +309,7 @@ public abstract class IndexReader {
   */
  abstract public TermFreqVector getTermFreqVector(int docNumber, String field)
          throws IOException;

  /**
   * Returns <code>true</code> if an index exists at the specified directory.
   * Returns <code>false</code> if the directory does not exist or if there is no index in it.
@@ -353,14 +353,40 @@ public abstract class IndexReader {

  /** Returns the stored fields of the <code>n</code><sup>th</sup>
      <code>Document</code> in this index. */
  public Document document(int n) throws IOException {
    return document(n, null);
  }

  /**
   * Get the {@link org.apache.lucene.document.Document} at the <code>n</code><sup>th</sup> position. The {@link org.apache.lucene.document.FieldSelector}
   * may be used to determine what {@link org.apache.lucene.document.Field}s to load and how they should be loaded.
   *
   * <b>NOTE:</b> If this Reader (more specifically, the underlying {@link FieldsReader}) is closed before the lazy {@link org.apache.lucene.document.Field} is
   * loaded, an exception may be thrown. If you want the value of a lazy {@link org.apache.lucene.document.Field} to be available after closing you must
   * explicitly load it or fetch the Document again with a new loader.
   *
   * @param n Get the document at the <code>n</code><sup>th</sup> position
   * @param fieldSelector The {@link org.apache.lucene.document.FieldSelector} to use to determine what Fields should be loaded on the Document. May be null, in which case all Fields will be loaded.
   * @return The stored fields of the {@link org.apache.lucene.document.Document} at the nth position
   * @throws IOException If there is a problem reading this document
   *
   * @see org.apache.lucene.document.Fieldable
   * @see org.apache.lucene.document.FieldSelector
   * @see org.apache.lucene.document.SetBasedFieldSelector
   * @see org.apache.lucene.document.LoadFirstFieldSelector
   */
  //When we convert to JDK 1.5 make this Set<String>
  public abstract Document document(int n, FieldSelector fieldSelector) throws IOException;
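For example, paired with the SetBasedFieldSelector referenced in the @see tags above (a usage sketch; the open reader, the document number and the field names are assumptions for illustration):

Set load = new HashSet();
load.add("title");                               // loaded eagerly
Set lazy = new HashSet();
lazy.add("body");                                // deferred until value access
FieldSelector selector = new SetBasedFieldSelector(load, lazy);
Document doc = reader.document(42, selector);    // every other stored field is skipped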

  /** Returns true if document <i>n</i> has been deleted */
  public abstract boolean isDeleted(int n);

  /** Returns true if any documents have been deleted */
  public abstract boolean hasDeletions();

  /** Returns true if there are norms stored for this field. */
  public boolean hasNorms(String field) throws IOException {
    // backward compatible implementation.
@@ -371,21 +397,21 @@ public abstract class IndexReader {
  /** Returns the byte-encoded normalization factor for the named field of
   * every document.  This is used by the search code to score documents.
   *
   * @see org.apache.lucene.document.Field#setBoost(float)
   */
  public abstract byte[] norms(String field) throws IOException;

  /** Reads the byte-encoded normalization factor for the named field of every
   * document.  This is used by the search code to score documents.
   *
   * @see org.apache.lucene.document.Field#setBoost(float)
   */
  public abstract void norms(String field, byte[] bytes, int offset)
          throws IOException;

  /** Expert: Resets the normalization factor for the named field of the named
   * document.  The norm represents the product of the field's {@link
   * Fieldable#setBoost(float) boost} and its {@link Similarity#lengthNorm(String,
   * int) length normalization}.  Thus, to preserve the length normalization
   * values when resetting this, one should base the new value upon the old.
   *

@@ -399,9 +425,9 @@ public abstract class IndexReader {
    doSetNorm(doc, field, value);
    hasChanges = true;
  }

  /** Implements setNorm in subclass.*/
  protected abstract void doSetNorm(int doc, String field, byte value)
          throws IOException;

  /** Expert: Resets the normalization factor for the named field of the named
@@ -554,7 +580,7 @@ public abstract class IndexReader {
    doUndeleteAll();
    hasChanges = true;
  }

  /** Implements actual undeleteAll() in subclass. */
  protected abstract void doUndeleteAll() throws IOException;

@@ -586,10 +612,10 @@ public abstract class IndexReader {
    }
    hasChanges = false;
  }

  /** Implements commit. */
  protected abstract void doCommit() throws IOException;

  /**
   * Closes files associated with this index.
   * Also saves any new deletions to disk.

@@ -613,7 +639,7 @@ public abstract class IndexReader {
    }
  }

  /**
   * Get a list of unique field names that exist in this index and have the specified
   * field option information.

@@ -659,7 +685,7 @@ public abstract class IndexReader {
      directory.makeLock(IndexWriter.WRITE_LOCK_NAME).release();
      directory.makeLock(IndexWriter.COMMIT_LOCK_NAME).release();
  }

  /**
   * Prints the filename and size of each file within a given compound file.
   * Add the -extract flag to extract files to the current working directory.

@@ -686,7 +712,7 @@ public abstract class IndexReader {

    Directory dir = null;
    CompoundFileReader cfr = null;

    try {
      File file = new File(filename);
      String dirname = file.getAbsoluteFile().getParent();

@@ -696,7 +722,7 @@ public abstract class IndexReader {

      String [] files = cfr.list();
      Arrays.sort(files);   // sort the array of filenames so that the output is more readable

      for (int i = 0; i < files.length; ++i) {
        long len = cfr.fileLength(files[i]);

@@ -705,7 +731,7 @@ public abstract class IndexReader {
          IndexInput ii = cfr.openInput(files[i]);

          FileOutputStream f = new FileOutputStream(files[i]);

          // read and write with a small buffer, which is more effective than reading byte by byte
          byte[] buffer = new byte[1024];
          int chunk = buffer.length;

@@ -715,7 +741,7 @@ public abstract class IndexReader {
            f.write(buffer, 0, bufLen);
            len -= bufLen;
          }

          f.close();
          ii.close();
        }
@@ -17,11 +17,14 @@ package org.apache.lucene.index;
 */

import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.store.Directory;

import java.io.IOException;
import java.util.Collection;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Set;

/** An IndexReader which reads multiple indexes, appending their content.
 *

@@ -99,9 +102,9 @@ public class MultiReader extends IndexReader {
    return maxDoc;
  }

  public Document document(int n, FieldSelector fieldSelector) throws IOException {
    int i = readerIndex(n);                          // find segment num
    return subReaders[i].document(n - starts[i], fieldSelector);    // dispatch to segment reader
  }

  public boolean isDeleted(int n) {
@@ -16,20 +16,24 @@ package org.apache.lucene.index;
 * limitations under the License.
 */

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.FieldSelectorResult;

import java.io.IOException;
import java.util.SortedMap;
import java.util.ArrayList;
import java.util.List;
import java.util.HashMap;
import java.util.Map;
import java.util.TreeMap;
import java.util.Collection;
import java.util.Iterator;
import java.util.Enumeration;
import java.util.Set;
import java.util.HashSet;

/** An IndexReader which reads multiple, parallel indexes.  Each index added
 * must have the same number of documents, but typically each contains

@@ -41,7 +45,7 @@ import org.apache.lucene.document.Field;
 * change rarely and small fields that change more frequently.  The smaller
 * fields may be re-indexed in a new index and both indexes may be searched
 * together.
 *
 * <p><strong>Warning:</strong> It is up to you to make sure all indexes
 * are created and modified the same way. For example, if you add
 * documents to one index, you need to add the same documents in the

@@ -51,7 +55,8 @@ import org.apache.lucene.document.Field;
public class ParallelReader extends IndexReader {
  private List readers = new ArrayList();
  private SortedMap fieldToReader = new TreeMap();
  private Map readerToFields = new HashMap();
  private List storedFieldReaders = new ArrayList();

  private int maxDoc;
  private int numDocs;

@@ -59,7 +64,7 @@ public class ParallelReader extends IndexReader {

  /** Construct a ParallelReader. */
  public ParallelReader() throws IOException { super(null); }

  /** Add an IndexReader. */
  public void add(IndexReader reader) throws IOException {
    add(reader, false);

@@ -68,10 +73,10 @@ public class ParallelReader extends IndexReader {
  /** Add an IndexReader whose stored fields will not be returned.  This can
   * accelerate search when stored fields are only needed from a subset of
   * the IndexReaders.
   *
   * @throws IllegalArgumentException if not all indexes contain the same number
   * of documents
   * @throws IllegalArgumentException if not all indexes have the same value
   * of {@link IndexReader#maxDoc()}
   */
  public void add(IndexReader reader, boolean ignoreStoredFields)
@@ -89,8 +94,10 @@ public class ParallelReader extends IndexReader {
    if (reader.numDocs() != numDocs)
      throw new IllegalArgumentException
          ("All readers must have same numDocs: "+numDocs+"!="+reader.numDocs());

    Collection fields = reader.getFieldNames(IndexReader.FieldOption.ALL);
    readerToFields.put(reader, fields);
    Iterator i = fields.iterator();
    while (i.hasNext()) {                         // update fieldToReader map
      String field = (String)i.next();
      if (fieldToReader.get(field) == null)
@@ -132,13 +139,25 @@ public class ParallelReader extends IndexReader {
  }

  // append fields from storedFieldReaders
  public Document document(int n, FieldSelector fieldSelector) throws IOException {
    Document result = new Document();
    for (int i = 0; i < storedFieldReaders.size(); i++) {
      IndexReader reader = (IndexReader)storedFieldReaders.get(i);

      boolean include = (fieldSelector==null);
      if (!include) {
        Iterator it = ((Collection) readerToFields.get(reader)).iterator();
        while (it.hasNext())
          if (fieldSelector.accept((String)it.next())!=FieldSelectorResult.NO_LOAD) {
            include = true;
            break;
          }
      }
      if (include) {
        Enumeration fields = reader.document(n, fieldSelector).fields();
        while (fields.hasMoreElements()) {
          result.add((Fieldable)fields.nextElement());
        }
      }
    }
    return result;
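The include check above lets a FieldSelector short-circuit entire sub-readers: if none of a reader's field names passes accept(), its stored fields are never fetched. A caller-side sketch (the reader variables and the "price" field name are assumptions, not from this commit):

ParallelReader pr = new ParallelReader();
pr.add(bigStableReader);      // large fields that change rarely
pr.add(smallVolatileReader);  // small fields that change often
Set load = Collections.singleton("price");  // a field living only in the second index
Document doc = pr.document(0, new SetBasedFieldSelector(load, Collections.EMPTY_SET));
// bigStableReader.document(...) is never invoked: none of its fields would be loaded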

@@ -120,7 +120,7 @@ final class SegmentMerger {
      files.add(segment + "." + IndexFileNames.COMPOUND_EXTENSIONS[i]);
    }

    // Fieldable norm files
    for (int i = 0; i < fieldInfos.size(); i++) {
      FieldInfo fi = fieldInfos.fieldInfo(i);
      if (fi.isIndexed && !fi.omitNorms) {
@@ -16,16 +16,16 @@ package org.apache.lucene.index;
 * limitations under the License.
 */

import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BitVector;

import java.io.IOException;
import java.util.*;

/**
 * @version $Id$

@@ -277,11 +277,11 @@ class SegmentReader extends IndexReader {
    return tis.terms(t);
  }

  public synchronized Document document(int n, FieldSelector fieldSelector) throws IOException {
    if (isDeleted(n))
      throw new IllegalArgumentException
              ("attempt to access a deleted document");
    return fieldsReader.doc(n, fieldSelector);
  }

  public synchronized boolean isDeleted(int n) {
@@ -127,7 +127,7 @@ class TermVectorsReader implements Cloneable {

        result = readTermVector(field, position);
      } else {
        //System.out.println("Fieldable not found");
      }
    } else {
      //System.out.println("No tvx file");

@@ -150,7 +150,7 @@ final class TermVectorsWriter {
    return currentField != null;
  }

  /** Add term to the field's term vector. Fieldable must already be open.
   *  Terms should be added in
   *  increasing order of terms, one call per unique termNum. ProxPointer
   *  is a pointer into the TermPosition file (prx). Freq is the number of

@@ -268,7 +268,7 @@ final class TermVectorsWriter {
  private void writeField() throws IOException {
    // remember where this field is written
    currentField.tvfPointer = tvf.getFilePointer();
    //System.out.println("Fieldable Pointer: " + currentField.tvfPointer);

    final int size = terms.size();
    tvf.writeVInt(size);
@@ -20,7 +20,6 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;

import java.io.IOException;
import java.util.Locale;

@@ -43,7 +42,7 @@ implements FieldCache {

  /** Expert: Every key in the internal cache is of this type. */
  static class Entry {
    final String field;        // which Fieldable
    final int type;            // which SortField type
    final Object custom;       // which custom comparator
    final Locale locale;       // the locale we're sorting (if string)

@@ -45,7 +45,7 @@ extends PriorityQueue {

  /**
   * Creates a hit queue sorted by the given list of fields.
   * @param fields Fieldable names, in priority order (highest priority first).
   * @param size  The number of hits to retain.  Must be greater than zero.
   */
  FieldDocSortedHitQueue (SortField[] fields, int size) {
@@ -44,7 +44,7 @@ extends PriorityQueue {
  /**
   * Creates a hit queue sorted by the given list of fields.
   * @param reader  Index to use.
   * @param fields Fieldable names, in priority order (highest priority first). Cannot be <code>null</code> or empty.
   * @param size  The number of hits to retain.  Must be greater than zero.
   * @throws IOException
   */

@@ -212,7 +212,7 @@ extends PriorityQueue {
  /**
   * Returns a comparator for sorting hits according to a field containing integers.
   * @param reader  Index to use.
   * @param fieldname  Fieldable containing integer values.
   * @return  Comparator for sorting hits.
   * @throws IOException If an error occurs reading the index.
   */

@@ -243,7 +243,7 @@ extends PriorityQueue {
  /**
   * Returns a comparator for sorting hits according to a field containing floats.
   * @param reader  Index to use.
   * @param fieldname  Fieldable containing float values.
   * @return  Comparator for sorting hits.
   * @throws IOException If an error occurs reading the index.
   */

@@ -274,7 +274,7 @@ extends PriorityQueue {
  /**
   * Returns a comparator for sorting hits according to a field containing strings.
   * @param reader  Index to use.
   * @param fieldname  Fieldable containing string values.
   * @return  Comparator for sorting hits.
   * @throws IOException If an error occurs reading the index.
   */

@@ -305,7 +305,7 @@ extends PriorityQueue {
  /**
   * Returns a comparator for sorting hits according to a field containing strings.
   * @param reader  Index to use.
   * @param fieldname  Fieldable containing string values.
   * @return  Comparator for sorting hits.
   * @throws IOException If an error occurs reading the index.
   */

@@ -336,7 +336,7 @@ extends PriorityQueue {
   * floats or strings.  Once the type is determined, one of the other static methods
   * in this class is called to get the comparator.
   * @param reader  Index to use.
   * @param fieldname  Fieldable containing values.
   * @return  Comparator for sorting hits.
   * @throws IOException If an error occurs reading the index.
   */
@@ -16,19 +16,16 @@ package org.apache.lucene.search;
 * limitations under the License.
 */

import java.io.IOException;
import java.io.Serializable;
import java.util.Collection;
import java.util.Iterator;

import org.apache.lucene.index.Term;

import org.apache.lucene.index.IndexReader; // for javadoc
import org.apache.lucene.index.IndexWriter; // for javadoc
import org.apache.lucene.document.Field;    // for javadoc
import org.apache.lucene.util.SmallFloat;

/** Expert: Scoring API.
 * <p>Subclasses implement search scoring.
 *

@@ -44,7 +41,7 @@ import org.apache.lucene.util.SmallFloat;
 *         ( {@link #tf(int) tf}(t in d) *
 *         {@link #idf(Term,Searcher) idf}(t)^2 *
 *         {@link Query#getBoost getBoost}(t in q) *
 *         {@link org.apache.lucene.document.Field#getBoost getBoost}(t.field in d) *
 *         {@link #lengthNorm(String,int) lengthNorm}(t.field in d) )
 *       </small></td>
 *       <td valign="middle" rowspan="2"> *

@@ -152,7 +149,7 @@ public abstract class Similarity implements Serializable {
   * <i>fieldName</i> of <i>doc</i>.
   * @return a normalization factor for hits on this field of this document
   *
   * @see org.apache.lucene.document.Field#setBoost(float)
   */
  public abstract float lengthNorm(String fieldName, int numTokens);

@@ -179,7 +176,7 @@ public abstract class Similarity implements Serializable {
   * small to represent are rounded up to the smallest positive representable
   * value.
   *
   * @see org.apache.lucene.document.Field#setBoost(float)
   * @see SmallFloat
   */
  public static byte encodeNorm(float f) {
@@ -29,7 +29,7 @@ import java.io.Serializable;
 * and does not need to be stored (unless you happen to want it back with the
 * rest of your document data).  In other words:
 *
 * <p><code>document.add (new Fieldable ("byNumber", Integer.toString(x), Fieldable.Store.NO, Fieldable.Index.UN_TOKENIZED));</code></p>
 *
 * <p><h3>Valid Types of Values</h3>
@@ -19,7 +19,7 @@ extends Serializable {
  /**
   * Creates a comparator for the field in the given index.
   * @param reader  Index to create comparator for.
   * @param fieldname  Fieldable to create comparator for.
   * @return  Comparator of ScoreDoc objects.
   * @throws IOException If an error occurs reading the index.
   */
@@ -116,6 +116,32 @@ public abstract class IndexInput implements Cloneable {
    }
  }

  /**
   * Expert.
   *
   * Similar to {@link #readChars(char[], int, int)} but does not do any conversion operations on the bytes it is reading in. It still
   * has to invoke {@link #readByte()} just as {@link #readChars(char[], int, int)} does, but it does not need a buffer to store anything
   * and it does not have to do any of the bitwise operations, since we don't actually care what is in the byte except to determine
   * how many more bytes to read.
   * @param length The number of chars to read
   */
  public void skipChars(int length) throws IOException {
    for (int i = 0; i < length; i++) {
      byte b = readByte();
      if ((b & 0x80) == 0) {
        //do nothing, we only need one byte
      } else if ((b & 0xE0) != 0xE0) {
        readByte(); //read an additional byte
      } else {
        //read two additional bytes
        readByte();
        readByte();
      }
    }
  }

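The three branches mirror the variable-length char encoding written by writeChars: the lead byte alone tells how many continuation bytes follow. The same rule as a standalone sketch (illustrative only; this helper is not part of the commit):

static int extraBytes(byte lead) {
  if ((lead & 0x80) == 0) return 0;     // 0xxxxxxx: single-byte char
  if ((lead & 0xE0) != 0xE0) return 1;  // 110xxxxx: one continuation byte
  return 2;                             // 1110xxxx: two continuation bytes
}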

  /** Closes the stream to further operations. */
  public abstract void close() throws IOException;

@@ -38,10 +38,10 @@ public class TestBinaryDocument extends TestCase
  public void testBinaryFieldInIndex()
    throws Exception
  {
    Fieldable binaryFldStored = new Field("binaryStored", binaryValStored.getBytes(), Field.Store.YES);
    Fieldable binaryFldCompressed = new Field("binaryCompressed", binaryValCompressed.getBytes(), Field.Store.COMPRESS);
    Fieldable stringFldStored = new Field("stringStored", binaryValStored, Field.Store.YES, Field.Index.NO, Field.TermVector.NO);
    Fieldable stringFldCompressed = new Field("stringCompressed", binaryValCompressed, Field.Store.COMPRESS, Field.Index.NO, Field.TermVector.NO);

    try {
      // binary fields with store off are not allowed
@@ -46,9 +46,9 @@ public class TestDocument extends TestCase
    throws Exception
  {
    Document doc = new Document();
    Fieldable stringFld = new Field("string", binaryVal, Field.Store.YES, Field.Index.NO);
    Fieldable binaryFld = new Field("binary", binaryVal.getBytes(), Field.Store.YES);
    Fieldable binaryFld2 = new Field("binary", binaryVal2.getBytes(), Field.Store.YES);

    doc.add(stringFld);
    doc.add(binaryFld);
@@ -18,12 +18,12 @@ package org.apache.lucene.index;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.Directory;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.HashMap;
import java.util.Map;
import java.util.Enumeration;
@@ -39,6 +39,13 @@ class DocHelper {
  public static final int [] FIELD_2_FREQS = {3, 1, 1};
  public static final String TEXT_FIELD_2_KEY = "textField2";
  public static Field textField2 = new Field(TEXT_FIELD_2_KEY, FIELD_2_TEXT, Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);

  public static final String FIELD_2_COMPRESSED_TEXT = "field field field two text";
  //Fields will be lexicographically sorted.  So, the order is: field, text, two
  public static final int [] COMPRESSED_FIELD_2_FREQS = {3, 1, 1};
  public static final String COMPRESSED_TEXT_FIELD_2_KEY = "compressedTextField2";
  public static Field compressedTextField2 = new Field(COMPRESSED_TEXT_FIELD_2_KEY, FIELD_2_COMPRESSED_TEXT, Field.Store.COMPRESS, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);

  public static final String FIELD_3_TEXT = "aaaNoNorms aaaNoNorms bbbNoNorms";
  public static final String TEXT_FIELD_3_KEY = "textField3";
@@ -71,6 +78,34 @@ class DocHelper {
  public static Field unStoredField2 = new Field(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT,
      Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.YES);

  public static final String LAZY_FIELD_BINARY_KEY = "lazyFieldBinary";
  public static byte [] LAZY_FIELD_BINARY_BYTES;
  public static Field lazyFieldBinary;

  public static final String LAZY_FIELD_KEY = "lazyField";
  public static final String LAZY_FIELD_TEXT = "These are some field bytes";
  public static Field lazyField = new Field(LAZY_FIELD_KEY, LAZY_FIELD_TEXT, Field.Store.YES, Field.Index.TOKENIZED);

  public static final String LARGE_LAZY_FIELD_KEY = "largeLazyField";
  public static String LARGE_LAZY_FIELD_TEXT;
  public static Field largeLazyField;

  //From Issue 509
  public static final String FIELD_UTF1_TEXT = "field one \u4e00text";
  public static final String TEXT_FIELD_UTF1_KEY = "textField1Utf8";
  public static Field textUtfField1 = new Field(TEXT_FIELD_UTF1_KEY, FIELD_UTF1_TEXT,
      Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO);

  public static final String FIELD_UTF2_TEXT = "field field field \u4e00two text";
  //Fields will be lexicographically sorted.  So, the order is: field, text, two
  public static final int [] FIELD_UTF2_FREQS = {3, 1, 1};
  public static final String TEXT_FIELD_UTF2_KEY = "textField2Utf8";
  public static Field textUtfField2 = new Field(TEXT_FIELD_UTF2_KEY, FIELD_UTF2_TEXT, Field.Store.YES,
      Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);

  public static Map nameValues = null;

  // ordered list of all the fields...
@@ -79,14 +114,20 @@ class DocHelper {
      textField1,
      textField2,
      textField3,
      compressedTextField2,
      keyField,
      noNormsField,
      unIndField,
      unStoredField1,
      unStoredField2,
      textUtfField1,
      textUtfField2,
      lazyField,
      lazyFieldBinary,  //placeholder for binary field, since this is null.  It must be second to last.
      largeLazyField    //placeholder for large field, since this is null.  It must always be last
  };

  // Map<String fieldName, Fieldable field>
  public static Map all=new HashMap();
  public static Map indexed=new HashMap();
  public static Map stored=new HashMap();
@@ -94,11 +135,28 @@ class DocHelper {
  public static Map unindexed=new HashMap();
  public static Map termvector=new HashMap();
  public static Map notermvector=new HashMap();
  public static Map lazy= new HashMap();
  public static Map noNorms=new HashMap();

  static {
    //Initialize the large lazy field
    StringBuffer buffer = new StringBuffer();
    for (int i = 0; i < 10000; i++)
    {
      buffer.append("Lazily loading lengths of language in lieu of laughing ");
    }

    try {
      LAZY_FIELD_BINARY_BYTES = "These are some binary field bytes".getBytes("UTF8");
    } catch (UnsupportedEncodingException e) {
    }
    lazyFieldBinary = new Field(LAZY_FIELD_BINARY_KEY, LAZY_FIELD_BINARY_BYTES, Field.Store.YES);
    fields[fields.length - 2] = lazyFieldBinary;
    LARGE_LAZY_FIELD_TEXT = buffer.toString();
    largeLazyField = new Field(LARGE_LAZY_FIELD_KEY, LARGE_LAZY_FIELD_TEXT, Field.Store.YES, Field.Index.TOKENIZED);
    fields[fields.length - 1] = largeLazyField;
    for (int i=0; i<fields.length; i++) {
      Fieldable f = fields[i];
      add(all,f);
      if (f.isIndexed()) add(indexed,f);
      else add(unindexed,f);

@@ -107,11 +165,12 @@ class DocHelper {
      if (f.isStored()) add(stored,f);
      else add(unstored,f);
      if (f.getOmitNorms()) add(noNorms,f);
      if (f.isLazy()) add(lazy, f);
    }
  }

  private static void add(Map map, Fieldable field) {
    map.put(field.name(), field);
  }
@@ -121,13 +180,19 @@ class DocHelper {
    nameValues = new HashMap();
    nameValues.put(TEXT_FIELD_1_KEY, FIELD_1_TEXT);
    nameValues.put(TEXT_FIELD_2_KEY, FIELD_2_TEXT);
    nameValues.put(COMPRESSED_TEXT_FIELD_2_KEY, FIELD_2_COMPRESSED_TEXT);
    nameValues.put(TEXT_FIELD_3_KEY, FIELD_3_TEXT);
    nameValues.put(KEYWORD_FIELD_KEY, KEYWORD_TEXT);
    nameValues.put(NO_NORMS_KEY, NO_NORMS_TEXT);
    nameValues.put(UNINDEXED_FIELD_KEY, UNINDEXED_FIELD_TEXT);
    nameValues.put(UNSTORED_FIELD_1_KEY, UNSTORED_1_FIELD_TEXT);
    nameValues.put(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT);
    nameValues.put(LAZY_FIELD_KEY, LAZY_FIELD_TEXT);
    nameValues.put(LAZY_FIELD_BINARY_KEY, LAZY_FIELD_BINARY_BYTES);
    nameValues.put(LARGE_LAZY_FIELD_KEY, LARGE_LAZY_FIELD_TEXT);
    nameValues.put(TEXT_FIELD_UTF1_KEY, FIELD_UTF1_TEXT);
    nameValues.put(TEXT_FIELD_UTF2_KEY, FIELD_UTF2_TEXT);
  }

  /**
   * Adds the fields above to a document
@@ -21,8 +21,7 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.document.*;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.RAMDirectory;
@@ -64,7 +63,7 @@ public class TestDocumentWriter extends TestCase {
    assertTrue(doc != null);

    //System.out.println("Document: " + doc);
    Fieldable [] fields = doc.getFields("textField2");
    assertTrue(fields != null && fields.length == 1);
    assertTrue(fields[0].stringValue().equals(DocHelper.FIELD_2_TEXT));
    assertTrue(fields[0].isTermVectorStored());
@@ -17,13 +17,18 @@ package org.apache.lucene.index;
 */

import junit.framework.TestCase;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;

import java.io.File;
import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import java.util.Enumeration;

public class TestFieldsReader extends TestCase {
  private RAMDirectory dir = new RAMDirectory();
@@ -50,19 +55,19 @@ public class TestFieldsReader extends TestCase {
    FieldsReader reader = new FieldsReader(dir, "test", fieldInfos);
    assertTrue(reader != null);
    assertTrue(reader.size() == 1);
    Document doc = reader.doc(0, null);
    assertTrue(doc != null);
    assertTrue(doc.getField(DocHelper.TEXT_FIELD_1_KEY) != null);

    Fieldable field = doc.getField(DocHelper.TEXT_FIELD_2_KEY);
    assertTrue(field != null);
    assertTrue(field.isTermVectorStored() == true);

    assertTrue(field.isStoreOffsetWithTermVector() == true);
    assertTrue(field.isStorePositionWithTermVector() == true);
    assertTrue(field.getOmitNorms() == false);

    field = doc.getField(DocHelper.TEXT_FIELD_3_KEY);
    assertTrue(field != null);
    assertTrue(field.isTermVectorStored() == false);
    assertTrue(field.isStoreOffsetWithTermVector() == false);
@@ -72,4 +77,144 @@ public class TestFieldsReader extends TestCase {

    reader.close();
  }

  public void testLazyFields() throws Exception {
    assertTrue(dir != null);
    assertTrue(fieldInfos != null);
    FieldsReader reader = new FieldsReader(dir, "test", fieldInfos);
    assertTrue(reader != null);
    assertTrue(reader.size() == 1);
    Set loadFieldNames = new HashSet();
    loadFieldNames.add(DocHelper.TEXT_FIELD_1_KEY);
    loadFieldNames.add(DocHelper.TEXT_FIELD_UTF1_KEY);
    Set lazyFieldNames = new HashSet();
    //new String[]{DocHelper.LARGE_LAZY_FIELD_KEY, DocHelper.LAZY_FIELD_KEY, DocHelper.LAZY_FIELD_BINARY_KEY};
    lazyFieldNames.add(DocHelper.LARGE_LAZY_FIELD_KEY);
    lazyFieldNames.add(DocHelper.LAZY_FIELD_KEY);
    lazyFieldNames.add(DocHelper.LAZY_FIELD_BINARY_KEY);
    lazyFieldNames.add(DocHelper.TEXT_FIELD_UTF2_KEY);
    SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(loadFieldNames, lazyFieldNames);
    Document doc = reader.doc(0, fieldSelector);
    assertTrue("doc is null and it shouldn't be", doc != null);
    Fieldable field = doc.getField(DocHelper.LAZY_FIELD_KEY);
    assertTrue("field is null and it shouldn't be", field != null);
    assertTrue("field is not lazy and it should be", field.isLazy());
    String value = field.stringValue();
    assertTrue("value is null and it shouldn't be", value != null);
    assertTrue(value + " is not equal to " + DocHelper.LAZY_FIELD_TEXT, value.equals(DocHelper.LAZY_FIELD_TEXT) == true);
    field = doc.getField(DocHelper.TEXT_FIELD_1_KEY);
    assertTrue("field is null and it shouldn't be", field != null);
    assertTrue("field is lazy and it should not be", field.isLazy() == false);
    field = doc.getField(DocHelper.TEXT_FIELD_UTF1_KEY);
    assertTrue("field is null and it shouldn't be", field != null);
    assertTrue("field is lazy and it should not be", field.isLazy() == false);
    assertTrue(field.stringValue() + " is not equal to " + DocHelper.FIELD_UTF1_TEXT, field.stringValue().equals(DocHelper.FIELD_UTF1_TEXT) == true);

    field = doc.getField(DocHelper.TEXT_FIELD_UTF2_KEY);
    assertTrue("field is null and it shouldn't be", field != null);
    assertTrue("field is not lazy and it should be", field.isLazy() == true);
    assertTrue(field.stringValue() + " is not equal to " + DocHelper.FIELD_UTF2_TEXT, field.stringValue().equals(DocHelper.FIELD_UTF2_TEXT) == true);

    field = doc.getField(DocHelper.LAZY_FIELD_BINARY_KEY);
    assertTrue("field is null and it shouldn't be", field != null);
    byte [] bytes = field.binaryValue();
    assertTrue("bytes is null and it shouldn't be", bytes != null);
    assertTrue("", DocHelper.LAZY_FIELD_BINARY_BYTES.length == bytes.length);
    for (int i = 0; i < bytes.length; i++) {
      assertTrue("byte[" + i + "] is mismatched", bytes[i] == DocHelper.LAZY_FIELD_BINARY_BYTES[i]);
    }
  }

public void testLoadFirst() throws Exception {
|
||||
assertTrue(dir != null);
|
||||
assertTrue(fieldInfos != null);
|
||||
FieldsReader reader = new FieldsReader(dir, "test", fieldInfos);
|
||||
assertTrue(reader != null);
|
||||
assertTrue(reader.size() == 1);
|
||||
LoadFirstFieldSelector fieldSelector = new LoadFirstFieldSelector();
|
||||
Document doc = reader.doc(0, fieldSelector);
|
||||
assertTrue("doc is null and it shouldn't be", doc != null);
|
||||
int count = 0;
|
||||
Enumeration enumeration = doc.fields();
|
||||
while (enumeration.hasMoreElements()) {
|
||||
Field field = (Field) enumeration.nextElement();
|
||||
assertTrue("field is null and it shouldn't be", field != null);
|
||||
String sv = field.stringValue();
|
||||
assertTrue("sv is null and it shouldn't be", sv != null);
|
||||
count++;
|
||||
}
|
||||
assertTrue(count + " does not equal: " + 1, count == 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Not really a test per se, but we should have some way of assessing whether this is worthwhile.
|
||||
* <p/>
|
||||
* Must test using a File based directory
|
||||
*
|
||||
* @throws Exception
|
||||
*/
|
||||
public void testLazyPerformance() throws Exception {
|
||||
String tmpIODir = System.getProperty("java.io.tmpdir");
|
||||
String path = tmpIODir + File.separator + "lazyDir";
|
||||
File file = new File(path);
|
||||
FSDirectory tmpDir = FSDirectory.getDirectory(file, true);
|
||||
assertTrue(tmpDir != null);
|
||||
DocumentWriter writer = new DocumentWriter(tmpDir, new WhitespaceAnalyzer(),
|
||||
Similarity.getDefault(), 50);
|
||||
assertTrue(writer != null);
|
||||
writer.addDocument("test", testDoc);
|
||||
assertTrue(fieldInfos != null);
|
||||
FieldsReader reader;
|
||||
long lazyTime = 0;
|
||||
long regularTime = 0;
|
||||
int length = 50;
|
||||
Set lazyFieldNames = new HashSet();
|
||||
lazyFieldNames.add(DocHelper.LARGE_LAZY_FIELD_KEY);
|
||||
SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(Collections.EMPTY_SET, lazyFieldNames);
|
||||
|
||||
for (int i = 0; i < length; i++) {
|
||||
reader = new FieldsReader(tmpDir, "test", fieldInfos);
|
||||
assertTrue(reader != null);
|
||||
assertTrue(reader.size() == 1);
|
||||
|
||||
Document doc;
|
||||
doc = reader.doc(0, null);//Load all of them
|
||||
assertTrue("doc is null and it shouldn't be", doc != null);
|
||||
Fieldable field = doc.getField(DocHelper.LARGE_LAZY_FIELD_KEY);
|
||||
assertTrue("field is lazy", field.isLazy() == false);
|
||||
String value;
|
||||
long start;
|
||||
long finish;
|
||||
start = System.currentTimeMillis();
|
||||
//On my machine this was always 0ms.
|
||||
value = field.stringValue();
|
||||
finish = System.currentTimeMillis();
|
||||
assertTrue("value is null and it shouldn't be", value != null);
|
||||
assertTrue("field is null and it shouldn't be", field != null);
|
||||
regularTime += (finish - start);
|
||||
reader.close();
|
||||
reader = null;
|
||||
doc = null;
|
||||
//Hmmm, are we still in cache???
|
||||
System.gc();
|
||||
reader = new FieldsReader(tmpDir, "test", fieldInfos);
|
||||
doc = reader.doc(0, fieldSelector);
|
||||
field = doc.getField(DocHelper.LARGE_LAZY_FIELD_KEY);
|
||||
assertTrue("field is not lazy", field.isLazy() == true);
|
||||
start = System.currentTimeMillis();
|
||||
//On my machine this took around 50 - 70ms
|
||||
value = field.stringValue();
|
||||
finish = System.currentTimeMillis();
|
||||
assertTrue("value is null and it shouldn't be", value != null);
|
||||
lazyTime += (finish - start);
|
||||
reader.close();
|
||||
|
||||
}
|
||||
System.out.println("Average Non-lazy time (should be very close to zero): " + regularTime / length + " ms for " + length + " reads");
|
||||
System.out.println("Average Lazy Time (should be greater than zero): " + lazyTime / length + " ms for " + length + " reads");
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
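Taken together, the tests above show the intended call pattern for the new API. As a rough sketch of how application code might consume it (the class name, index path, and the "title"/"body" field names are illustrative only; IndexReader.document(int, FieldSelector), the SetBasedFieldSelector constructor, and Fieldable.isLazy()/stringValue() are the pieces this commit introduces):

import java.util.HashSet;
import java.util.Set;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.SetBasedFieldSelector;
import org.apache.lucene.index.IndexReader;

public class LazyLoadSketch {
  public static void main(String[] args) throws Exception {
    IndexReader reader = IndexReader.open("/path/to/index"); // hypothetical index location
    Set eager = new HashSet(); // fields whose values are loaded during document()
    eager.add("title");
    Set lazy = new HashSet(); // fields whose bytes are deferred until first access
    lazy.add("body");
    Document doc = reader.document(0, new SetBasedFieldSelector(eager, lazy));
    Fieldable body = doc.getField("body");
    if (body != null && body.isLazy()) {
      // The stored value is only read from the index here, not at document() time.
      String text = body.stringValue();
      System.out.println("body is " + text.length() + " chars");
    }
    reader.close();
  }
}

The timing loop in testLazyPerformance is essentially measuring the difference between that stringValue() call happening inside document() versus on demand.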
@@ -22,16 +22,56 @@ import org.apache.lucene.store.IndexInput;
import java.io.IOException;

public class TestIndexInput extends TestCase {
  public void testRead() throws IOException {
    IndexInput is = new MockIndexInput(new byte[] { (byte) 0x80, 0x01,
                                                    (byte) 0xFF, 0x7F,
                                                    (byte) 0x80, (byte) 0x80, 0x01,
                                                    (byte) 0x81, (byte) 0x80, 0x01,
                                                    0x06, 'L', 'u', 'c', 'e', 'n', 'e'});
    assertEquals(128,is.readVInt());
    assertEquals(16383,is.readVInt());
    assertEquals(16384,is.readVInt());
    assertEquals(16385,is.readVInt());
    assertEquals("Lucene",is.readString());
  }
  public void testRead() throws IOException {
    IndexInput is = new MockIndexInput(new byte[]{(byte) 0x80, 0x01,
        (byte) 0xFF, 0x7F,
        (byte) 0x80, (byte) 0x80, 0x01,
        (byte) 0x81, (byte) 0x80, 0x01,
        0x06, 'L', 'u', 'c', 'e', 'n', 'e'});
    assertEquals(128, is.readVInt());
    assertEquals(16383, is.readVInt());
    assertEquals(16384, is.readVInt());
    assertEquals(16385, is.readVInt());
    assertEquals("Lucene", is.readString());
  }

  /**
   * Expert
   *
   * @throws IOException
   */
  public void testSkipChars() throws IOException {
    byte[] bytes = new byte[]{(byte) 0x80, 0x01,
        (byte) 0xFF, 0x7F,
        (byte) 0x80, (byte) 0x80, 0x01,
        (byte) 0x81, (byte) 0x80, 0x01,
        0x06, 'L', 'u', 'c', 'e', 'n', 'e',
    };
    String utf8Str = "\u0634\u1ea1";
    byte[] utf8Bytes = utf8Str.getBytes("UTF-8");
    byte[] theBytes = new byte[bytes.length + 1 + utf8Bytes.length];
    System.arraycopy(bytes, 0, theBytes, 0, bytes.length);
    theBytes[bytes.length] = (byte) utf8Str.length();//Add in the number of chars we are storing, which should fit in a byte for this test
    System.arraycopy(utf8Bytes, 0, theBytes, bytes.length + 1, utf8Bytes.length);
    IndexInput is = new MockIndexInput(theBytes);
    assertEquals(128, is.readVInt());
    assertEquals(16383, is.readVInt());
    assertEquals(16384, is.readVInt());
    assertEquals(16385, is.readVInt());
    int charsToRead = is.readVInt();//number of chars in the Lucene string
    assertTrue(0x06 + " does not equal: " + charsToRead, 0x06 == charsToRead);
    is.skipChars(3);
    char[] chars = new char[3];//there should be 6 chars remaining
    is.readChars(chars, 0, 3);
    String tmpStr = new String(chars);
    assertTrue(tmpStr + " is not equal to " + "ene", tmpStr.equals("ene") == true);
    //Now read the UTF-8 stuff
    charsToRead = is.readVInt() - 1;//since we are skipping one
    is.skipChars(1);
    assertTrue(utf8Str.length() - 1 + " does not equal: " + charsToRead, utf8Str.length() - 1 == charsToRead);
    chars = new char[charsToRead];
    is.readChars(chars, 0, charsToRead);
    tmpStr = new String(chars);
    assertTrue(tmpStr + " is not equal to " + utf8Str.substring(1), tmpStr.equals(utf8Str.substring(1)) == true);
  }
}
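The byte literals in both tests follow Lucene's VInt encoding: seven bits per byte, least-significant group first, with the high bit flagging a continuation byte; readString then reads a VInt character count followed by the characters. A quick sketch of that arithmetic (not Lucene code, just an illustration of where the expected values in the assertions come from):

import java.io.ByteArrayOutputStream;

public class VIntSketch {
  // Encodes a non-negative int as a Lucene-style VInt: 7 bits per byte,
  // low-order group first, high bit set on every byte except the last.
  static byte[] encode(int value) {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    while ((value & ~0x7F) != 0) {
      out.write((value & 0x7F) | 0x80);
      value >>>= 7;
    }
    out.write(value);
    return out.toByteArray();
  }

  public static void main(String[] args) {
    // 128 -> 80 01; 16383 -> ff 7f; 16384 -> 80 80 01; 16385 -> 81 80 01
    int[] values = {128, 16383, 16384, 16385};
    for (int i = 0; i < values.length; i++) {
      byte[] enc = encode(values[i]);
      StringBuffer sb = new StringBuffer(values[i] + " ->");
      for (int j = 0; j < enc.length; j++) {
        sb.append(" ").append(Integer.toHexString(enc[j] & 0xFF));
      }
      System.out.println(sb);
    }
  }
}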
@@ -22,6 +22,8 @@ import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;

@@ -270,9 +272,9 @@ class IndexThread extends Thread {
      id++;
    }
    // add random stuff:
    doc.add(new Field("content", new Integer(random.nextInt(1000)).toString(), Field.Store.YES,
        Field.Index.TOKENIZED));
    doc.add(new Field("content", new Integer(random.nextInt(1000)).toString(), Field.Store.YES,
        Field.Index.TOKENIZED));
    doc.add(new Field("all", "x", Field.Store.YES, Field.Index.TOKENIZED));
    return doc;
@@ -16,20 +16,25 @@ package org.apache.lucene.index;
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Collection;

import junit.framework.TestCase;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.document.MapFieldSelector;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.*;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Enumeration;

public class TestParallelReader extends TestCase {

@@ -71,6 +76,35 @@ public class TestParallelReader extends TestCase {
    assertTrue(fieldNames.contains("f4"));
  }

  public void testDocument() throws IOException {
    Directory dir1 = getDir1();
    Directory dir2 = getDir2();
    ParallelReader pr = new ParallelReader();
    pr.add(IndexReader.open(dir1));
    pr.add(IndexReader.open(dir2));

    Document doc11 = pr.document(0, new MapFieldSelector(new String[] {"f1"}));
    Document doc24 = pr.document(1, new MapFieldSelector(Arrays.asList(new String[] {"f4"})));
    Document doc223 = pr.document(1, new MapFieldSelector(new String[] {"f2", "f3"}));

    assertEquals(1, numFields(doc11));
    assertEquals(1, numFields(doc24));
    assertEquals(2, numFields(doc223));

    assertEquals("v1", doc11.get("f1"));
    assertEquals("v2", doc24.get("f4"));
    assertEquals("v2", doc223.get("f2"));
    assertEquals("v2", doc223.get("f3"));
  }

  private int numFields(Document doc) {
    int num;
    Enumeration e = doc.fields();
    for (num = 0; e.hasMoreElements(); num++)
      e.nextElement();
    return num;
  }

  public void testIncompatibleIndexes() throws IOException {
    // two documents:
    Directory dir1 = getDir1();
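MapFieldSelector, also new in this commit, is the convenience selector used in testDocument: every named field is loaded, everything else is skipped entirely. A minimal sketch of the consuming side, assuming two parallel indexes already built with aligned document numbers (the class and method names here are illustrative; the MapFieldSelector constructors and ParallelReader.document(int, FieldSelector) are what the test exercises):

import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.MapFieldSelector;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.ParallelReader;
import org.apache.lucene.store.Directory;

public class ParallelSelectorSketch {
  // Loads only "f1" for document n, even though the parallel reader
  // spans two indexes with several stored fields between them.
  static String loadF1(Directory dir1, Directory dir2, int n) throws IOException {
    ParallelReader pr = new ParallelReader();
    pr.add(IndexReader.open(dir1));
    pr.add(IndexReader.open(dir2));
    Document doc = pr.document(n, new MapFieldSelector(new String[] {"f1"}));
    String value = doc.get("f1");
    pr.close();
    return value;
  }
}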
@@ -87,7 +87,7 @@ public class TestSegmentMerger extends TestCase {
    Collection stored = mergedReader.getFieldNames(IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR);
    assertTrue(stored != null);
    //System.out.println("stored size: " + stored.size());
    assertTrue(stored.size() == 2);
    assertTrue("We do not have 4 fields that were indexed with term vector", stored.size() == 4);

    TermFreqVector vector = mergedReader.getTermFreqVector(0, DocHelper.TEXT_FIELD_2_KEY);
    assertTrue(vector != null);
@@ -19,7 +19,7 @@ package org.apache.lucene.index;
import junit.framework.TestCase;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.search.DefaultSimilarity;

import java.io.IOException;

@@ -64,7 +64,7 @@ public class TestSegmentReader extends TestCase {
    Enumeration fields = result.fields();
    while (fields.hasMoreElements()) {
      Field field = (Field) fields.nextElement();
      Fieldable field = (Fieldable) fields.nextElement();
      assertTrue(field != null);
      assertTrue(DocHelper.nameValues.containsKey(field.name()));
    }

@@ -166,7 +166,7 @@ public class TestSegmentReader extends TestCase {
  public static void checkNorms(IndexReader reader) throws IOException {
    // test omit norms
    for (int i = 0; i < DocHelper.fields.length; i++) {
      Field f = DocHelper.fields[i];
      Fieldable f = DocHelper.fields[i];
      if (f.isIndexed()) {
        assertEquals(reader.hasNorms(f.name()), !f.getOmitNorms());
        assertEquals(reader.hasNorms(f.name()), !DocHelper.noNorms.containsKey(f.name()));

@@ -202,7 +202,7 @@ public class TestSegmentReader extends TestCase {
    TermFreqVector[] results = reader.getTermFreqVectors(0);
    assertTrue(results != null);
    assertTrue(results.length == 2);
    assertTrue("We do not have 4 term freq vectors, we have: " + results.length, results.length == 4);
  }

}
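The recurring pattern in these test diffs is the migration this commit asks of consuming code: iterate stored fields through the new Fieldable interface rather than the concrete Field class, since fields materialized through a FieldSelector need not be Field instances. A small sketch of that consuming side (the class name and output format are illustrative):

import java.util.Enumeration;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;

public class FieldableDumpSketch {
  // Walks every stored field without assuming the concrete Field class,
  // so lazily loaded field implementations work as well.
  static void dump(Document doc) {
    Enumeration fields = doc.fields();
    while (fields.hasMoreElements()) {
      Fieldable f = (Fieldable) fields.nextElement();
      System.out.println(f.name() + " lazy=" + f.isLazy());
    }
  }
}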
@@ -17,14 +17,11 @@ package org.apache.lucene.search;
 */

import junit.framework.TestCase;

import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.*;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.RAMDirectory;

/** Document boost unit test.
 *

@@ -35,27 +32,27 @@ public class TestDocBoost extends TestCase {
  public TestDocBoost(String name) {
    super(name);
  }

  public void testDocBoost() throws Exception {
    RAMDirectory store = new RAMDirectory();
    IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true);

    Field f1 = new Field("field", "word", Field.Store.YES, Field.Index.TOKENIZED);
    Field f2 = new Field("field", "word", Field.Store.YES, Field.Index.TOKENIZED);
    Fieldable f1 = new Field("field", "word", Field.Store.YES, Field.Index.TOKENIZED);
    Fieldable f2 = new Field("field", "word", Field.Store.YES, Field.Index.TOKENIZED);
    f2.setBoost(2.0f);

    Document d1 = new Document();
    Document d2 = new Document();
    Document d3 = new Document();
    Document d4 = new Document();
    d3.setBoost(3.0f);
    d4.setBoost(2.0f);

    d1.add(f1);   // boost = 1
    d2.add(f2);   // boost = 2
    d3.add(f1);   // boost = 3
    d4.add(f2);   // boost = 4

    writer.addDocument(d1);
    writer.addDocument(d2);
    writer.addDocument(d3);

@@ -72,7 +69,7 @@ public class TestDocBoost extends TestCase {
        scores[doc] = score;
      }
    });

    float lastScore = 0.0f;

    for (int i = 0; i < 4; i++) {
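The "boost = 1 .. 4" comments in testDocBoost come from multiplying each document's boost by the boost of the field instance it holds; the test then checks that the four hits score in that order. The arithmetic, spelled out (plain Java, not Lucene API):

public class BoostArithmetic {
  public static void main(String[] args) {
    float[] docBoost = {1.0f, 1.0f, 3.0f, 2.0f};   // d1..d4 (default boost is 1.0)
    float[] fieldBoost = {1.0f, 2.0f, 1.0f, 2.0f}; // f1, f2, f1, f2
    for (int i = 0; i < 4; i++) {
      // the index-time boost folded into each document's norm
      System.out.println("d" + (i + 1) + " = " + docBoost[i] * fieldBoost[i]);
    }
    // prints 1.0, 2.0, 3.0, 4.0, matching the ordering the test asserts
  }
}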
@@ -16,19 +16,17 @@ package org.apache.lucene.search;
 * limitations under the License.
 */

import java.io.IOException;

import junit.framework.TestCase;

import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.*;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.English;

import java.io.IOException;

/**
 * @author Bernhard Messer
 * @version $rcs = ' $Id$ ' ;

@@ -49,7 +47,7 @@ public class TestMultiThreadTermVectors extends TestCase {
    //writer.infoStream = System.out;
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      Field fld = new Field("field", English.intToEnglish(i), Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.YES);
      Fieldable fld = new Field("field", English.intToEnglish(i), Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.YES);
      doc.add(fld);
      writer.addDocument(doc);
    }
@@ -16,22 +16,17 @@ package org.apache.lucene.search;
 * limitations under the License.
 */

import java.io.IOException;
import java.io.Reader;

import junit.framework.TestCase;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.analysis.*;
import org.apache.lucene.document.*;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

import java.io.IOException;
import java.io.Reader;

/**
 * Tests {@link PhraseQuery}.
 *

@@ -59,7 +54,7 @@ public class TestPhraseQuery extends TestCase {
    Document doc = new Document();
    doc.add(new Field("field", "one two three four five", Field.Store.YES, Field.Index.TOKENIZED));
    doc.add(new Field("repeated", "this is a repeated field - first part", Field.Store.YES, Field.Index.TOKENIZED));
    Field repeatedField = new Field("repeated", "second part of a repeated field", Field.Store.YES, Field.Index.TOKENIZED);
    Fieldable repeatedField = new Field("repeated", "second part of a repeated field", Field.Store.YES, Field.Index.TOKENIZED);
    doc.add(repeatedField);
    writer.addDocument(doc);
@@ -17,15 +17,12 @@ package org.apache.lucene.search;
 */

import junit.framework.TestCase;

import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.*;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.RAMDirectory;

/** Document boost unit test.
 *

@@ -36,13 +33,13 @@ public class TestSetNorm extends TestCase {
  public TestSetNorm(String name) {
    super(name);
  }

  public void testSetNorm() throws Exception {
    RAMDirectory store = new RAMDirectory();
    IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true);

    // add the same document four times
    Field f1 = new Field("field", "word", Field.Store.YES, Field.Index.TOKENIZED);
    Fieldable f1 = new Field("field", "word", Field.Store.YES, Field.Index.TOKENIZED);
    Document d1 = new Document();
    d1.add(f1);
    writer.addDocument(d1);

@@ -69,7 +66,7 @@ public class TestSetNorm extends TestCase {
        scores[doc] = score;
      }
    });

    float lastScore = 0.0f;

    for (int i = 0; i < 4; i++) {
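TestSetNorm drives the same scoring mechanism from the other end: instead of boosting at index time, it rewrites norms on an existing index. A minimal sketch of that API under the assumption that IndexReader.setNorm(int, String, float) behaves as it does elsewhere in this codebase (the document number, field name, and value below are illustrative; setNorm itself predates this commit):

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.RAMDirectory;

public class SetNormSketch {
  // Rewrites the stored norm for one document/field pair; subsequent
  // searches score that document as if it had been boosted at index time.
  static void boostAfterTheFact(RAMDirectory store) throws Exception {
    IndexReader reader = IndexReader.open(store);
    reader.setNorm(0, "field", 2.0f); // doc 0 now scores higher on "field"
    reader.close();                   // closing commits the norm change
  }
}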