LUCENE-2308: more cleanup

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1163251 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2011-08-30 15:05:14 +00:00
parent 7c92a0d48d
commit 5e4944b84b
9 changed files with 173 additions and 63 deletions

View File

@ -407,3 +407,115 @@ LUCENE-1458, LUCENE-2111: Flexible Indexing
- o.a.l.queryParser.QueryParserTokenManager -> o.a.l.queryparser.classic.QueryParserTokenManager
- o.a.l.queryParser.QueryParserToken -> o.a.l.queryparser.classic.Token
- o.a.l.queryParser.QueryParserTokenMgrError -> o.a.l.queryparser.classic.TokenMgrError
* LUCENE-2308: Separate FieldType from Field instances
With this change, the indexing details (indexed, tokenized, norms,
indexOptions, stored, etc.) are moved into a separate FieldType
instance (rather than being stored directly on the Field).
This means you can create the FieldType instance once, up front, for a
given field, and then re-use that instance whenever you instantiate
the Field.
Certain field types are pre-defined since they are common cases:
* StringField: indexes a String value as a single token (ie, does
not tokenize). This field turns off norms and indexes only doc
IDS (does not index term frequency nor positions). This field
does not store its value, but exposes TYPE_STORED as well.
* BinaryField: a byte[] value that's only stored.
* TextField: indexes and tokenizes a String, Reader or TokenStream
value, without term vectors. This field does not store its value,
but exposes TYPE_STORED as well.
If your usage fits one of those common cases you can simply
instantiate the above class. To use the TYPE_STORED variant, do this
instead:
Field f = new Field("field", StringField.TYPE_STORED, "value");
Alternatively, if an existing type is close to what you want but you
need to make a few changes, you can copy that type and make changes:
FieldType bodyType = new FieldType(TextField.TYPE_STORED);
bodyType.setStoreTermVectors(true);
You can of course also create your own FieldType from scratch:
FieldType t = new FieldType();
t.setIndexed(true);
t.setStored(true);
t.setOmitNorms(true);
t.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
FieldType has a freeze() method to prevent further changes. Note that
once a FieldType is bound to a Field, it's frozen, to help prevent
confusing bugs.
When migrating from the 3.x API, if you did this before:
new Field("field", "value", Field.Store.NO, Field.Indexed.NOT_ANALYZED_NO_NORMS)
you can now do this:
new StringField("field", "value")
(though note that StringField indexes DOCS_ONLY).
If instead the value was stored:
new Field("field", "value", Field.Store.YES, Field.Indexed.NOT_ANALYZED_NO_NORMS)
you can now do this:
new Field("field", StringField.TYPE_STORED, "value")
If you didn't omit norms:
new Field("field", "value", Field.Store.YES, Field.Indexed.NOT_ANALYZED)
you can now do this:
FieldType ft = new FieldType(StringField.TYPE_STORED);
ft.setOmitNorms(false);
new Field("field", ft, "value")
If you did this before (value can be String or Reader):
new Field("field", value, Field.Store.NO, Field.Indexed.ANALYZED)
you can now do this:
new TextField("field", value)
If instead the value was stored:
new Field("field", value, Field.Store.YES, Field.Indexed.ANALYZED)
you can now do this:
new Field("field", TextField.TYPE_STORED, value)
If in addition you omit norms:
new Field("field", value, Field.Store.YES, Field.Indexed.ANALYZED_NO_NORMS)
you can now do this:
FieldType ft = new FieldType(TextField.TYPE_STORED);
ft.setOmitNorms(true);
new Field("field", ft, value)
If you did this before (bytes is a byte[]):
new Field("field", bytes)
you can now do this:
new BinaryField("field", bytes)

View File

@ -174,7 +174,6 @@ public class FieldSelectorVisitor extends StoredFieldVisitor {
ft.setStored(true);
ft.setOmitNorms(fi.omitNorms);
ft.setIndexOptions(fi.indexOptions);
ft.setLazy(true);
if (binary) {
f = new LazyField(in, fi.name, ft, numBytes, pointer, binary, cacheResult);

View File

@ -19,6 +19,8 @@ import org.apache.lucene.util.BytesRef;
* limitations under the License.
*/
/** A field with byte[] value that is only stored. */
public final class BinaryField extends Field {
public static final FieldType TYPE_STORED = new FieldType();
@ -27,23 +29,18 @@ public final class BinaryField extends Field {
TYPE_STORED.freeze();
}
/** Creates a new BinaryField */
public BinaryField(String name, byte[] value) {
super(name, BinaryField.TYPE_STORED, value);
}
/** Creates a new BinaryField */
public BinaryField(String name, byte[] value, int offset, int length) {
super(name, BinaryField.TYPE_STORED, value, offset, length);
}
/** Creates a new BinaryField */
public BinaryField(String name, BytesRef bytes) {
super(name, BinaryField.TYPE_STORED, bytes);
}
public BinaryField(String name, FieldType custom, byte[] value) {
super(name, custom, value);
}
public boolean isNumeric() {
return false;
}
}

View File

@ -50,6 +50,7 @@ public class Field implements IndexableField {
public Field(String name, FieldType type) {
this.name = name;
this.type = type;
type.freeze();
}
public Field(String name, FieldType type, Reader reader) {
@ -63,6 +64,7 @@ public class Field implements IndexableField {
this.name = name;
this.fieldsData = reader;
this.type = type;
type.freeze();
}
public Field(String name, FieldType type, TokenStream tokenStream) {
@ -77,6 +79,7 @@ public class Field implements IndexableField {
this.fieldsData = null;
this.tokenStream = tokenStream;
this.type = type;
type.freeze();
}
public Field(String name, FieldType type, byte[] value) {
@ -87,12 +90,14 @@ public class Field implements IndexableField {
this.fieldsData = new BytesRef(value, offset, length);
this.type = type;
this.name = name;
type.freeze();
}
public Field(String name, FieldType type, BytesRef bytes) {
this.fieldsData = bytes;
this.type = type;
this.name = name;
type.freeze();
}
public Field(String name, FieldType type, String value) {
@ -114,6 +119,7 @@ public class Field implements IndexableField {
this.type = type;
this.name = name;
this.fieldsData = value;
type.freeze();
}
/**
@ -194,22 +200,6 @@ public class Field implements IndexableField {
fieldsData = new BytesRef(value);
}
/**
* Expert: change the value of this field. See <a
* href="#setValue(java.lang.String)">setValue(String)</a>.
*/
/*
public void setValue(byte[] value, int offset, int length) {
if (!isBinary) {
throw new IllegalArgumentException(
"cannot set a byte[] value on a non-binary field");
}
fieldsData = value;
binaryLength = length;
binaryOffset = offset;
}
*/
/**
* Expert: sets the token stream to be used for indexing and causes
* isIndexed() and isTokenized() to return true. May be combined with stored
@ -316,7 +306,7 @@ public class Field implements IndexableField {
result.append(name);
result.append(':');
if (fieldsData != null && type.lazy() == false) {
if (fieldsData != null) {
result.append(fieldsData);
}

View File

@ -29,7 +29,6 @@ public class FieldType {
private boolean storeTermVectorPositions;
private boolean omitNorms;
private IndexOptions indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
private boolean lazy;
private boolean frozen;
public FieldType(FieldType ref) {
@ -41,7 +40,7 @@ public class FieldType {
this.storeTermVectorPositions = ref.storeTermVectorPositions();
this.omitNorms = ref.omitNorms();
this.indexOptions = ref.indexOptions();
this.lazy = ref.lazy();
// Do not copy frozen!
}
public FieldType() {
@ -52,7 +51,9 @@ public class FieldType {
throw new IllegalStateException();
}
}
/** Prevents future changes. Note that when a FieldType
* is first bound to a Field instance, it is frozen. */
public void freeze() {
this.frozen = true;
}
@ -129,15 +130,6 @@ public class FieldType {
this.indexOptions = value;
}
public boolean lazy() {
return this.lazy;
}
public void setLazy(boolean value) {
checkIfFrozen();
this.lazy = value;
}
/** Prints a Field for human consumption. */
@Override
public final String toString() {
@ -177,9 +169,6 @@ public class FieldType {
result.append(",indexOptions=");
result.append(indexOptions);
}
if (lazy()){
result.append(",lazy");
}
return result.toString();
}

View File

@ -19,10 +19,26 @@ import org.apache.lucene.index.FieldInfo.IndexOptions;
* limitations under the License.
*/
/** A field that is indexed but not tokenized: the entire
* String value is indexed as a single token. For example
* this might be used for a 'country' field or an 'id'
* field, or any field that you intend to use for sorting
* or access through the field cache.
*
* <p/>This field's value is not stored by default; use the
* {@link TYPE_STORED} type (pass it to <code>new
* Field</code>) to store the value. */
public final class StringField extends Field {
/** Indexed, not tokenized, omits norms, indexes
* DOCS_ONLY, not stored. */
public static final FieldType TYPE_UNSTORED = new FieldType();
/** Indexed, not tokenized, omits norms, indexes
* DOCS_ONLY, stored */
public static final FieldType TYPE_STORED = new FieldType();
static {
TYPE_UNSTORED.setIndexed(true);
TYPE_UNSTORED.setOmitNorms(true);
@ -36,6 +52,7 @@ public final class StringField extends Field {
TYPE_STORED.freeze();
}
/** Creates a new un-stored StringField */
public StringField(String name, String value) {
super(name, TYPE_UNSTORED, value);
}
@ -44,8 +61,4 @@ public final class StringField extends Field {
public String stringValue() {
return (fieldsData == null) ? null : fieldsData.toString();
}
public boolean isNumeric() {
return false;
}
}

View File

@ -21,10 +21,22 @@ import java.io.Reader;
import org.apache.lucene.analysis.TokenStream;
/** A field that is indexed and tokenized, without term
* vectors. For example this would be used on a 'body'
* field, that contains the bulk of a document's text.
*
* This field's value is not stored by default; use the
* {@link TYPE_STORED} type (pass it to <code>new
* Field</code>) to store the value. */
public final class TextField extends Field {
/* Indexed, tokenized, not stored. */
public static final FieldType TYPE_UNSTORED = new FieldType();
/* Indexed, tokenized, stored. */
public static final FieldType TYPE_STORED = new FieldType();
static {
TYPE_UNSTORED.setIndexed(true);
TYPE_UNSTORED.setTokenized(true);
@ -35,20 +47,19 @@ public final class TextField extends Field {
TYPE_STORED.setTokenized(true);
TYPE_STORED.freeze();
}
/** Creates a new un-stored TextField */
public TextField(String name, Reader reader) {
super(name, TextField.TYPE_UNSTORED, reader);
}
/** Creates a new un-stored TextField */
public TextField(String name, String value) {
super(name, TextField.TYPE_UNSTORED, value);
}
/** Creates a new un-stored TextField */
public TextField(String name, TokenStream stream) {
super(name, TextField.TYPE_UNSTORED, stream);
}
public boolean isNumeric() {
return false;
}
}

View File

@ -56,8 +56,7 @@ to check if the results are what we expect):</p>
new IndexWriter.MaxFieldLength(25000));
Document doc = new Document();
String text = "This is the text to be indexed.";
doc.add(new Field("fieldname", text, Field.Store.YES,
Field.Index.ANALYZED));
doc.add(new Field("fieldname", TextField.TYPE_STORED, text));
iwriter.addDocument(doc);
iwriter.close();

View File

@ -179,8 +179,8 @@ public class DocMaker {
protected Config config;
protected final FieldType valType;
protected final FieldType bodyValType;
protected FieldType valType;
protected FieldType bodyValType;
protected ContentSource source;
protected boolean reuseFields;
@ -194,10 +194,6 @@ public class DocMaker {
private int printNum = 0;
public DocMaker() {
valType = new FieldType(TextField.TYPE_UNSTORED);
valType.setOmitNorms(true);
bodyValType = new FieldType(TextField.TYPE_UNSTORED);
}
// create a doc
@ -465,20 +461,24 @@ public class DocMaker {
boolean termVecPositions = config.get("doc.term.vector.positions", false);
boolean termVecOffsets = config.get("doc.term.vector.offsets", false);
valType = new FieldType(TextField.TYPE_UNSTORED);
valType.setStored(stored);
bodyValType.setStored(bodyStored);
valType.setTokenized(tokenized);
valType.setOmitNorms(!norms);
bodyValType.setTokenized(bodyTokenized);
bodyValType.setOmitNorms(!bodyNorms);
valType.setStoreTermVectors(termVec);
valType.setStoreTermVectorPositions(termVecPositions);
valType.setStoreTermVectorOffsets(termVecOffsets);
valType.freeze();
bodyValType = new FieldType(TextField.TYPE_UNSTORED);
bodyValType.setStored(bodyStored);
bodyValType.setTokenized(bodyTokenized);
bodyValType.setOmitNorms(!bodyNorms);
bodyValType.setStoreTermVectors(termVec);
bodyValType.setStoreTermVectorPositions(termVecPositions);
bodyValType.setStoreTermVectorOffsets(termVecOffsets);
bodyValType.freeze();
storeBytes = config.get("doc.store.body.bytes", false);
reuseFields = config.get("doc.reuse.fields", true);