mirror of https://github.com/apache/lucene.git
LUCENE-2308: more cleanup
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1163251 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
7c92a0d48d
commit
5e4944b84b
|
@ -407,3 +407,115 @@ LUCENE-1458, LUCENE-2111: Flexible Indexing
|
|||
- o.a.l.queryParser.QueryParserTokenManager -> o.a.l.queryparser.classic.QueryParserTokenManager
|
||||
- o.a.l.queryParser.QueryParserToken -> o.a.l.queryparser.classic.Token
|
||||
- o.a.l.queryParser.QueryParserTokenMgrError -> o.a.l.queryparser.classic.TokenMgrError
|
||||
|
||||
|
||||
|
||||
* LUCENE-2308: Separate FieldType from Field instances
|
||||
|
||||
With this change, the indexing details (indexed, tokenized, norms,
|
||||
indexOptions, stored, etc.) are moved into a separate FieldType
|
||||
instance (rather than being stored directly on the Field).
|
||||
|
||||
This means you can create the FieldType instance once, up front, for a
|
||||
given field, and then re-use that instance whenever you instantiate
|
||||
the Field.
|
||||
|
||||
Certain field types are pre-defined since they are common cases:
|
||||
|
||||
* StringField: indexes a String value as a single token (ie, does
|
||||
not tokenize). This field turns off norms and indexes only doc
|
||||
IDS (does not index term frequency nor positions). This field
|
||||
does not store its value, but exposes TYPE_STORED as well.
|
||||
|
||||
* BinaryField: a byte[] value that's only stored.
|
||||
|
||||
* TextField: indexes and tokenizes a String, Reader or TokenStream
|
||||
value, without term vectors. This field does not store its value,
|
||||
but exposes TYPE_STORED as well.
|
||||
|
||||
If your usage fits one of those common cases you can simply
|
||||
instantiate the above class. To use the TYPE_STORED variant, do this
|
||||
instead:
|
||||
|
||||
Field f = new Field("field", StringField.TYPE_STORED, "value");
|
||||
|
||||
Alternatively, if an existing type is close to what you want but you
|
||||
need to make a few changes, you can copy that type and make changes:
|
||||
|
||||
FieldType bodyType = new FieldType(TextField.TYPE_STORED);
|
||||
bodyType.setStoreTermVectors(true);
|
||||
|
||||
|
||||
You can of course also create your own FieldType from scratch:
|
||||
|
||||
FieldType t = new FieldType();
|
||||
t.setIndexed(true);
|
||||
t.setStored(true);
|
||||
t.setOmitNorms(true);
|
||||
t.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
|
||||
|
||||
FieldType has a freeze() method to prevent further changes. Note that
|
||||
once a FieldType is bound to a Field, it's frozen, to help prevent
|
||||
confusing bugs.
|
||||
|
||||
When migrating from the 3.x API, if you did this before:
|
||||
|
||||
new Field("field", "value", Field.Store.NO, Field.Indexed.NOT_ANALYZED_NO_NORMS)
|
||||
|
||||
you can now do this:
|
||||
|
||||
new StringField("field", "value")
|
||||
|
||||
(though note that StringField indexes DOCS_ONLY).
|
||||
|
||||
If instead the value was stored:
|
||||
|
||||
new Field("field", "value", Field.Store.YES, Field.Indexed.NOT_ANALYZED_NO_NORMS)
|
||||
|
||||
you can now do this:
|
||||
|
||||
new Field("field", StringField.TYPE_STORED, "value")
|
||||
|
||||
If you didn't omit norms:
|
||||
|
||||
new Field("field", "value", Field.Store.YES, Field.Indexed.NOT_ANALYZED)
|
||||
|
||||
you can now do this:
|
||||
|
||||
FieldType ft = new FieldType(StringField.TYPE_STORED);
|
||||
ft.setOmitNorms(false);
|
||||
new Field("field", ft, "value")
|
||||
|
||||
If you did this before (value can be String or Reader):
|
||||
|
||||
new Field("field", value, Field.Store.NO, Field.Indexed.ANALYZED)
|
||||
|
||||
you can now do this:
|
||||
|
||||
new TextField("field", value)
|
||||
|
||||
If instead the value was stored:
|
||||
|
||||
new Field("field", value, Field.Store.YES, Field.Indexed.ANALYZED)
|
||||
|
||||
you can now do this:
|
||||
|
||||
new Field("field", TextField.TYPE_STORED, value)
|
||||
|
||||
If in addition you omit norms:
|
||||
|
||||
new Field("field", value, Field.Store.YES, Field.Indexed.ANALYZED_NO_NORMS)
|
||||
|
||||
you can now do this:
|
||||
|
||||
FieldType ft = new FieldType(TextField.TYPE_STORED);
|
||||
ft.setOmitNorms(true);
|
||||
new Field("field", ft, value)
|
||||
|
||||
If you did this before (bytes is a byte[]):
|
||||
|
||||
new Field("field", bytes)
|
||||
|
||||
you can now do this:
|
||||
|
||||
new BinaryField("field", bytes)
|
||||
|
|
|
@ -174,7 +174,6 @@ public class FieldSelectorVisitor extends StoredFieldVisitor {
|
|||
ft.setStored(true);
|
||||
ft.setOmitNorms(fi.omitNorms);
|
||||
ft.setIndexOptions(fi.indexOptions);
|
||||
ft.setLazy(true);
|
||||
|
||||
if (binary) {
|
||||
f = new LazyField(in, fi.name, ft, numBytes, pointer, binary, cacheResult);
|
||||
|
|
|
@ -19,6 +19,8 @@ import org.apache.lucene.util.BytesRef;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/** A field with byte[] value that is only stored. */
|
||||
|
||||
public final class BinaryField extends Field {
|
||||
|
||||
public static final FieldType TYPE_STORED = new FieldType();
|
||||
|
@ -27,23 +29,18 @@ public final class BinaryField extends Field {
|
|||
TYPE_STORED.freeze();
|
||||
}
|
||||
|
||||
/** Creates a new BinaryField */
|
||||
public BinaryField(String name, byte[] value) {
|
||||
super(name, BinaryField.TYPE_STORED, value);
|
||||
}
|
||||
|
||||
/** Creates a new BinaryField */
|
||||
public BinaryField(String name, byte[] value, int offset, int length) {
|
||||
super(name, BinaryField.TYPE_STORED, value, offset, length);
|
||||
}
|
||||
|
||||
/** Creates a new BinaryField */
|
||||
public BinaryField(String name, BytesRef bytes) {
|
||||
super(name, BinaryField.TYPE_STORED, bytes);
|
||||
}
|
||||
|
||||
public BinaryField(String name, FieldType custom, byte[] value) {
|
||||
super(name, custom, value);
|
||||
}
|
||||
|
||||
public boolean isNumeric() {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -50,6 +50,7 @@ public class Field implements IndexableField {
|
|||
public Field(String name, FieldType type) {
|
||||
this.name = name;
|
||||
this.type = type;
|
||||
type.freeze();
|
||||
}
|
||||
|
||||
public Field(String name, FieldType type, Reader reader) {
|
||||
|
@ -63,6 +64,7 @@ public class Field implements IndexableField {
|
|||
this.name = name;
|
||||
this.fieldsData = reader;
|
||||
this.type = type;
|
||||
type.freeze();
|
||||
}
|
||||
|
||||
public Field(String name, FieldType type, TokenStream tokenStream) {
|
||||
|
@ -77,6 +79,7 @@ public class Field implements IndexableField {
|
|||
this.fieldsData = null;
|
||||
this.tokenStream = tokenStream;
|
||||
this.type = type;
|
||||
type.freeze();
|
||||
}
|
||||
|
||||
public Field(String name, FieldType type, byte[] value) {
|
||||
|
@ -87,12 +90,14 @@ public class Field implements IndexableField {
|
|||
this.fieldsData = new BytesRef(value, offset, length);
|
||||
this.type = type;
|
||||
this.name = name;
|
||||
type.freeze();
|
||||
}
|
||||
|
||||
public Field(String name, FieldType type, BytesRef bytes) {
|
||||
this.fieldsData = bytes;
|
||||
this.type = type;
|
||||
this.name = name;
|
||||
type.freeze();
|
||||
}
|
||||
|
||||
public Field(String name, FieldType type, String value) {
|
||||
|
@ -114,6 +119,7 @@ public class Field implements IndexableField {
|
|||
this.type = type;
|
||||
this.name = name;
|
||||
this.fieldsData = value;
|
||||
type.freeze();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -194,22 +200,6 @@ public class Field implements IndexableField {
|
|||
fieldsData = new BytesRef(value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Expert: change the value of this field. See <a
|
||||
* href="#setValue(java.lang.String)">setValue(String)</a>.
|
||||
*/
|
||||
/*
|
||||
public void setValue(byte[] value, int offset, int length) {
|
||||
if (!isBinary) {
|
||||
throw new IllegalArgumentException(
|
||||
"cannot set a byte[] value on a non-binary field");
|
||||
}
|
||||
fieldsData = value;
|
||||
binaryLength = length;
|
||||
binaryOffset = offset;
|
||||
}
|
||||
*/
|
||||
|
||||
/**
|
||||
* Expert: sets the token stream to be used for indexing and causes
|
||||
* isIndexed() and isTokenized() to return true. May be combined with stored
|
||||
|
@ -316,7 +306,7 @@ public class Field implements IndexableField {
|
|||
result.append(name);
|
||||
result.append(':');
|
||||
|
||||
if (fieldsData != null && type.lazy() == false) {
|
||||
if (fieldsData != null) {
|
||||
result.append(fieldsData);
|
||||
}
|
||||
|
||||
|
|
|
@ -29,7 +29,6 @@ public class FieldType {
|
|||
private boolean storeTermVectorPositions;
|
||||
private boolean omitNorms;
|
||||
private IndexOptions indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||
private boolean lazy;
|
||||
private boolean frozen;
|
||||
|
||||
public FieldType(FieldType ref) {
|
||||
|
@ -41,7 +40,7 @@ public class FieldType {
|
|||
this.storeTermVectorPositions = ref.storeTermVectorPositions();
|
||||
this.omitNorms = ref.omitNorms();
|
||||
this.indexOptions = ref.indexOptions();
|
||||
this.lazy = ref.lazy();
|
||||
// Do not copy frozen!
|
||||
}
|
||||
|
||||
public FieldType() {
|
||||
|
@ -52,7 +51,9 @@ public class FieldType {
|
|||
throw new IllegalStateException();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** Prevents future changes. Note that when a FieldType
|
||||
* is first bound to a Field instance, it is frozen. */
|
||||
public void freeze() {
|
||||
this.frozen = true;
|
||||
}
|
||||
|
@ -129,15 +130,6 @@ public class FieldType {
|
|||
this.indexOptions = value;
|
||||
}
|
||||
|
||||
public boolean lazy() {
|
||||
return this.lazy;
|
||||
}
|
||||
|
||||
public void setLazy(boolean value) {
|
||||
checkIfFrozen();
|
||||
this.lazy = value;
|
||||
}
|
||||
|
||||
/** Prints a Field for human consumption. */
|
||||
@Override
|
||||
public final String toString() {
|
||||
|
@ -177,9 +169,6 @@ public class FieldType {
|
|||
result.append(",indexOptions=");
|
||||
result.append(indexOptions);
|
||||
}
|
||||
if (lazy()){
|
||||
result.append(",lazy");
|
||||
}
|
||||
|
||||
return result.toString();
|
||||
}
|
||||
|
|
|
@ -19,10 +19,26 @@ import org.apache.lucene.index.FieldInfo.IndexOptions;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/** A field that is indexed but not tokenized: the entire
|
||||
* String value is indexed as a single token. For example
|
||||
* this might be used for a 'country' field or an 'id'
|
||||
* field, or any field that you intend to use for sorting
|
||||
* or access through the field cache.
|
||||
*
|
||||
* <p/>This field's value is not stored by default; use the
|
||||
* {@link TYPE_STORED} type (pass it to <code>new
|
||||
* Field</code>) to store the value. */
|
||||
|
||||
public final class StringField extends Field {
|
||||
|
||||
/** Indexed, not tokenized, omits norms, indexes
|
||||
* DOCS_ONLY, not stored. */
|
||||
public static final FieldType TYPE_UNSTORED = new FieldType();
|
||||
|
||||
/** Indexed, not tokenized, omits norms, indexes
|
||||
* DOCS_ONLY, stored */
|
||||
public static final FieldType TYPE_STORED = new FieldType();
|
||||
|
||||
static {
|
||||
TYPE_UNSTORED.setIndexed(true);
|
||||
TYPE_UNSTORED.setOmitNorms(true);
|
||||
|
@ -36,6 +52,7 @@ public final class StringField extends Field {
|
|||
TYPE_STORED.freeze();
|
||||
}
|
||||
|
||||
/** Creates a new un-stored StringField */
|
||||
public StringField(String name, String value) {
|
||||
super(name, TYPE_UNSTORED, value);
|
||||
}
|
||||
|
@ -44,8 +61,4 @@ public final class StringField extends Field {
|
|||
public String stringValue() {
|
||||
return (fieldsData == null) ? null : fieldsData.toString();
|
||||
}
|
||||
|
||||
public boolean isNumeric() {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -21,10 +21,22 @@ import java.io.Reader;
|
|||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
||||
/** A field that is indexed and tokenized, without term
|
||||
* vectors. For example this would be used on a 'body'
|
||||
* field, that contains the bulk of a document's text.
|
||||
*
|
||||
* This field's value is not stored by default; use the
|
||||
* {@link TYPE_STORED} type (pass it to <code>new
|
||||
* Field</code>) to store the value. */
|
||||
|
||||
public final class TextField extends Field {
|
||||
|
||||
/* Indexed, tokenized, not stored. */
|
||||
public static final FieldType TYPE_UNSTORED = new FieldType();
|
||||
|
||||
/* Indexed, tokenized, stored. */
|
||||
public static final FieldType TYPE_STORED = new FieldType();
|
||||
|
||||
static {
|
||||
TYPE_UNSTORED.setIndexed(true);
|
||||
TYPE_UNSTORED.setTokenized(true);
|
||||
|
@ -35,20 +47,19 @@ public final class TextField extends Field {
|
|||
TYPE_STORED.setTokenized(true);
|
||||
TYPE_STORED.freeze();
|
||||
}
|
||||
|
||||
|
||||
/** Creates a new un-stored TextField */
|
||||
public TextField(String name, Reader reader) {
|
||||
super(name, TextField.TYPE_UNSTORED, reader);
|
||||
}
|
||||
|
||||
/** Creates a new un-stored TextField */
|
||||
public TextField(String name, String value) {
|
||||
super(name, TextField.TYPE_UNSTORED, value);
|
||||
}
|
||||
|
||||
/** Creates a new un-stored TextField */
|
||||
public TextField(String name, TokenStream stream) {
|
||||
super(name, TextField.TYPE_UNSTORED, stream);
|
||||
}
|
||||
|
||||
public boolean isNumeric() {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -56,8 +56,7 @@ to check if the results are what we expect):</p>
|
|||
new IndexWriter.MaxFieldLength(25000));
|
||||
Document doc = new Document();
|
||||
String text = "This is the text to be indexed.";
|
||||
doc.add(new Field("fieldname", text, Field.Store.YES,
|
||||
Field.Index.ANALYZED));
|
||||
doc.add(new Field("fieldname", TextField.TYPE_STORED, text));
|
||||
iwriter.addDocument(doc);
|
||||
iwriter.close();
|
||||
|
||||
|
|
|
@ -179,8 +179,8 @@ public class DocMaker {
|
|||
|
||||
protected Config config;
|
||||
|
||||
protected final FieldType valType;
|
||||
protected final FieldType bodyValType;
|
||||
protected FieldType valType;
|
||||
protected FieldType bodyValType;
|
||||
|
||||
protected ContentSource source;
|
||||
protected boolean reuseFields;
|
||||
|
@ -194,10 +194,6 @@ public class DocMaker {
|
|||
private int printNum = 0;
|
||||
|
||||
public DocMaker() {
|
||||
valType = new FieldType(TextField.TYPE_UNSTORED);
|
||||
valType.setOmitNorms(true);
|
||||
|
||||
bodyValType = new FieldType(TextField.TYPE_UNSTORED);
|
||||
}
|
||||
|
||||
// create a doc
|
||||
|
@ -465,20 +461,24 @@ public class DocMaker {
|
|||
boolean termVecPositions = config.get("doc.term.vector.positions", false);
|
||||
boolean termVecOffsets = config.get("doc.term.vector.offsets", false);
|
||||
|
||||
valType = new FieldType(TextField.TYPE_UNSTORED);
|
||||
valType.setStored(stored);
|
||||
bodyValType.setStored(bodyStored);
|
||||
valType.setTokenized(tokenized);
|
||||
valType.setOmitNorms(!norms);
|
||||
bodyValType.setTokenized(bodyTokenized);
|
||||
bodyValType.setOmitNorms(!bodyNorms);
|
||||
|
||||
valType.setStoreTermVectors(termVec);
|
||||
valType.setStoreTermVectorPositions(termVecPositions);
|
||||
valType.setStoreTermVectorOffsets(termVecOffsets);
|
||||
valType.freeze();
|
||||
|
||||
bodyValType = new FieldType(TextField.TYPE_UNSTORED);
|
||||
bodyValType.setStored(bodyStored);
|
||||
bodyValType.setTokenized(bodyTokenized);
|
||||
bodyValType.setOmitNorms(!bodyNorms);
|
||||
bodyValType.setStoreTermVectors(termVec);
|
||||
bodyValType.setStoreTermVectorPositions(termVecPositions);
|
||||
bodyValType.setStoreTermVectorOffsets(termVecOffsets);
|
||||
|
||||
bodyValType.freeze();
|
||||
|
||||
storeBytes = config.get("doc.store.body.bytes", false);
|
||||
|
||||
reuseFields = config.get("doc.reuse.fields", true);
|
||||
|
|
Loading…
Reference in New Issue