From 9de01b56ebf252ffefe05e606e330a1787b94c9d Mon Sep 17 00:00:00 2001
From: Michael McCandless
Date: Sun, 15 Jan 2012 23:05:13 +0000
Subject: [PATCH] LUCENE-3453: simplify DocValues/Field API

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1231791 13f79535-47bb-0310-9956-ffa450edef68
---
 lucene/MIGRATE.txt | 25 +-
 .../org/apache/lucene/demo/IndexFiles.java | 4 +-
 .../search/highlight/HighlighterTest.java | 22 +-
 .../apache/lucene/document/LazyDocument.java | 39 -
 .../lucene/spatial/tier/TestCartesian.java | 36 +-
 .../lucene/spatial/tier/TestDistance.java | 24 +-
 .../lucene/codecs/DocValuesConsumer.java | 168 ++--
 .../lucene40/Lucene40StoredFieldsReader.java | 1 -
 .../lucene40/Lucene40StoredFieldsWriter.java | 63 +-
 .../lucene/codecs/lucene40/values/Bytes.java | 19 +-
 .../values/FixedStraightBytesImpl.java | 39 +-
 .../lucene/codecs/lucene40/values/Floats.java | 12 +-
 .../lucene/codecs/lucene40/values/Ints.java | 14 +-
 .../lucene40/values/PackedIntValues.java | 16 +-
 .../lucene40/values/VarStraightBytesImpl.java | 28 +-
 .../simpletext/SimpleTextNormsConsumer.java | 6 +-
 .../SimpleTextStoredFieldsWriter.java | 67 +-
 .../apache/lucene/document/BinaryField.java | 46 --
 .../lucene/document/DocValuesField.java | 370 ++-------
 .../document/DocumentStoredFieldVisitor.java | 18 +-
 .../org/apache/lucene/document/Field.java | 782 +++++++++++++++---
 .../org/apache/lucene/document/FieldType.java | 58 +-
 .../apache/lucene/document/NumericField.java | 333 ++------
 .../apache/lucene/document/StoredField.java | 71 ++
 .../apache/lucene/document/StringField.java | 4 +-
 .../org/apache/lucene/document/TextField.java | 2 +
 .../lucene/index/DocFieldProcessor.java | 32 +-
 .../org/apache/lucene/index/DocValue.java | 53 --
 .../org/apache/lucene/index/DocValues.java | 3 +-
 .../apache/lucene/index/IndexableField.java | 36 +-
 .../lucene/index/IndexableFieldType.java | 4 +
 .../apache/lucene/index/NormsConsumer.java | 5 +-
 .../lucene/index/NormsConsumerPerField.java | 4 +-
 .../preflexrw/PreFlexNormsConsumer.java | 8 +-
 .../org/apache/lucene/index/DocHelper.java | 8 +-
 .../lucene/index/RandomIndexWriter.java | 31 +-
 .../lucene/codecs/lucene40/TestDocValues.java | 78 +-
 .../lucene/document/TestBinaryDocument.java | 6 +-
 .../apache/lucene/document/TestDocument.java | 120 ++-
 .../apache/lucene/index/TestAddIndexes.java | 8 +-
 .../index/TestBackwardsCompatibility.java | 8 +-
 .../index/TestConsistentFieldNumbers.java | 6 +-
 .../apache/lucene/index/TestDocTermOrds.java | 6 +-
 .../lucene/index/TestDocValuesIndexing.java | 111 +--
 .../lucene/index/TestDuelingCodecs.java | 2 -
 .../apache/lucene/index/TestFieldsReader.java | 23 +-
 .../apache/lucene/index/TestIndexReader.java | 4 +-
 .../apache/lucene/index/TestIndexWriter.java | 14 +-
 .../lucene/index/TestIndexableField.java | 77 +-
 .../apache/lucene/index/TestTermsEnum.java | 2 +-
 .../lucene/index/TestTypePromotion.java | 121 +--
 .../lucene/search/TestDocValuesScoring.java | 13 +-
 .../apache/lucene/search/TestFieldCache.java | 2 +-
 .../TestMultiValuedNumericRangeQuery.java | 2 +-
 .../search/TestNumericRangeQuery32.java | 96 ++-
 .../search/TestNumericRangeQuery64.java | 112 ++-
 .../org/apache/lucene/search/TestSort.java | 44 +-
 .../lucene/search/TestTopDocsMerge.java | 4 +-
 .../benchmark/byTask/feeds/DocMaker.java | 45 +-
 .../grouping/AllGroupHeadsCollectorTest.java | 12 +-
 .../grouping/AllGroupsCollectorTest.java | 4 +-
 .../lucene/search/grouping/TestGrouping.java | 14 +-
 .../lucene/search/join/TestBlockJoin.java | 8 +-
 .../standard/TestNumericQueryParser.java | 42
+- .../lucene/queryparser/xml/TestParser.java | 4 +- .../transform/BaseEditorialTransformer.java | 12 +- .../org/apache/solr/schema/BinaryField.java | 2 +- .../org/apache/solr/schema/TrieField.java | 47 +- .../apache/solr/search/SolrIndexSearcher.java | 20 +- .../org/apache/solr/schema/PolyFieldTest.java | 3 +- 70 files changed, 1917 insertions(+), 1606 deletions(-) delete mode 100644 lucene/src/java/org/apache/lucene/document/BinaryField.java create mode 100644 lucene/src/java/org/apache/lucene/document/StoredField.java delete mode 100644 lucene/src/java/org/apache/lucene/index/DocValue.java diff --git a/lucene/MIGRATE.txt b/lucene/MIGRATE.txt index 35b8b0fe56d..fe47605456d 100644 --- a/lucene/MIGRATE.txt +++ b/lucene/MIGRATE.txt @@ -422,13 +422,13 @@ LUCENE-1458, LUCENE-2111: Flexible Indexing -* LUCENE-2308: Separate IndexableFieldType from Field instances +* LUCENE-2308,LUCENE-3453: Separate IndexableFieldType from Field instances With this change, the indexing details (indexed, tokenized, norms, indexOptions, stored, etc.) are moved into a separate FieldType instance (rather than being stored directly on the Field). -This means you can create the IndexableFieldType instance once, up front, +This means you can create the FieldType instance once, up front, for a given field, and then re-use that instance whenever you instantiate the Field. @@ -439,15 +439,21 @@ Certain field types are pre-defined since they are common cases: IDS (does not index term frequency nor positions). This field does not store its value, but exposes TYPE_STORED as well. - * BinaryField: a byte[] value that's only stored. - * TextField: indexes and tokenizes a String, Reader or TokenStream value, without term vectors. This field does not store its value, but exposes TYPE_STORED as well. + * StoredField: field that stores its value + + * DocValuesField: indexes the value as a DocValues field + + * NumericField: indexes the numeric value so that NumericRangeQuery + can be used at search-time. + If your usage fits one of those common cases you can simply -instantiate the above class. To use the TYPE_STORED variant, do this -instead: +instantiate the above class. If you need to store the value, you can +add a separate StoredField to the document, or you can use +TYPE_STORED for the field: Field f = new Field("field", "value", StringField.TYPE_STORED); @@ -465,9 +471,14 @@ You can of course also create your own FieldType from scratch: t.setStored(true); t.setOmitNorms(true); t.setIndexOptions(IndexOptions.DOCS_AND_FREQS); + t.freeze(); FieldType has a freeze() method to prevent further changes. +There is also a deprecated transition API, providing the same Index, +Store, TermVector enums from 3.x, and Field constructors taking these +enums. + When migrating from the 3.x API, if you did this before: new Field("field", "value", Field.Store.NO, Field.Indexed.NOT_ANALYZED_NO_NORMS) @@ -528,7 +539,7 @@ If you did this before (bytes is a byte[]): you can now do this: - new BinaryField("field", bytes) + new StoredField("field", bytes) * LUCENE-3396: Analyzer.tokenStream() and .reusableTokenStream() have been made final. It is now necessary to use Analyzer.TokenStreamComponents to define an analysis process. 
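To make the MIGRATE.txt notes above concrete, here is a minimal sketch of
indexing one document with the reworked field API. It sticks to constructors
that appear in this patch (Field with a pre-defined type, StoredField, the
NumericField(String, long) constructor, and a hand-built frozen FieldType).
The field names and values are only illustrative, the usual
org.apache.lucene.document imports plus org.apache.lucene.index.FieldInfo.IndexOptions
are assumed, and "writer" stands in for an already-opened IndexWriter:

  Document doc = new Document();

  // indexed as a single token and also stored (StringField.TYPE_STORED)
  doc.add(new Field("id", "doc-42", StringField.TYPE_STORED));

  // tokenized, indexed and stored body text (TextField.TYPE_STORED)
  doc.add(new Field("body", "some example text", TextField.TYPE_STORED));

  // stored-only byte[] value; this replaces the removed BinaryField
  doc.add(new StoredField("thumbnail", new byte[] {1, 2, 3}));

  // numeric value, indexed so NumericRangeQuery works at search time
  doc.add(new NumericField("modified", System.currentTimeMillis()));

  // or build a custom FieldType once, up front, freeze it and re-use it
  FieldType t = new FieldType();
  t.setIndexed(true);
  t.setStored(true);
  t.setTokenized(false);
  t.setOmitNorms(true);
  t.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  t.freeze();
  doc.add(new Field("custom", "value", t));

  writer.addDocument(doc);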
diff --git a/lucene/contrib/demo/src/java/org/apache/lucene/demo/IndexFiles.java b/lucene/contrib/demo/src/java/org/apache/lucene/demo/IndexFiles.java index 2094acef1a2..173976f678a 100644 --- a/lucene/contrib/demo/src/java/org/apache/lucene/demo/IndexFiles.java +++ b/lucene/contrib/demo/src/java/org/apache/lucene/demo/IndexFiles.java @@ -184,9 +184,7 @@ public class IndexFiles { // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. - NumericField modifiedField = new NumericField("modified"); - modifiedField.setLongValue(file.lastModified()); - doc.add(modifiedField); + doc.add(new NumericField("modified", file.lastModified())); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. diff --git a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java index d7dcaa511f1..d8278da9c90 100644 --- a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java +++ b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java @@ -386,7 +386,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte Highlighter highlighter = new Highlighter(this, scorer); for (int i = 0; i < hits.totalHits; i++) { - String text = searcher.doc(hits.scoreDocs[i].doc).get(NUMERIC_FIELD_NAME); + String text = searcher.doc(hits.scoreDocs[i].doc).getField(NUMERIC_FIELD_NAME).numericValue().toString(); TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); highlighter.setTextFragmenter(new SimpleFragmenter(40)); @@ -1738,25 +1738,21 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte addDoc(writer, text); } Document doc = new Document(); - NumericField nfield = new NumericField(NUMERIC_FIELD_NAME, NumericField.TYPE_STORED); - nfield.setIntValue(1); - doc.add(nfield); + doc.add(new NumericField(NUMERIC_FIELD_NAME, 1, NumericField.getFieldType(NumericField.DataType.INT, true))); writer.addDocument(doc, analyzer); - nfield = new NumericField(NUMERIC_FIELD_NAME, NumericField.TYPE_STORED); - nfield.setIntValue(3); + doc = new Document(); - doc.add(nfield); + doc.add(new NumericField(NUMERIC_FIELD_NAME, 3, NumericField.getFieldType(NumericField.DataType.INT, true))); writer.addDocument(doc, analyzer); - nfield = new NumericField(NUMERIC_FIELD_NAME, NumericField.TYPE_STORED); - nfield.setIntValue(5); + doc = new Document(); - doc.add(nfield); + doc.add(new NumericField(NUMERIC_FIELD_NAME, 5, NumericField.getFieldType(NumericField.DataType.INT, true))); writer.addDocument(doc, analyzer); - nfield = new NumericField(NUMERIC_FIELD_NAME, NumericField.TYPE_STORED); - nfield.setIntValue(7); + doc = new Document(); - doc.add(nfield); + doc.add(new NumericField(NUMERIC_FIELD_NAME, 7, NumericField.getFieldType(NumericField.DataType.INT, true))); writer.addDocument(doc, analyzer); + writer.forceMerge(1); writer.close(); reader = IndexReader.open(ramDir); diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/document/LazyDocument.java b/lucene/contrib/misc/src/java/org/apache/lucene/document/LazyDocument.java index b7217086328..06563cdf3e3 100644 --- a/lucene/contrib/misc/src/java/org/apache/lucene/document/LazyDocument.java +++ 
b/lucene/contrib/misc/src/java/org/apache/lucene/document/LazyDocument.java @@ -23,13 +23,10 @@ import java.util.Map; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.document.NumericField.DataType; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.IndexableFieldType; -import org.apache.lucene.index.DocValue; -import org.apache.lucene.index.DocValues; import org.apache.lucene.util.BytesRef; /** Defers actually loading a field's value until you ask @@ -120,24 +117,6 @@ public class LazyDocument { } } - @Override - public boolean numeric() { - if (num == 0) { - return getDocument().getField(name).numeric(); - } else { - return getDocument().getFields(name)[num].numeric(); - } - } - - @Override - public DataType numericDataType() { - if (num == 0) { - return getDocument().getField(name).numericDataType(); - } else { - return getDocument().getFields(name)[num].numericDataType(); - } - } - @Override public Number numericValue() { if (num == 0) { @@ -156,24 +135,6 @@ public class LazyDocument { } } - @Override - public DocValue docValue() { - if (num == 0) { - return getDocument().getField(name).docValue(); - } else { - return getDocument().getFields(name)[num].docValue(); - } - } - - @Override - public DocValues.Type docValueType() { - if (num == 0) { - return getDocument().getField(name).docValueType(); - } else { - return getDocument().getFields(name)[num].docValueType(); - } - } - @Override public TokenStream tokenStream(Analyzer analyzer) throws IOException { if (num == 0) { diff --git a/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestCartesian.java b/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestCartesian.java index d17cf7a25e0..735ed9802a8 100644 --- a/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestCartesian.java +++ b/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestCartesian.java @@ -23,9 +23,11 @@ import java.util.Map; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; +import org.apache.lucene.document.FieldType; import org.apache.lucene.document.NumericField; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; +import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; @@ -91,6 +93,18 @@ public class TestCartesian extends LuceneTestCase { } } + private static final FieldType latLongType = new FieldType(); + static { + latLongType.setIndexed(true); + latLongType.setStored(true); + latLongType.setTokenized(true); + latLongType.setOmitNorms(true); + latLongType.setIndexOptions(IndexOptions.DOCS_ONLY); + latLongType.setNumericType(NumericField.DataType.DOUBLE); + latLongType.setNumericPrecisionStep(Integer.MAX_VALUE); + latLongType.freeze(); + } + private void addPoint(IndexWriter writer, String name, double lat, double lng) throws IOException{ Document doc = new Document(); @@ -98,8 +112,8 @@ public class TestCartesian extends LuceneTestCase { doc.add(newField("name", name, TextField.TYPE_STORED)); // convert the lat / long to lucene fields - doc.add(new NumericField(latField, Integer.MAX_VALUE, NumericField.TYPE_STORED).setDoubleValue(lat)); - doc.add(new NumericField(lngField, Integer.MAX_VALUE, 
NumericField.TYPE_STORED).setDoubleValue(lng)); + doc.add(new NumericField(latField, lat, latLongType)); + doc.add(new NumericField(lngField, lng, latLongType)); // add a default meta field to make searching all documents easy doc.add(newField("metafile", "doc", TextField.TYPE_STORED)); @@ -107,7 +121,7 @@ public class TestCartesian extends LuceneTestCase { int ctpsize = ctps.size(); for (int i =0; i < ctpsize; i++){ CartesianTierPlotter ctp = ctps.get(i); - doc.add(new NumericField(ctp.getTierFieldName(), Integer.MAX_VALUE, TextField.TYPE_STORED).setDoubleValue(ctp.getTierBoxId(lat,lng))); + doc.add(new NumericField(ctp.getTierFieldName(), ctp.getTierBoxId(lat, lng), latLongType)); doc.add(newField(geoHashPrefix, GeoHashUtils.encode(lat,lng), StringField.TYPE_STORED)); } @@ -248,8 +262,8 @@ public class TestCartesian extends LuceneTestCase { Document d = searcher.doc(scoreDocs[i].doc); String name = d.get("name"); - double rsLat = Double.parseDouble(d.get(latField)); - double rsLng = Double.parseDouble(d.get(lngField)); + double rsLat = d.getField(latField).numericValue().doubleValue(); + double rsLng = d.getField(lngField).numericValue().doubleValue(); Double geo_distance = distances.get(scoreDocs[i].doc); double distance = DistanceUtils.getDistanceMi(lat, lng, rsLat, rsLng); @@ -317,8 +331,8 @@ public class TestCartesian extends LuceneTestCase { for(int i =0 ; i < results; i++){ Document d = searcher.doc(scoreDocs[i].doc); String name = d.get("name"); - double rsLat = Double.parseDouble(d.get(latField)); - double rsLng = Double.parseDouble(d.get(lngField)); + double rsLat = d.getField(latField).numericValue().doubleValue(); + double rsLng = d.getField(lngField).numericValue().doubleValue(); Double geo_distance = distances.get(scoreDocs[i].doc); double distance = DistanceUtils.getDistanceMi(lat, lng, rsLat, rsLng); @@ -389,8 +403,8 @@ public class TestCartesian extends LuceneTestCase { Document d = searcher.doc(scoreDocs[i].doc); String name = d.get("name"); - double rsLat = Double.parseDouble(d.get(latField)); - double rsLng = Double.parseDouble(d.get(lngField)); + double rsLat = d.getField(latField).numericValue().doubleValue(); + double rsLng = d.getField(lngField).numericValue().doubleValue(); Double geo_distance = distances.get(scoreDocs[i].doc); double distance = DistanceUtils.getDistanceMi(lat, lng, rsLat, rsLng); @@ -461,8 +475,8 @@ public class TestCartesian extends LuceneTestCase { Document d = searcher.doc(scoreDocs[i].doc); String name = d.get("name"); - double rsLat = Double.parseDouble(d.get(latField)); - double rsLng = Double.parseDouble(d.get(lngField)); + double rsLat = d.getField(latField).numericValue().doubleValue(); + double rsLng = d.getField(lngField).numericValue().doubleValue(); Double geo_distance = distances.get(scoreDocs[i].doc); double distance = DistanceUtils.getDistanceMi(lat, lng, rsLat, rsLng); diff --git a/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestDistance.java b/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestDistance.java index 43aff9161ad..8fb9981d073 100644 --- a/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestDistance.java +++ b/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestDistance.java @@ -20,17 +20,19 @@ import java.io.IOException; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; +import org.apache.lucene.document.FieldType; import org.apache.lucene.document.NumericField; import org.apache.lucene.document.TextField; 
+import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.search.QueryWrapperFilter; import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.QueryWrapperFilter; +import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.ReaderUtil; -import org.apache.lucene.store.Directory; public class TestDistance extends LuceneTestCase { @@ -58,6 +60,18 @@ public class TestDistance extends LuceneTestCase { directory.close(); super.tearDown(); } + + private static final FieldType latLongType = new FieldType(); + static { + latLongType.setIndexed(true); + latLongType.setStored(true); + latLongType.setTokenized(true); + latLongType.setOmitNorms(true); + latLongType.setIndexOptions(IndexOptions.DOCS_ONLY); + latLongType.setNumericType(NumericField.DataType.DOUBLE); + latLongType.setNumericPrecisionStep(Integer.MAX_VALUE); + latLongType.freeze(); + } private void addPoint(IndexWriter writer, String name, double lat, double lng) throws IOException{ @@ -66,8 +80,8 @@ public class TestDistance extends LuceneTestCase { doc.add(newField("name", name, TextField.TYPE_STORED)); // convert the lat / long to lucene fields - doc.add(new NumericField(latField, Integer.MAX_VALUE, NumericField.TYPE_STORED).setDoubleValue(lat)); - doc.add(new NumericField(lngField, Integer.MAX_VALUE, NumericField.TYPE_STORED).setDoubleValue(lng)); + doc.add(new NumericField(latField, lat, latLongType)); + doc.add(new NumericField(lngField, lng, latLongType)); // add a default meta field to make searching all documents easy doc.add(newField("metafile", "doc", TextField.TYPE_STORED)); diff --git a/lucene/src/java/org/apache/lucene/codecs/DocValuesConsumer.java b/lucene/src/java/org/apache/lucene/codecs/DocValuesConsumer.java index a20a653858b..bb2330cabad 100644 --- a/lucene/src/java/org/apache/lucene/codecs/DocValuesConsumer.java +++ b/lucene/src/java/org/apache/lucene/codecs/DocValuesConsumer.java @@ -19,51 +19,50 @@ package org.apache.lucene.codecs; import java.io.IOException; import org.apache.lucene.codecs.lucene40.values.Writer; -import org.apache.lucene.index.DocValues; +import org.apache.lucene.document.DocValuesField; +import org.apache.lucene.document.Field; import org.apache.lucene.index.DocValues.Source; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.MergeState; -import org.apache.lucene.index.DocValue; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; /** - * Abstract API that consumes {@link DocValue}s. + * Abstract API that consumes {@link IndexableField}s. * {@link DocValuesConsumer} are always associated with a specific field and * segments. Concrete implementations of this API write the given - * {@link DocValue} into a implementation specific format depending on + * {@link IndexableField} into a implementation specific format depending on * the fields meta-data. 
* * @lucene.experimental */ public abstract class DocValuesConsumer { - protected Source currentMergeSource; protected final BytesRef spare = new BytesRef(); /** - * Adds the given {@link DocValue} instance to this + * Adds the given {@link IndexableField} instance to this * {@link DocValuesConsumer} * * @param docID * the document ID to add the value for. The docID must always * increase or be 0 if it is the first call to this method. - * @param docValue + * @param value * the value to add * @throws IOException * if an {@link IOException} occurs */ - public abstract void add(int docID, DocValue docValue) + public abstract void add(int docID, IndexableField value) throws IOException; /** - * Called when the consumer of this API is doc with adding - * {@link DocValue} to this {@link DocValuesConsumer} + * Called when the consumer of this API is done adding values. * * @param docCount * the total number of documents in this {@link DocValuesConsumer}. * Must be greater than or equal the last given docID to - * {@link #add(int, DocValue)}. + * {@link #add(int, IndexableField)}. * @throws IOException */ public abstract void finish(int docCount) throws IOException; @@ -87,8 +86,8 @@ public abstract class DocValuesConsumer { final org.apache.lucene.index.MergeState.IndexReaderAndLiveDocs reader = mergeState.readers.get(readerIDX); if (docValues[readerIDX] != null) { hasMerged = true; - merge(new SingleSubMergeState(docValues[readerIDX], mergeState.docBase[readerIDX], reader.reader.maxDoc(), - reader.liveDocs)); + merge(docValues[readerIDX], mergeState.docBase[readerIDX], + reader.reader.maxDoc(), reader.liveDocs); mergeState.checkAbort.work(reader.reader.maxDoc()); } } @@ -99,73 +98,66 @@ public abstract class DocValuesConsumer { } /** - * Merges the given {@link SingleSubMergeState} into this {@link DocValuesConsumer}. + * Merges the given {@link DocValues} into this {@link DocValuesConsumer}. * - * @param state - * the {@link SingleSubMergeState} to merge * @throws IOException * if an {@link IOException} occurs */ - protected void merge(SingleSubMergeState state) throws IOException { + protected void merge(DocValues reader, int docBase, int docCount, Bits liveDocs) throws IOException { // This enables bulk copies in subclasses per MergeState, subclasses can // simply override this and decide if they want to merge // segments using this generic implementation or if a bulk merge is possible // / feasible. - final Source source = state.reader.getDirectSource(); + final Source source = reader.getDirectSource(); assert source != null; - setNextMergeSource(source); // set the current enum we are working on - the - // impl. 
will get the correct reference for the type - // it supports - int docID = state.docBase; - final Bits liveDocs = state.liveDocs; - final int docCount = state.docCount; + int docID = docBase; + final DocValues.Type type = reader.type(); + final Field scratchField; + switch(type) { + case VAR_INTS: + scratchField = new DocValuesField("", (long) 0, type); + break; + case FIXED_INTS_16: + scratchField = new DocValuesField("", (short) 0, type); + break; + case FIXED_INTS_32: + scratchField = new DocValuesField("", 0, type); + break; + case FIXED_INTS_64: + scratchField = new DocValuesField("", (long) 0, type); + break; + case FIXED_INTS_8: + scratchField = new DocValuesField("", (byte) 0, type); + break; + case FLOAT_32: + scratchField = new DocValuesField("", (float) 0, type); + break; + case FLOAT_64: + scratchField = new DocValuesField("", (double) 0, type); + break; + case BYTES_FIXED_STRAIGHT: + case BYTES_FIXED_DEREF: + case BYTES_FIXED_SORTED: + case BYTES_VAR_STRAIGHT: + case BYTES_VAR_DEREF: + case BYTES_VAR_SORTED: + scratchField = new DocValuesField("", new BytesRef(), type); + break; + default: + assert false; + scratchField = null; + } for (int i = 0; i < docCount; i++) { if (liveDocs == null || liveDocs.get(i)) { - mergeDoc(docID++, i); + mergeDoc(scratchField, source, docID++, i); } } } - /** - * Records the specified long value for the docID or throws an - * {@link UnsupportedOperationException} if this {@link Writer} doesn't record - * long values. - * - * @throws UnsupportedOperationException - * if this writer doesn't record long values - */ - protected void add(int docID, long value) throws IOException { - throw new UnsupportedOperationException("override this method to support integer types"); - } - - /** - * Records the specified double value for the docID or throws an - * {@link UnsupportedOperationException} if this {@link Writer} doesn't record - * double values. - * - * @throws UnsupportedOperationException - * if this writer doesn't record double values - */ - protected void add(int docID, double value) throws IOException { - throw new UnsupportedOperationException("override this method to support floating point types"); - } - - /** - * Records the specified {@link BytesRef} value for the docID or throws an - * {@link UnsupportedOperationException} if this {@link Writer} doesn't record - * {@link BytesRef} values. - * - * @throws UnsupportedOperationException - * if this writer doesn't record {@link BytesRef} values - */ - protected void add(int docID, BytesRef value) throws IOException { - throw new UnsupportedOperationException("override this method to support byte types"); - } - /** * Merges a document with the given docID. The methods * implementation obtains the value for the sourceDoc id from the - * current {@link Source} set to setNextMergeSource(Source). + * current {@link Source}. *

* This method is used during merging to provide implementation agnostic * default merge implementation. @@ -177,67 +169,29 @@ public abstract class DocValuesConsumer { * ID must always be greater than the previous ID or 0 if called the * first time. */ - protected void mergeDoc(int docID, int sourceDoc) + protected void mergeDoc(Field scratchField, Source source, int docID, int sourceDoc) throws IOException { - switch(currentMergeSource.type()) { + switch(source.type()) { case BYTES_FIXED_DEREF: case BYTES_FIXED_SORTED: case BYTES_FIXED_STRAIGHT: case BYTES_VAR_DEREF: case BYTES_VAR_SORTED: case BYTES_VAR_STRAIGHT: - add(docID, currentMergeSource.getBytes(sourceDoc, spare)); + scratchField.setValue(source.getBytes(sourceDoc, spare)); break; case FIXED_INTS_16: case FIXED_INTS_32: case FIXED_INTS_64: case FIXED_INTS_8: case VAR_INTS: - add(docID, currentMergeSource.getInt(sourceDoc)); + scratchField.setValue(source.getInt(sourceDoc)); break; case FLOAT_32: case FLOAT_64: - add(docID, currentMergeSource.getFloat(sourceDoc)); + scratchField.setValue(source.getFloat(sourceDoc)); break; } - } - - /** - * Sets the next {@link Source} to consume values from on calls to - * {@link #mergeDoc(int, int)} - * - * @param mergeSource - * the next {@link Source}, this must not be null - */ - protected final void setNextMergeSource(Source mergeSource) { - currentMergeSource = mergeSource; - } - - /** - * Specialized auxiliary MergeState is necessary since we don't want to - * exploit internals up to the codecs consumer. An instance of this class is - * created for each merged low level {@link IndexReader} we are merging to - * support low level bulk copies. - */ - public static class SingleSubMergeState { - /** - * the source reader for this MergeState - merged values should be read from - * this instance - */ - public final DocValues reader; - /** the absolute docBase for this MergeState within the resulting segment */ - public final int docBase; - /** the number of documents in this MergeState */ - public final int docCount; - /** the not deleted bits for this MergeState */ - public final Bits liveDocs; - - public SingleSubMergeState(DocValues reader, int docBase, int docCount, Bits liveDocs) { - assert reader != null; - this.reader = reader; - this.docBase = docBase; - this.docCount = docCount; - this.liveDocs = liveDocs; - } + add(docID, scratchField); } } diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java index fb5e2ceb635..4f15c05b55c 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java @@ -85,7 +85,6 @@ public final class Lucene40StoredFieldsReader extends StoredFieldsReader impleme } finally { idxStream.close(); } - } // Used only by clone diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java index 05282c4613c..5586adeac53 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java @@ -25,9 +25,9 @@ import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.IndexableField; +import 
org.apache.lucene.index.MergePolicy.MergeAbortedException; import org.apache.lucene.index.MergeState; import org.apache.lucene.index.SegmentReader; -import org.apache.lucene.index.MergePolicy.MergeAbortedException; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; @@ -50,11 +50,11 @@ public final class Lucene40StoredFieldsWriter extends StoredFieldsWriter { static final int FIELD_IS_NUMERIC_LONG = 2 << _NUMERIC_BIT_SHIFT; static final int FIELD_IS_NUMERIC_FLOAT = 3 << _NUMERIC_BIT_SHIFT; static final int FIELD_IS_NUMERIC_DOUBLE = 4 << _NUMERIC_BIT_SHIFT; + + // the next possible bits are: 1 << 6; 1 << 7 // currently unused: static final int FIELD_IS_NUMERIC_SHORT = 5 << _NUMERIC_BIT_SHIFT; // currently unused: static final int FIELD_IS_NUMERIC_BYTE = 6 << _NUMERIC_BIT_SHIFT; - // the next possible bits are: 1 << 6; 1 << 7 - // Lucene 3.0: Removal of compressed fields static final int FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS = 2; @@ -127,7 +127,7 @@ public final class Lucene40StoredFieldsWriter extends StoredFieldsWriter { IndexFileNames.segmentFileName(segment, "", FIELDS_INDEX_EXTENSION)); } - public final void writeField(FieldInfo info, IndexableField field) throws IOException { + public void writeField(FieldInfo info, IndexableField field) throws IOException { fieldsStream.writeVInt(info.number); int bits = 0; final BytesRef bytes; @@ -136,18 +136,19 @@ public final class Lucene40StoredFieldsWriter extends StoredFieldsWriter { // this way we don't bake into indexer all these // specific encodings for different fields? and apps // can customize... - if (field.numeric()) { - switch (field.numericDataType()) { - case INT: - bits |= FIELD_IS_NUMERIC_INT; break; - case LONG: - bits |= FIELD_IS_NUMERIC_LONG; break; - case FLOAT: - bits |= FIELD_IS_NUMERIC_FLOAT; break; - case DOUBLE: - bits |= FIELD_IS_NUMERIC_DOUBLE; break; - default: - assert false : "Should never get here"; + + Number number = field.numericValue(); + if (number != null) { + if (number instanceof Byte || number instanceof Short || number instanceof Integer) { + bits |= FIELD_IS_NUMERIC_INT; + } else if (number instanceof Long) { + bits |= FIELD_IS_NUMERIC_LONG; + } else if (number instanceof Float) { + bits |= FIELD_IS_NUMERIC_FLOAT; + } else if (number instanceof Double) { + bits |= FIELD_IS_NUMERIC_DOUBLE; + } else { + throw new IllegalArgumentException("cannot store numeric type " + number.getClass()); } string = null; bytes = null; @@ -158,6 +159,9 @@ public final class Lucene40StoredFieldsWriter extends StoredFieldsWriter { string = null; } else { string = field.stringValue(); + if (string == null) { + throw new IllegalArgumentException("field " + field.name() + " is stored but does not have binaryValue, stringValue nor numericValue"); + } } } @@ -169,21 +173,16 @@ public final class Lucene40StoredFieldsWriter extends StoredFieldsWriter { } else if (string != null) { fieldsStream.writeString(field.stringValue()); } else { - final Number n = field.numericValue(); - if (n == null) { - throw new IllegalArgumentException("field " + field.name() + " is stored but does not have binaryValue, stringValue nor numericValue"); - } - switch (field.numericDataType()) { - case INT: - fieldsStream.writeInt(n.intValue()); break; - case LONG: - fieldsStream.writeLong(n.longValue()); break; - case FLOAT: - fieldsStream.writeInt(Float.floatToIntBits(n.floatValue())); break; - case DOUBLE: - fieldsStream.writeLong(Double.doubleToLongBits(n.doubleValue())); break; - 
default: - assert false : "Should never get here"; + if (number instanceof Byte || number instanceof Short || number instanceof Integer) { + fieldsStream.writeInt(number.intValue()); + } else if (number instanceof Long) { + fieldsStream.writeLong(number.longValue()); + } else if (number instanceof Float) { + fieldsStream.writeInt(Float.floatToIntBits(number.floatValue())); + } else if (number instanceof Double) { + fieldsStream.writeLong(Double.doubleToLongBits(number.doubleValue())); + } else { + assert false; } } } @@ -193,7 +192,7 @@ public final class Lucene40StoredFieldsWriter extends StoredFieldsWriter { * document. The stream IndexInput is the * fieldsStream from which we should bulk-copy all * bytes. */ - public final void addRawDocuments(IndexInput stream, int[] lengths, int numDocs) throws IOException { + public void addRawDocuments(IndexInput stream, int[] lengths, int numDocs) throws IOException { long position = fieldsStream.getFilePointer(); long start = position; for(int i=0;i BYTE_BLOCK_SIZE) { - throw new IllegalArgumentException("bytes arrays > " + Short.MAX_VALUE + " are not supported"); + throw new IllegalArgumentException("bytes arrays > " + BYTE_BLOCK_SIZE + " are not supported"); } size = bytes.length; } else if (bytes.length != size) { - throw new IllegalArgumentException("expected bytes size=" + size - + " but got " + bytes.length); + throw new IllegalArgumentException("byte[] length changed for BYTES_FIXED_STRAIGHT type (before=" + size + " now=" + bytes.length); } if (lastDocID+1 < docID) { advancePool(docID); @@ -134,7 +135,7 @@ class FixedStraightBytesImpl { @Override - protected void merge(SingleSubMergeState state) throws IOException { + protected void merge(DocValues readerIn, int docBase, int docCount, Bits liveDocs) throws IOException { datOut = getOrCreateDataOut(); boolean success = false; try { @@ -142,8 +143,8 @@ class FixedStraightBytesImpl { datOut.writeInt(size); } - if (state.liveDocs == null && tryBulkMerge(state.reader)) { - FixedStraightReader reader = (FixedStraightReader) state.reader; + if (liveDocs == null && tryBulkMerge(readerIn)) { + FixedStraightReader reader = (FixedStraightReader) readerIn; final int maxDocs = reader.maxDoc; if (maxDocs == 0) { return; @@ -155,9 +156,9 @@ class FixedStraightBytesImpl { throw new IllegalArgumentException("expected bytes size=" + size + " but got " + reader.size); } - if (lastDocID+1 < state.docBase) { - fill(datOut, state.docBase); - lastDocID = state.docBase-1; + if (lastDocID+1 < docBase) { + fill(datOut, docBase); + lastDocID = docBase-1; } // TODO should we add a transfer to API to each reader? 
final IndexInput cloneData = reader.cloneData(); @@ -169,7 +170,7 @@ class FixedStraightBytesImpl { lastDocID += maxDocs; } else { - super.merge(state); + super.merge(readerIn, docBase, docCount, liveDocs); } success = true; } finally { @@ -185,9 +186,9 @@ class FixedStraightBytesImpl { } @Override - protected void mergeDoc(int docID, int sourceDoc) throws IOException { + protected void mergeDoc(Field scratchField, Source source, int docID, int sourceDoc) throws IOException { assert lastDocID < docID; - setMergeBytes(sourceDoc); + setMergeBytes(source, sourceDoc); if (size == -1) { size = bytesRef.length; datOut.writeInt(size); @@ -200,12 +201,10 @@ class FixedStraightBytesImpl { lastDocID = docID; } - protected void setMergeBytes(int sourceDoc) { - currentMergeSource.getBytes(sourceDoc, bytesRef); + protected void setMergeBytes(Source source, int sourceDoc) { + source.getBytes(sourceDoc, bytesRef); } - - // Fills up to but not including this docID private void fill(IndexOutput datOut, int docID) throws IOException { assert size >= 0; diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Floats.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Floats.java index 321c82d5186..f205505e5fa 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Floats.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Floats.java @@ -19,10 +19,10 @@ package org.apache.lucene.codecs.lucene40.values; import java.io.IOException; import org.apache.lucene.codecs.DocValuesConsumer; -import org.apache.lucene.index.DocValues; -import org.apache.lucene.index.DocValue; import org.apache.lucene.index.DocValues.Source; import org.apache.lucene.index.DocValues.Type; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.IndexableField; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; @@ -86,8 +86,8 @@ public class Floats { } @Override - public void add(int docID, DocValue docValue) throws IOException { - add(docID, docValue.getFloat()); + public void add(int docID, IndexableField docValue) throws IOException { + add(docID, docValue.numericValue().doubleValue()); } @Override @@ -97,8 +97,8 @@ public class Floats { } @Override - protected void setMergeBytes(int sourceDoc) { - final double value = currentMergeSource.getFloat(sourceDoc); + protected void setMergeBytes(Source source, int sourceDoc) { + final double value = source.getFloat(sourceDoc); template.toBytes(value, bytesRef); } } diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Ints.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Ints.java index f22e8e1a341..066bb4a7ddc 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Ints.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Ints.java @@ -20,9 +20,10 @@ package org.apache.lucene.codecs.lucene40.values; import java.io.IOException; import org.apache.lucene.codecs.DocValuesConsumer; -import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.DocValues.Source; import org.apache.lucene.index.DocValues.Type; -import org.apache.lucene.index.DocValue; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.IndexableField; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; @@ -103,20 +104,19 @@ public final class Ints { template = DocValuesArray.TEMPLATES.get(valueType); } - @Override 
protected void add(int docID, long v) throws IOException { template.toBytes(v, bytesRef); add(docID, bytesRef); } @Override - public void add(int docID, DocValue docValue) throws IOException { - add(docID, docValue.getInt()); + public void add(int docID, IndexableField docValue) throws IOException { + add(docID, docValue.numericValue().longValue()); } @Override - protected void setMergeBytes(int sourceDoc) { - final long value = currentMergeSource.getInt(sourceDoc); + protected void setMergeBytes(Source source, int sourceDoc) { + final long value = source.getInt(sourceDoc); template.toBytes(value, bytesRef); } diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/values/PackedIntValues.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/values/PackedIntValues.java index e6a1c6a223d..106321f7d99 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/values/PackedIntValues.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/values/PackedIntValues.java @@ -20,11 +20,12 @@ import java.io.IOException; import org.apache.lucene.codecs.lucene40.values.DocValuesArray.LongValues; import org.apache.lucene.codecs.lucene40.values.FixedStraightBytesImpl.FixedBytesWriterBase; -import org.apache.lucene.index.DocValues; -import org.apache.lucene.index.IndexFileNames; -import org.apache.lucene.index.DocValue; +import org.apache.lucene.document.Field; import org.apache.lucene.index.DocValues.Source; import org.apache.lucene.index.DocValues.Type; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.IndexableField; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; @@ -62,7 +63,6 @@ class PackedIntValues { bytesRef = new BytesRef(8); } - @Override protected void add(int docID, long v) throws IOException { assert lastDocId < docID; if (!started) { @@ -113,10 +113,10 @@ class PackedIntValues { } @Override - protected void mergeDoc(int docID, int sourceDoc) throws IOException { + protected void mergeDoc(Field scratchField, Source source, int docID, int sourceDoc) throws IOException { assert docID > lastDocId : "docID: " + docID + " must be greater than the last added doc id: " + lastDocId; - add(docID, currentMergeSource.getInt(sourceDoc)); + add(docID, source.getInt(sourceDoc)); } private void writePackedInts(IndexOutput datOut, int docCount) throws IOException { @@ -151,8 +151,8 @@ class PackedIntValues { } @Override - public void add(int docID, DocValue docValue) throws IOException { - add(docID, docValue.getInt()); + public void add(int docID, IndexableField docValue) throws IOException { + add(docID, docValue.numericValue().longValue()); } } diff --git a/lucene/src/java/org/apache/lucene/codecs/lucene40/values/VarStraightBytesImpl.java b/lucene/src/java/org/apache/lucene/codecs/lucene40/values/VarStraightBytesImpl.java index 80924f21b6b..2902801c259 100644 --- a/lucene/src/java/org/apache/lucene/codecs/lucene40/values/VarStraightBytesImpl.java +++ b/lucene/src/java/org/apache/lucene/codecs/lucene40/values/VarStraightBytesImpl.java @@ -22,21 +22,25 @@ import java.io.IOException; import org.apache.lucene.codecs.lucene40.values.Bytes.BytesReaderBase; import org.apache.lucene.codecs.lucene40.values.Bytes.BytesSourceBase; import org.apache.lucene.codecs.lucene40.values.Bytes.BytesWriterBase; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.DocValues.Source; import org.apache.lucene.index.DocValues.Type; +import 
org.apache.lucene.index.DocValues; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator; import org.apache.lucene.util.ByteBlockPool; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.Counter; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.PagedBytes; import org.apache.lucene.util.RamUsageEstimator; -import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator; -import org.apache.lucene.util.packed.PackedInts; import org.apache.lucene.util.packed.PackedInts.ReaderIterator; +import org.apache.lucene.util.packed.PackedInts; // Variable length byte[] per document, no sharing @@ -93,21 +97,21 @@ class VarStraightBytesImpl { } @Override - protected void merge(SingleSubMergeState state) throws IOException { + protected void merge(DocValues readerIn, int docBase, int docCount, Bits liveDocs) throws IOException { merge = true; datOut = getOrCreateDataOut(); boolean success = false; try { - if (state.liveDocs == null && state.reader instanceof VarStraightReader) { + if (liveDocs == null && readerIn instanceof VarStraightReader) { // bulk merge since we don't have any deletes - VarStraightReader reader = (VarStraightReader) state.reader; + VarStraightReader reader = (VarStraightReader) readerIn; final int maxDocs = reader.maxDoc; if (maxDocs == 0) { return; } - if (lastDocID+1 < state.docBase) { - fill(state.docBase, address); - lastDocID = state.docBase-1; + if (lastDocID+1 < docBase) { + fill(docBase, address); + lastDocID = docBase-1; } final long numDataBytes; final IndexInput cloneIdx = reader.cloneIndex(); @@ -137,7 +141,7 @@ class VarStraightBytesImpl { IOUtils.close(cloneData); } } else { - super.merge(state); + super.merge(readerIn, docBase, docCount, liveDocs); } success = true; } finally { @@ -148,10 +152,10 @@ class VarStraightBytesImpl { } @Override - protected void mergeDoc(int docID, int sourceDoc) throws IOException { + protected void mergeDoc(Field scratchField, Source source, int docID, int sourceDoc) throws IOException { assert merge; assert lastDocID < docID; - currentMergeSource.getBytes(sourceDoc, bytesRef); + source.getBytes(sourceDoc, bytesRef); if (bytesRef.length == 0) { return; // default } @@ -226,7 +230,7 @@ class VarStraightBytesImpl { } public static class VarStraightReader extends BytesReaderBase { - private final int maxDoc; + final int maxDoc; VarStraightReader(Directory dir, String id, int maxDoc, IOContext context) throws IOException { super(dir, id, CODEC_NAME, VERSION_START, true, context, Type.BYTES_VAR_STRAIGHT); diff --git a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsConsumer.java b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsConsumer.java index cc90fdc2222..4a747918359 100644 --- a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsConsumer.java +++ b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsConsumer.java @@ -23,13 +23,13 @@ import java.util.Set; import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.codecs.PerDocConsumer; -import org.apache.lucene.index.DocValue; import org.apache.lucene.index.DocValues.Type; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; import 
org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; @@ -130,8 +130,8 @@ public class SimpleTextNormsConsumer extends PerDocConsumer { } @Override - public void add(int docID, DocValue docValue) throws IOException { - add(docID, docValue.getBytes()); + public void add(int docID, IndexableField docValue) throws IOException { + add(docID, docValue.binaryValue()); } protected void add(int docID, BytesRef value) throws IOException { diff --git a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsWriter.java b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsWriter.java index 82480a68dce..a9efb2dbff3 100644 --- a/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsWriter.java +++ b/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsWriter.java @@ -98,46 +98,39 @@ public class SimpleTextStoredFieldsWriter extends StoredFieldsWriter { newLine(); write(TYPE); - if (field.numeric()) { - switch (field.numericDataType()) { - case INT: - write(TYPE_INT); - newLine(); + final Number n = field.numericValue(); + + if (n != null) { + if (n instanceof Byte || n instanceof Short || n instanceof Integer) { + write(TYPE_INT); + newLine(); - write(VALUE); - write(Integer.toString(field.numericValue().intValue())); - newLine(); + write(VALUE); + write(Integer.toString(n.intValue())); + newLine(); + } else if (n instanceof Long) { + write(TYPE_LONG); + newLine(); + + write(VALUE); + write(Long.toString(n.longValue())); + newLine(); + } else if (n instanceof Float) { + write(TYPE_FLOAT); + newLine(); - break; - case LONG: - write(TYPE_LONG); - newLine(); + write(VALUE); + write(Float.toString(n.floatValue())); + newLine(); + } else if (n instanceof Double) { + write(TYPE_DOUBLE); + newLine(); - write(VALUE); - write(Long.toString(field.numericValue().longValue())); - newLine(); - - break; - case FLOAT: - write(TYPE_FLOAT); - newLine(); - - write(VALUE); - write(Float.toString(field.numericValue().floatValue())); - newLine(); - - break; - case DOUBLE: - write(TYPE_DOUBLE); - newLine(); - - write(VALUE); - write(Double.toString(field.numericValue().doubleValue())); - newLine(); - - break; - default: - assert false : "Should never get here"; + write(VALUE); + write(Double.toString(n.doubleValue())); + newLine(); + } else { + throw new IllegalArgumentException("cannot store numeric type " + n.getClass()); } } else { BytesRef bytes = field.binaryValue(); diff --git a/lucene/src/java/org/apache/lucene/document/BinaryField.java b/lucene/src/java/org/apache/lucene/document/BinaryField.java deleted file mode 100644 index 813ba401109..00000000000 --- a/lucene/src/java/org/apache/lucene/document/BinaryField.java +++ /dev/null @@ -1,46 +0,0 @@ -package org.apache.lucene.document; - -import org.apache.lucene.util.BytesRef; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** A field with byte[] value that is only stored. */ - -public final class BinaryField extends Field { - - public static final FieldType TYPE_STORED = new FieldType(); - static { - TYPE_STORED.setStored(true); - TYPE_STORED.freeze(); - } - - /** Creates a new BinaryField */ - public BinaryField(String name, byte[] value) { - super(name, value, BinaryField.TYPE_STORED); - } - - /** Creates a new BinaryField */ - public BinaryField(String name, byte[] value, int offset, int length) { - super(name, value, offset, length, BinaryField.TYPE_STORED); - } - - /** Creates a new BinaryField */ - public BinaryField(String name, BytesRef bytes) { - super(name, bytes, BinaryField.TYPE_STORED); - } -} diff --git a/lucene/src/java/org/apache/lucene/document/DocValuesField.java b/lucene/src/java/org/apache/lucene/document/DocValuesField.java index 2cb494c2e43..c318c331cc5 100644 --- a/lucene/src/java/org/apache/lucene/document/DocValuesField.java +++ b/lucene/src/java/org/apache/lucene/document/DocValuesField.java @@ -16,13 +16,14 @@ package org.apache.lucene.document; * See the License for the specific language governing permissions and * limitations under the License. */ -import java.io.Reader; -import java.util.Comparator; -import org.apache.lucene.index.IndexableFieldType; -import org.apache.lucene.index.DocValue; -import org.apache.lucene.index.DocValues; +import java.util.Comparator; +import java.util.EnumSet; +import java.util.HashMap; +import java.util.Map; + import org.apache.lucene.index.DocValues.Type; // javadocs +import org.apache.lucene.index.DocValues; import org.apache.lucene.util.BytesRef; /** @@ -32,14 +33,16 @@ import org.apache.lucene.util.BytesRef; * example usage, adding an int value: * *

- * document.add(new DocValuesField(name).setInt(value));
+ * DocValuesField field = new DocValuesField(name, DocValues.Type.VAR_INTS);
+ * field.setInt(value);
+ * document.add(field);
  * 
* * For optimal performance, re-use the DocValuesField and * {@link Document} instance for more than one document: * *
- *  DocValuesField field = new DocValuesField(name);
+ *  DocValuesField field = new DocValuesField(name, DocValues.Type.VAR_INTS);
  *  Document document = new Document();
  *  document.add(field);
  * 
@@ -69,326 +72,79 @@ import org.apache.lucene.util.BytesRef;
  * 
* * */ -public class DocValuesField extends Field implements DocValue { - protected BytesRef bytes; - protected double doubleValue; - protected long longValue; - protected DocValues.Type type; +public class DocValuesField extends Field { + protected Comparator bytesComparator; - /** - * Creates a new {@link DocValuesField} with the given name. - */ - public DocValuesField(String name) { - this(name, new FieldType()); - } - - public DocValuesField(String name, IndexableFieldType type) { - this(name, type, null); - } - - public DocValuesField(String name, IndexableFieldType type, String value) { - super(name, type); - fieldsData = value; - } - - @Override - public DocValue docValue() { - return this; - } - - /** - * Sets the given long value and sets the field's {@link Type} to - * {@link Type#VAR_INTS} unless already set. If you want to change the - * default type use {@link #setDocValuesType(DocValues.Type)}. - */ - public void setInt(long value) { - setInt(value, false); - } - - /** - * Sets the given long value as a 64 bit signed integer. - * - * @param value - * the value to set - * @param fixed - * if true {@link Type#FIXED_INTS_64} is used - * otherwise {@link Type#VAR_INTS} - */ - public void setInt(long value, boolean fixed) { - if (type == null) { - type = fixed ? DocValues.Type.FIXED_INTS_64 : DocValues.Type.VAR_INTS; + private static final Map types = new HashMap(); + static { + for(DocValues.Type type : DocValues.Type.values()) { + final FieldType ft = new FieldType(); + ft.setDocValueType(type); + ft.freeze(); + types.put(type, ft); } - longValue = value; } - /** - * Sets the given int value and sets the field's {@link Type} to - * {@link Type#VAR_INTS} unless already set. If you want to change the - * default type use {@link #setDocValuesType(DocValues.Type)}. - */ - public void setInt(int value) { - setInt(value, false); + private static EnumSet BYTES = EnumSet.of( + Type.BYTES_FIXED_DEREF, + Type.BYTES_FIXED_STRAIGHT, + Type.BYTES_VAR_DEREF, + Type.BYTES_VAR_STRAIGHT, + Type.BYTES_FIXED_SORTED, + Type.BYTES_VAR_SORTED); + + private static EnumSet INTS = EnumSet.of( + Type.VAR_INTS, + Type.FIXED_INTS_8, + Type.FIXED_INTS_16, + Type.FIXED_INTS_32, + Type.FIXED_INTS_64); + + public static FieldType getFieldType(DocValues.Type type) { + return types.get(type); } - /** - * Sets the given int value as a 32 bit signed integer. - * - * @param value - * the value to set - * @param fixed - * if true {@link Type#FIXED_INTS_32} is used - * otherwise {@link Type#VAR_INTS} - */ - public void setInt(int value, boolean fixed) { - if (type == null) { - type = fixed ? DocValues.Type.FIXED_INTS_32 : DocValues.Type.VAR_INTS; + public DocValuesField(String name, BytesRef bytes, DocValues.Type docValueType) { + super(name, getFieldType(docValueType)); + if (!BYTES.contains(docValueType)) { + throw new IllegalArgumentException("docValueType must be one of: " + BYTES + "; got " + docValueType); } - longValue = value; + fieldsData = bytes; } - /** - * Sets the given short value and sets the field's {@link Type} to - * {@link Type#VAR_INTS} unless already set. If you want to change the - * default type use {@link #setDocValuesType(DocValues.Type)}. - */ - public void setInt(short value) { - setInt(value, false); - } - - /** - * Sets the given short value as a 16 bit signed integer. 
- * - * @param value - * the value to set - * @param fixed - * if true {@link Type#FIXED_INTS_16} is used - * otherwise {@link Type#VAR_INTS} - */ - public void setInt(short value, boolean fixed) { - if (type == null) { - type = fixed ? DocValues.Type.FIXED_INTS_16 : DocValues.Type.VAR_INTS; + public DocValuesField(String name, int value, DocValues.Type docValueType) { + super(name, getFieldType(docValueType)); + if (!INTS.contains(docValueType)) { + throw new IllegalArgumentException("docValueType must be one of: " + INTS +"; got " + docValueType); } - longValue = value; + fieldsData = Integer.valueOf(value); } - /** - * Sets the given byte value and sets the field's {@link Type} to - * {@link Type#VAR_INTS} unless already set. If you want to change the - * default type use {@link #setDocValuesType(DocValues.Type)}. - */ - public void setInt(byte value) { - setInt(value, false); - } - - /** - * Sets the given byte value as a 8 bit signed integer. - * - * @param value - * the value to set - * @param fixed - * if true {@link Type#FIXED_INTS_8} is used - * otherwise {@link Type#VAR_INTS} - */ - public void setInt(byte value, boolean fixed) { - if (type == null) { - type = fixed ? DocValues.Type.FIXED_INTS_8 : DocValues.Type.VAR_INTS; + public DocValuesField(String name, long value, DocValues.Type docValueType) { + super(name, getFieldType(docValueType)); + if (!INTS.contains(docValueType)) { + throw new IllegalArgumentException("docValueType must be one of: " + INTS +"; got " + docValueType); } - longValue = value; + fieldsData = Long.valueOf(value); } - /** - * Sets the given float value and sets the field's {@link Type} - * to {@link Type#FLOAT_32} unless already set. If you want to - * change the type use {@link #setDocValuesType(DocValues.Type)}. - */ - public void setFloat(float value) { - if (type == null) { - type = DocValues.Type.FLOAT_32; + public DocValuesField(String name, float value, DocValues.Type docValueType) { + super(name, getFieldType(docValueType)); + if (docValueType != DocValues.Type.FLOAT_32 && + docValueType != DocValues.Type.FLOAT_64) { + throw new IllegalArgumentException("docValueType must be FLOAT_32/64; got " + docValueType); } - doubleValue = value; + fieldsData = Float.valueOf(value); } - /** - * Sets the given double value and sets the field's {@link Type} - * to {@link Type#FLOAT_64} unless already set. If you want to - * change the default type use {@link #setDocValuesType(DocValues.Type)}. - */ - public void setFloat(double value) { - if (type == null) { - type = DocValues.Type.FLOAT_64; + public DocValuesField(String name, double value, DocValues.Type docValueType) { + super(name, getFieldType(docValueType)); + if (docValueType != DocValues.Type.FLOAT_32 && + docValueType != DocValues.Type.FLOAT_64) { + throw new IllegalArgumentException("docValueType must be FLOAT_32/64; got " + docValueType); } - doubleValue = value; - } - - /** - * Sets the given {@link BytesRef} value and the field's {@link Type}. The - * comparator for this field is set to null. If a - * null comparator is set the default comparator for the given - * {@link Type} is used. - */ - public void setBytes(BytesRef value, DocValues.Type type) { - setBytes(value, type, null); - } - - /** - * Sets the given {@link BytesRef} value, the field's {@link Type} and the - * field's comparator. If the {@link Comparator} is set to null - * the default for the given {@link Type} is used instead. 
- * - * @throws IllegalArgumentException - * if the value or the type are null - */ - public void setBytes(BytesRef value, DocValues.Type type, Comparator comp) { - if (value == null) { - throw new IllegalArgumentException("value must not be null"); - } - setDocValuesType(type); - if (bytes == null) { - bytes = BytesRef.deepCopyOf(value); - } else { - bytes.copyBytes(value); - } - bytesComparator = comp; - } - - /** - * Returns the set {@link BytesRef} or null if not set. - */ - public BytesRef getBytes() { - return bytes; - } - - /** - * Returns the set {@link BytesRef} comparator or null if not set - */ - public Comparator bytesComparator() { - return bytesComparator; - } - - /** - * Returns the set floating point value or 0.0d if not set. - */ - public double getFloat() { - return doubleValue; - } - - /** - * Returns the set long value of 0 if not set. - */ - public long getInt() { - return longValue; - } - - /** - * Sets the {@link BytesRef} comparator for this field. If the field has a - * numeric {@link Type} the comparator will be ignored. - */ - public void setBytesComparator(Comparator comp) { - this.bytesComparator = comp; - } - - /** - * Sets the {@link Type} for this field. - */ - public void setDocValuesType(DocValues.Type type) { - if (type == null) { - throw new IllegalArgumentException("Type must not be null"); - } - this.type = type; - } - - /** - * Returns always null - */ - public Reader readerValue() { - return null; - } - - @Override - public DocValues.Type docValueType() { - return type; - } - - @Override - public String toString() { - final String value; - switch (type) { - case BYTES_FIXED_DEREF: - case BYTES_FIXED_STRAIGHT: - case BYTES_VAR_DEREF: - case BYTES_VAR_STRAIGHT: - case BYTES_FIXED_SORTED: - case BYTES_VAR_SORTED: - // don't use to unicode string this is not necessarily unicode here - value = "bytes: " + bytes.toString(); - break; - case FIXED_INTS_16: - value = "int16: " + longValue; - break; - case FIXED_INTS_32: - value = "int32: " + longValue; - break; - case FIXED_INTS_64: - value = "int64: " + longValue; - break; - case FIXED_INTS_8: - value = "int8: " + longValue; - break; - case VAR_INTS: - value = "vint: " + longValue; - break; - case FLOAT_32: - value = "float32: " + doubleValue; - break; - case FLOAT_64: - value = "float64: " + doubleValue; - break; - default: - throw new IllegalArgumentException("unknown type: " + type); - } - return "<" + name() + ": DocValuesField " + value + ">"; - } - - /** - * Returns an DocValuesField holding the value from - * the provided string field, as the specified type. The - * incoming field must have a string value. The name, {@link - * FieldType} and string value are carried over from the - * incoming Field. - */ - public static DocValuesField build(Field field, DocValues.Type type) { - if (field instanceof DocValuesField) { - return (DocValuesField) field; - } - final DocValuesField valField = new DocValuesField(field.name(), field.fieldType(), field.stringValue()); - switch (type) { - case BYTES_FIXED_DEREF: - case BYTES_FIXED_STRAIGHT: - case BYTES_VAR_DEREF: - case BYTES_VAR_STRAIGHT: - case BYTES_FIXED_SORTED: - case BYTES_VAR_SORTED: - BytesRef ref = field.isBinary() ? 
field.binaryValue() : new BytesRef(field.stringValue()); - valField.setBytes(ref, type); - break; - case FIXED_INTS_16: - case FIXED_INTS_32: - case FIXED_INTS_64: - case FIXED_INTS_8: - case VAR_INTS: - valField.setInt(Long.parseLong(field.stringValue())); - break; - case FLOAT_32: - valField.setFloat(Float.parseFloat(field.stringValue())); - break; - case FLOAT_64: - valField.setFloat(Double.parseDouble(field.stringValue())); - break; - default: - throw new IllegalArgumentException("unknown type: " + type); - } - return valField; + fieldsData = Double.valueOf(value); } } diff --git a/lucene/src/java/org/apache/lucene/document/DocumentStoredFieldVisitor.java b/lucene/src/java/org/apache/lucene/document/DocumentStoredFieldVisitor.java index ca9b95b3c9c..94376928cf6 100644 --- a/lucene/src/java/org/apache/lucene/document/DocumentStoredFieldVisitor.java +++ b/lucene/src/java/org/apache/lucene/document/DocumentStoredFieldVisitor.java @@ -57,7 +57,7 @@ public class DocumentStoredFieldVisitor extends StoredFieldVisitor { @Override public void binaryField(FieldInfo fieldInfo, byte[] value, int offset, int length) throws IOException { - doc.add(new BinaryField(fieldInfo.name, value)); + doc.add(new StoredField(fieldInfo.name, value)); } @Override @@ -73,30 +73,22 @@ public class DocumentStoredFieldVisitor extends StoredFieldVisitor { @Override public void intField(FieldInfo fieldInfo, int value) { - FieldType ft = new FieldType(NumericField.TYPE_STORED); - ft.setIndexed(fieldInfo.isIndexed); - doc.add(new NumericField(fieldInfo.name, ft).setIntValue(value)); + doc.add(new StoredField(fieldInfo.name, value)); } @Override public void longField(FieldInfo fieldInfo, long value) { - FieldType ft = new FieldType(NumericField.TYPE_STORED); - ft.setIndexed(fieldInfo.isIndexed); - doc.add(new NumericField(fieldInfo.name, ft).setLongValue(value)); + doc.add(new StoredField(fieldInfo.name, value)); } @Override public void floatField(FieldInfo fieldInfo, float value) { - FieldType ft = new FieldType(NumericField.TYPE_STORED); - ft.setIndexed(fieldInfo.isIndexed); - doc.add(new NumericField(fieldInfo.name, ft).setFloatValue(value)); + doc.add(new StoredField(fieldInfo.name, value)); } @Override public void doubleField(FieldInfo fieldInfo, double value) { - FieldType ft = new FieldType(NumericField.TYPE_STORED); - ft.setIndexed(fieldInfo.isIndexed); - doc.add(new NumericField(fieldInfo.name, ft).setDoubleValue(value)); + doc.add(new StoredField(fieldInfo.name, value)); } @Override diff --git a/lucene/src/java/org/apache/lucene/document/Field.java b/lucene/src/java/org/apache/lucene/document/Field.java index 26b26be4aa6..ac8f1ea341b 100644 --- a/lucene/src/java/org/apache/lucene/document/Field.java +++ b/lucene/src/java/org/apache/lucene/document/Field.java @@ -22,69 +22,102 @@ import java.io.Reader; import java.io.StringReader; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.NumericTokenStream; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.index.DocValues; -import org.apache.lucene.index.IndexableFieldType; +import org.apache.lucene.index.IndexWriter; // javadocs import org.apache.lucene.index.IndexableField; -import org.apache.lucene.index.DocValue; +import org.apache.lucene.index.IndexableFieldType; import org.apache.lucene.util.BytesRef; /** - * A field is a section of a Document. 
Each field has two parts, a name and a - * value. Values may be free text, provided as a String or as a Reader, or they - * may be atomic keywords, which are not further processed. Such keywords may be - * used to represent dates, urls, etc. Fields are optionally stored in the + * Expert: directly create a field for a document. Most + * users should use one of the sugar subclasses: {@link + * NumericField}, {@link DocValuesField}, {@link + * StringField}, {@link TextField}, {@link StoredField}. + * + *
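A minimal sketch of how these sugar classes compose into a Document, using only constructors that appear elsewhere in this patch (DocValuesField, NumericField, StoredField, StringField, TextField); the class name, field names and values below are illustrative:

    import java.io.StringReader;

    import org.apache.lucene.document.DocValuesField;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.NumericField;
    import org.apache.lucene.document.StoredField;
    import org.apache.lucene.document.StringField;
    import org.apache.lucene.document.TextField;
    import org.apache.lucene.index.DocValues;

    public class SugarFieldsSketch {
      public static Document newDoc() {
        Document doc = new Document();
        doc.add(new StringField("id", "42"));                                     // indexed, not tokenized, not stored
        doc.add(new TextField("body", new StringReader("full text goes here")));  // tokenized, not stored
        doc.add(new StoredField("title", "A stored-only title"));                 // stored, not indexed
        doc.add(new NumericField("price", 9.99));                                 // indexed for NumericRangeQuery, not stored
        doc.add(new DocValuesField("rating", 5L, DocValues.Type.VAR_INTS));       // per-document value
        return doc;
      }
    }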

A field is a section of a Document. Each field has three + * parts: name, type and value. Values may be text + * (String, Reader or pre-analyzed TokenStream), binary + * (byte[]), or numeric (a Number). Fields are optionally stored in the * index, so that they may be returned with hits on the document. + *

- * Note, Field instances are instantiated with a {@link IndexableFieldType}. Making changes - * to the state of the FieldType will impact any Field it is used in, therefore - * it is strongly recommended that no changes are made after Field instantiation. + * NOTE: the field type is an {@link IndexableFieldType}. Making changes + * to the state of the IndexableFieldType will impact any + * Field it is used in. It is strongly recommended that no + * changes be made after Field instantiation. */ public class Field implements IndexableField { - - protected IndexableFieldType type; - protected String name = "body"; - // the data object for all different kind of field values + + protected final FieldType type; + protected final String name; + + // Field's value: protected Object fieldsData; - // pre-analyzed tokenStream for indexed fields + + // Pre-analyzed tokenStream for indexed fields; this is + // separate from fieldsData because you are allowed to + // have both; eg maybe field has a String value but you + // customize how it's tokenized: protected TokenStream tokenStream; - // length/offset for all primitive types - protected DocValue docValue; - + + protected transient NumericTokenStream numericTokenStream; + protected float boost = 1.0f; - public Field(String name, IndexableFieldType type) { + protected Field(String name, FieldType type) { + if (name == null) { + throw new IllegalArgumentException("name cannot be null"); + } this.name = name; + if (type == null) { + throw new IllegalArgumentException("type cannot be null"); + } this.type = type; } - - public Field(String name, Reader reader, IndexableFieldType type) { + + /** + * Create field with Reader value. + */ + public Field(String name, Reader reader, FieldType type) { if (name == null) { - throw new NullPointerException("name cannot be null"); + throw new IllegalArgumentException("name cannot be null"); + } + if (type == null) { + throw new IllegalArgumentException("type cannot be null"); } if (reader == null) { throw new NullPointerException("reader cannot be null"); } + if (type.stored()) { + throw new IllegalArgumentException("fields with a Reader value cannot be stored"); + } if (type.indexed() && !type.tokenized()) { - throw new IllegalArgumentException("Non-tokenized fields must use String values"); + throw new IllegalArgumentException("non-tokenized fields must use String values"); } this.name = name; this.fieldsData = reader; this.type = type; } - - public Field(String name, TokenStream tokenStream, IndexableFieldType type) { + + /** + * Create field with TokenStream value. + */ + public Field(String name, TokenStream tokenStream, FieldType type) { if (name == null) { - throw new NullPointerException("name cannot be null"); + throw new IllegalArgumentException("name cannot be null"); } if (tokenStream == null) { throw new NullPointerException("tokenStream cannot be null"); } - if (type.indexed() && !type.tokenized()) { - throw new IllegalArgumentException("Non-tokenized fields must use String values"); + if (!type.indexed() || !type.tokenized()) { + throw new IllegalArgumentException("TokenStream fields must be indexed and tokenized"); + } + if (type.stored()) { + throw new IllegalArgumentException("TokenStream fields cannot be stored"); } this.name = name; @@ -93,25 +126,42 @@ public class Field implements IndexableField { this.type = type; } - public Field(String name, byte[] value, IndexableFieldType type) { + /** + * Create field with binary value. 
+ */ + public Field(String name, byte[] value, FieldType type) { this(name, value, 0, value.length, type); } - public Field(String name, byte[] value, int offset, int length, IndexableFieldType type) { + /** + * Create field with binary value. + */ + public Field(String name, byte[] value, int offset, int length, FieldType type) { this(name, new BytesRef(value, offset, length), type); } - public Field(String name, BytesRef bytes, IndexableFieldType type) { - if (type.indexed() && !type.tokenized()) { - throw new IllegalArgumentException("Non-tokenized fields must use String values"); + /** + * Create field with binary value. + * + *

NOTE: the provided BytesRef is not copied so be sure + * not to change it until you're done with this field. + */ + public Field(String name, BytesRef bytes, FieldType type) { + if (name == null) { + throw new IllegalArgumentException("name cannot be null"); + } + if (type.indexed()) { + throw new IllegalArgumentException("Fields with BytesRef values cannot be indexed"); } - this.fieldsData = bytes; this.type = type; this.name = name; } - - public Field(String name, String value, IndexableFieldType type) { + + /** + * Create field with String value. + */ + public Field(String name, String value, FieldType type) { if (name == null) { throw new IllegalArgumentException("name cannot be null"); } @@ -122,7 +172,7 @@ public class Field implements IndexableField { throw new IllegalArgumentException("it doesn't make sense to have a field that " + "is neither indexed nor stored"); } - if (!type.indexed() && !type.tokenized() && (type.storeTermVectors())) { + if (!type.indexed() && (type.storeTermVectors())) { throw new IllegalArgumentException("cannot store term vector information " + "for a field that is not indexed"); } @@ -132,6 +182,54 @@ public class Field implements IndexableField { this.fieldsData = value; } + /** + * Create field with an int value. + */ + public Field(String name, int value, FieldType type) { + if (name == null) { + throw new IllegalArgumentException("name cannot be null"); + } + this.type = type; + this.name = name; + this.fieldsData = Integer.valueOf(value); + } + + /** + * Create field with an long value. + */ + public Field(String name, long value, FieldType type) { + if (name == null) { + throw new IllegalArgumentException("name cannot be null"); + } + this.type = type; + this.name = name; + this.fieldsData = Long.valueOf(value); + } + + /** + * Create field with a float value. + */ + public Field(String name, float value, FieldType type) { + if (name == null) { + throw new IllegalArgumentException("name cannot be null"); + } + this.type = type; + this.name = name; + this.fieldsData = Float.valueOf(value); + } + + /** + * Create field with a double value. + */ + public Field(String name, double value, FieldType type) { + if (name == null) { + throw new IllegalArgumentException("name cannot be null"); + } + this.type = type; + this.name = name; + this.fieldsData = Double.valueOf(value); + } + /** * The value of the field as a String, or null. If null, the Reader value or * binary value is used. Exactly one of stringValue(), readerValue(), and @@ -175,9 +273,8 @@ public class Field implements IndexableField { *

*/ public void setValue(String value) { - if (isBinary()) { - throw new IllegalArgumentException( - "cannot set a String value on a binary field"); + if (!(fieldsData instanceof String)) { + throw new IllegalArgumentException("cannot change value type from " + fieldsData.getClass().getSimpleName() + " to String"); } fieldsData = value; } @@ -187,13 +284,8 @@ public class Field implements IndexableField { * href="#setValue(java.lang.String)">setValue(String). */ public void setValue(Reader value) { - if (isBinary()) { - throw new IllegalArgumentException( - "cannot set a Reader value on a binary field"); - } - if (type.stored()) { - throw new IllegalArgumentException( - "cannot set a Reader value on a stored field"); + if (!(fieldsData instanceof Reader)) { + throw new IllegalArgumentException("cannot change value type from " + fieldsData.getClass().getSimpleName() + " to Reader"); } fieldsData = value; } @@ -203,13 +295,66 @@ public class Field implements IndexableField { * href="#setValue(java.lang.String)">setValue(String). */ public void setValue(byte[] value) { - if (!isBinary()) { - throw new IllegalArgumentException( - "cannot set a byte[] value on a non-binary field"); - } - fieldsData = new BytesRef(value); + setValue(new BytesRef(value)); } - + + /** + * Expert: change the value of this field. See setValue(String). + * + *

NOTE: the provided BytesRef is not copied so be sure + * not to change it until you're done with this field. + */ + public void setValue(BytesRef value) { + if (!(fieldsData instanceof BytesRef)) { + throw new IllegalArgumentException("cannot change value type from " + fieldsData.getClass().getSimpleName() + " to BytesRef"); + } + if (type.indexed()) { + throw new IllegalArgumentException("cannot set a Reader value on an indexed field"); + } + fieldsData = value; + } + + public void setValue(int value) { + if (!(fieldsData instanceof Integer)) { + throw new IllegalArgumentException("cannot change value type from " + fieldsData.getClass().getSimpleName() + " to Integer"); + } + if (numericTokenStream != null) { + numericTokenStream.setIntValue(value); + } + fieldsData = Integer.valueOf(value); + } + + public void setValue(long value) { + if (!(fieldsData instanceof Long)) { + throw new IllegalArgumentException("cannot change value type from " + fieldsData.getClass().getSimpleName() + " to Long"); + } + if (numericTokenStream != null) { + numericTokenStream.setLongValue(value); + } + fieldsData = Long.valueOf(value); + } + + public void setValue(float value) { + if (!(fieldsData instanceof Float)) { + throw new IllegalArgumentException("cannot change value type from " + fieldsData.getClass().getSimpleName() + " to Float"); + } + if (numericTokenStream != null) { + numericTokenStream.setFloatValue(value); + } + fieldsData = Float.valueOf(value); + } + + public void setValue(double value) { + if (!(fieldsData instanceof Double)) { + throw new IllegalArgumentException("cannot change value type from " + fieldsData.getClass().getSimpleName() + " to Double"); + } + if (numericTokenStream != null) { + numericTokenStream.setDoubleValue(value); + } + fieldsData = Double.valueOf(value); + } + /** * Expert: sets the token stream to be used for indexing and causes * isIndexed() and isTokenized() to return true. May be combined with stored @@ -217,8 +362,10 @@ public class Field implements IndexableField { */ public void setTokenStream(TokenStream tokenStream) { if (!type.indexed() || !type.tokenized()) { - throw new IllegalArgumentException( - "cannot set token stream on non indexed and tokenized field"); + throw new IllegalArgumentException("TokenStream fields must be indexed and tokenized"); + } + if (type.numericType() != null) { + throw new IllegalArgumentException("cannot set private TokenStream on numeric fields"); } this.tokenStream = tokenStream; } @@ -248,31 +395,21 @@ public class Field implements IndexableField { public void setBoost(float boost) { this.boost = boost; } - - public boolean numeric() { - return false; - } public Number numericValue() { - return null; - } - - public NumericField.DataType numericDataType() { - return null; - } - - public BytesRef binaryValue() { - if (!isBinary()) { - return null; + if (fieldsData instanceof Number) { + return (Number) fieldsData; } else { - return (BytesRef) fieldsData; + return null; } } - - /** methods from inner IndexableFieldType */ - - public boolean isBinary() { - return fieldsData instanceof BytesRef; + + public BytesRef binaryValue() { + if (fieldsData instanceof BytesRef) { + return (BytesRef) fieldsData; + } else { + return null; + } } /** Prints a Field for human consumption. 
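The type-checked setValue overloads above are meant for re-using Field and Document instances across many documents (the same pattern the NumericField javadoc further down shows with field.setValue(value)). A hedged sketch, assuming an IndexWriter opened elsewhere; the class and field names are illustrative:

    import java.io.IOException;

    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.NumericField;
    import org.apache.lucene.document.StringField;
    import org.apache.lucene.index.IndexWriter;

    public class ReuseFieldsSketch {
      // Each setValue call must keep the value's type (String stays String,
      // int stays int); changing the type throws IllegalArgumentException.
      public static void indexAll(IndexWriter writer, String[] ids, int[] counts) throws IOException {
        StringField idField = new StringField("id", "");
        NumericField countField = new NumericField("count", 0);
        Document doc = new Document();
        doc.add(idField);
        doc.add(countField);
        for (int i = 0; i < ids.length; i++) {
          idField.setValue(ids[i]);
          countField.setValue(counts[i]);
          writer.addDocument(doc);
        }
      }
    }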
*/ @@ -292,22 +429,8 @@ public class Field implements IndexableField { return result.toString(); } - public void setDocValue(DocValue docValue) { - this.docValue = docValue; - } - - @Override - public DocValue docValue() { - return null; - } - - @Override - public DocValues.Type docValueType() { - return null; - } - - /** Returns FieldType for this field. */ - public IndexableFieldType fieldType() { + /** Returns the {@link FieldType} for this field. */ + public FieldType fieldType() { return type; } @@ -319,6 +442,38 @@ public class Field implements IndexableField { return null; } + final NumericField.DataType numericType = fieldType().numericType(); + if (numericType != null) { + if (numericTokenStream == null) { + // lazy init the TokenStream as it is heavy to instantiate + // (attributes,...) if not needed (stored field loading) + numericTokenStream = new NumericTokenStream(type.numericPrecisionStep()); + // initialize value in TokenStream + final Number val = (Number) fieldsData; + switch (numericType) { + case INT: + numericTokenStream.setIntValue(val.intValue()); + break; + case LONG: + numericTokenStream.setLongValue(val.longValue()); + break; + case FLOAT: + numericTokenStream.setFloatValue(val.floatValue()); + break; + case DOUBLE: + numericTokenStream.setDoubleValue(val.doubleValue()); + break; + default: + assert false : "Should never get here"; + } + } else { + // OK -- previously cached and we already updated if + // setters were called. + } + + return numericTokenStream; + } + if (!fieldType().tokenized()) { if (stringValue() == null) { throw new IllegalArgumentException("Non-Tokenized Fields must have a String value"); @@ -355,6 +510,449 @@ public class Field implements IndexableField { return analyzer.tokenStream(name(), new StringReader(stringValue())); } - throw new IllegalArgumentException("Field must have either TokenStream, String or Reader value"); + throw new IllegalArgumentException("Field must have either TokenStream, String, Reader or Number value"); + } + + + // + // Deprecated transition API below: + // + + /** Specifies whether and how a field should be stored. + * + * @deprecated This is here only to ease transition from + * the pre-4.0 APIs. */ + @Deprecated + public static enum Store { + + /** Store the original field value in the index. This is useful for short texts + * like a document's title which should be displayed with the results. The + * value is stored in its original form, i.e. no analyzer is used before it is + * stored. + */ + YES { + @Override + public boolean isStored() { return true; } + }, + + /** Do not store the field value in the index. */ + NO { + @Override + public boolean isStored() { return false; } + }; + + public abstract boolean isStored(); + } + + /** Specifies whether and how a field should be indexed. + * + * @deprecated This is here only to ease transition from + * the pre-4.0 APIs. */ + @Deprecated + public static enum Index { + + /** Do not index the field value. This field can thus not be searched, + * but one can still access its contents provided it is + * {@link Field.Store stored}. */ + NO { + @Override + public boolean isIndexed() { return false; } + @Override + public boolean isAnalyzed() { return false; } + @Override + public boolean omitNorms() { return true; } + }, + + /** Index the tokens produced by running the field's + * value through an Analyzer. This is useful for + * common text. 
*/ + ANALYZED { + @Override + public boolean isIndexed() { return true; } + @Override + public boolean isAnalyzed() { return true; } + @Override + public boolean omitNorms() { return false; } + }, + + /** Index the field's value without using an Analyzer, so it can be searched. + * As no analyzer is used the value will be stored as a single term. This is + * useful for unique Ids like product numbers. + */ + NOT_ANALYZED { + @Override + public boolean isIndexed() { return true; } + @Override + public boolean isAnalyzed() { return false; } + @Override + public boolean omitNorms() { return false; } + }, + + /** Expert: Index the field's value without an Analyzer, + * and also disable the indexing of norms. Note that you + * can also separately enable/disable norms by calling + * {@link FieldType#setOmitNorms}. No norms means that + * index-time field and document boosting and field + * length normalization are disabled. The benefit is + * less memory usage as norms take up one byte of RAM + * per indexed field for every document in the index, + * during searching. Note that once you index a given + * field with norms enabled, disabling norms will + * have no effect. In other words, for this to have the + * above described effect on a field, all instances of + * that field must be indexed with NOT_ANALYZED_NO_NORMS + * from the beginning. */ + NOT_ANALYZED_NO_NORMS { + @Override + public boolean isIndexed() { return true; } + @Override + public boolean isAnalyzed() { return false; } + @Override + public boolean omitNorms() { return true; } + }, + + /** Expert: Index the tokens produced by running the + * field's value through an Analyzer, and also + * separately disable the storing of norms. See + * {@link #NOT_ANALYZED_NO_NORMS} for what norms are + * and why you may want to disable them. */ + ANALYZED_NO_NORMS { + @Override + public boolean isIndexed() { return true; } + @Override + public boolean isAnalyzed() { return true; } + @Override + public boolean omitNorms() { return true; } + }; + + /** Get the best representation of the index given the flags. */ + public static Index toIndex(boolean indexed, boolean analyzed) { + return toIndex(indexed, analyzed, false); + } + + /** Expert: Get the best representation of the index given the flags. */ + public static Index toIndex(boolean indexed, boolean analyzed, boolean omitNorms) { + + // If it is not indexed nothing else matters + if (!indexed) { + return Index.NO; + } + + // typical, non-expert + if (!omitNorms) { + if (analyzed) { + return Index.ANALYZED; + } + return Index.NOT_ANALYZED; + } + + // Expert: Norms omitted + if (analyzed) { + return Index.ANALYZED_NO_NORMS; + } + return Index.NOT_ANALYZED_NO_NORMS; + } + + public abstract boolean isIndexed(); + public abstract boolean isAnalyzed(); + public abstract boolean omitNorms(); + } + + /** Specifies whether and how a field should have term vectors. + * + * @deprecated This is here only to ease transition from + * the pre-4.0 APIs. */ + @Deprecated + public static enum TermVector { + + /** Do not store term vectors. + */ + NO { + @Override + public boolean isStored() { return false; } + @Override + public boolean withPositions() { return false; } + @Override + public boolean withOffsets() { return false; } + }, + + /** Store the term vectors of each document. A term vector is a list + * of the document's terms and their number of occurrences in that document. 
*/ + YES { + @Override + public boolean isStored() { return true; } + @Override + public boolean withPositions() { return false; } + @Override + public boolean withOffsets() { return false; } + }, + + /** + * Store the term vector + token position information + * + * @see #YES + */ + WITH_POSITIONS { + @Override + public boolean isStored() { return true; } + @Override + public boolean withPositions() { return true; } + @Override + public boolean withOffsets() { return false; } + }, + + /** + * Store the term vector + Token offset information + * + * @see #YES + */ + WITH_OFFSETS { + @Override + public boolean isStored() { return true; } + @Override + public boolean withPositions() { return false; } + @Override + public boolean withOffsets() { return true; } + }, + + /** + * Store the term vector + Token position and offset information + * + * @see #YES + * @see #WITH_POSITIONS + * @see #WITH_OFFSETS + */ + WITH_POSITIONS_OFFSETS { + @Override + public boolean isStored() { return true; } + @Override + public boolean withPositions() { return true; } + @Override + public boolean withOffsets() { return true; } + }; + + /** Get the best representation of a TermVector given the flags. */ + public static TermVector toTermVector(boolean stored, boolean withOffsets, boolean withPositions) { + + // If it is not stored, nothing else matters. + if (!stored) { + return TermVector.NO; + } + + if (withOffsets) { + if (withPositions) { + return Field.TermVector.WITH_POSITIONS_OFFSETS; + } + return Field.TermVector.WITH_OFFSETS; + } + + if (withPositions) { + return Field.TermVector.WITH_POSITIONS; + } + return Field.TermVector.YES; + } + + public abstract boolean isStored(); + public abstract boolean withPositions(); + public abstract boolean withOffsets(); + } + + /** Translates the pre-4.0 enums for specifying how a + * field should be indexed into the 4.0 {@link FieldType} + * approach. + * + * @deprecated This is here only to ease transition from + * the pre-4.0 APIs. + */ + @Deprecated + public static final FieldType translateFieldType(Store store, Index index, TermVector termVector) { + final FieldType ft = new FieldType(); + + ft.setStored(store == Store.YES); + + switch(index) { + case ANALYZED: + ft.setIndexed(true); + ft.setTokenized(true); + break; + case ANALYZED_NO_NORMS: + ft.setIndexed(true); + ft.setTokenized(true); + ft.setOmitNorms(true); + break; + case NOT_ANALYZED: + ft.setIndexed(true); + break; + case NOT_ANALYZED_NO_NORMS: + ft.setIndexed(true); + ft.setOmitNorms(true); + break; + case NO: + break; + } + + switch(termVector) { + case NO: + break; + case YES: + ft.setStoreTermVectors(true); + break; + case WITH_POSITIONS: + ft.setStoreTermVectors(true); + ft.setStoreTermVectorPositions(true); + break; + case WITH_OFFSETS: + ft.setStoreTermVectors(true); + ft.setStoreTermVectorOffsets(true); + break; + case WITH_POSITIONS_OFFSETS: + ft.setStoreTermVectors(true); + ft.setStoreTermVectorPositions(true); + ft.setStoreTermVectorOffsets(true); + break; + } + ft.freeze(); + return ft; + } + + /** + * Create a field by specifying its name, value and how it will + * be saved in the index. Term vectors will not be stored in the index. 
+ * + * @param name The name of the field + * @param value The string to process + * @param store Whether value should be stored in the index + * @param index Whether the field should be indexed, and if so, if it should + * be tokenized before indexing + * @throws NullPointerException if name or value is null + * @throws IllegalArgumentException if the field is neither stored nor indexed + * + * @deprecated Use {@link StringField}, {@link TextField} instead. */ + @Deprecated + public Field(String name, String value, Store store, Index index) { + this(name, value, translateFieldType(store, index, TermVector.NO)); + } + + /** + * Create a field by specifying its name, value and how it will + * be saved in the index. + * + * @param name The name of the field + * @param value The string to process + * @param store Whether value should be stored in the index + * @param index Whether the field should be indexed, and if so, if it should + * be tokenized before indexing + * @param termVector Whether term vector should be stored + * @throws NullPointerException if name or value is null + * @throws IllegalArgumentException in any of the following situations: + *

+ * <ul>
+ *  <li>the field is neither stored nor indexed</li>
+ *  <li>the field is not indexed but termVector is TermVector.YES</li>
+ * </ul>
+ * + * @deprecated Use {@link StringField}, {@link TextField} instead. */ + @Deprecated + public Field(String name, String value, Store store, Index index, TermVector termVector) { + this(name, value, translateFieldType(store, index, termVector)); + } + + /** + * Create a tokenized and indexed field that is not stored. Term vectors will + * not be stored. The Reader is read only when the Document is added to the index, + * i.e. you may not close the Reader until {@link IndexWriter#addDocument} + * has been called. + * + * @param name The name of the field + * @param reader The reader with the content + * @throws NullPointerException if name or reader is null + * + * @deprecated Use {@link TextField} instead. + */ + @Deprecated + public Field(String name, Reader reader) { + this(name, reader, TermVector.NO); + } + + /** + * Create a tokenized and indexed field that is not stored, optionally with + * storing term vectors. The Reader is read only when the Document is added to the index, + * i.e. you may not close the Reader until {@link IndexWriter#addDocument} + * has been called. + * + * @param name The name of the field + * @param reader The reader with the content + * @param termVector Whether term vector should be stored + * @throws NullPointerException if name or reader is null + * + * @deprecated Use {@link TextField} instead. + */ + @Deprecated + public Field(String name, Reader reader, TermVector termVector) { + this(name, reader, translateFieldType(Store.NO, Index.ANALYZED, termVector)); + } + + /** + * Create a tokenized and indexed field that is not stored. Term vectors will + * not be stored. This is useful for pre-analyzed fields. + * The TokenStream is read only when the Document is added to the index, + * i.e. you may not close the TokenStream until {@link IndexWriter#addDocument} + * has been called. + * + * @param name The name of the field + * @param tokenStream The TokenStream with the content + * @throws NullPointerException if name or tokenStream is null + * + * @deprecated Use {@link TextField} instead + */ + @Deprecated + public Field(String name, TokenStream tokenStream) { + this(name, tokenStream, TermVector.NO); + } + + /** + * Create a tokenized and indexed field that is not stored, optionally with + * storing term vectors. This is useful for pre-analyzed fields. + * The TokenStream is read only when the Document is added to the index, + * i.e. you may not close the TokenStream until {@link IndexWriter#addDocument} + * has been called. + * + * @param name The name of the field + * @param tokenStream The TokenStream with the content + * @param termVector Whether term vector should be stored + * @throws NullPointerException if name or tokenStream is null + * + * @deprecated Use {@link TextField} instead + */ + @Deprecated + public Field(String name, TokenStream tokenStream, TermVector termVector) { + this(name, tokenStream, translateFieldType(Store.NO, Index.ANALYZED, termVector)); + } + + /** + * Create a stored field with binary value. Optionally the value may be compressed. + * + * @param name The name of the field + * @param value The binary value + * + * @deprecated Use {@link StoredField} instead. + */ + @Deprecated + public Field(String name, byte[] value) { + this(name, value, translateFieldType(Store.YES, Index.NO, TermVector.NO)); + } + + /** + * Create a stored field with binary value. Optionally the value may be compressed. 
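Since the deprecated Store/Index/TermVector constructors above all route through translateFieldType, a 3.x-style field and an explicitly configured FieldType should describe the same settings. A hedged sketch; the field name and values are illustrative:

    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.FieldType;

    public class TransitionSketch {
      @SuppressWarnings("deprecation")
      public static void main(String[] args) {
        Field legacy = new Field("body", "some text",
            Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS);

        // Equivalent explicit 4.0-style configuration:
        FieldType ft = new FieldType();
        ft.setStored(true);
        ft.setIndexed(true);
        ft.setTokenized(true);
        ft.setStoreTermVectors(true);
        ft.setStoreTermVectorPositions(true);
        ft.freeze();
        Field explicit = new Field("body", "some text", ft);

        // Both FieldType descriptions should match.
        System.out.println(legacy.fieldType());
        System.out.println(explicit.fieldType());
      }
    }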
+ * + * @param name The name of the field + * @param value The binary value + * @param offset Starting offset in value where this Field's bytes are + * @param length Number of bytes to use for this Field, starting at offset + * + * @deprecated Use {@link StoredField} instead. + */ + @Deprecated + public Field(String name, byte[] value, int offset, int length) { + this(name, value, offset, length, translateFieldType(Store.YES, Index.NO, TermVector.NO)); } } diff --git a/lucene/src/java/org/apache/lucene/document/FieldType.java b/lucene/src/java/org/apache/lucene/document/FieldType.java index a5bc9c1d60c..319b65fd35a 100644 --- a/lucene/src/java/org/apache/lucene/document/FieldType.java +++ b/lucene/src/java/org/apache/lucene/document/FieldType.java @@ -17,8 +17,11 @@ package org.apache.lucene.document; * limitations under the License. */ +import org.apache.lucene.index.DocValues; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.IndexableFieldType; +import org.apache.lucene.search.NumericRangeQuery; // javadocs +import org.apache.lucene.util.NumericUtils; public class FieldType implements IndexableFieldType { @@ -30,9 +33,12 @@ public class FieldType implements IndexableFieldType { private boolean storeTermVectorPositions; private boolean omitNorms; private IndexOptions indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; + private DocValues.Type docValueType; + private NumericField.DataType numericType; private boolean frozen; + private int numericPrecisionStep = NumericUtils.PRECISION_STEP_DEFAULT; - public FieldType(IndexableFieldType ref) { + public FieldType(FieldType ref) { this.indexed = ref.indexed(); this.stored = ref.stored(); this.tokenized = ref.tokenized(); @@ -41,6 +47,8 @@ public class FieldType implements IndexableFieldType { this.storeTermVectorPositions = ref.storeTermVectorPositions(); this.omitNorms = ref.omitNorms(); this.indexOptions = ref.indexOptions(); + this.docValueType = ref.docValueType(); + this.numericType = ref.numericType(); // Do not copy frozen! } @@ -49,7 +57,7 @@ public class FieldType implements IndexableFieldType { private void checkIfFrozen() { if (frozen) { - throw new IllegalStateException(); + throw new IllegalStateException("this FieldType is already frozen and cannot be changed"); } } @@ -134,6 +142,42 @@ public class FieldType implements IndexableFieldType { this.indexOptions = value; } + public void setDocValueType(DocValues.Type type) { + checkIfFrozen(); + docValueType = type; + } + + @Override + public DocValues.Type docValueType() { + return docValueType; + } + + public void setNumericType(NumericField.DataType type) { + checkIfFrozen(); + numericType = type; + } + + /** Numeric {@link NumericField.DataType}; if + * non-null then the field's value will be indexed + * numerically so that {@link NumericRangeQuery} can be + * used at search time. */ + public NumericField.DataType numericType() { + return numericType; + } + + public void setNumericPrecisionStep(int precisionStep) { + checkIfFrozen(); + if (precisionStep < 1) { + throw new IllegalArgumentException("precisionStep must be >= 1 (got " + precisionStep + ")"); + } + this.numericPrecisionStep = precisionStep; + } + + /** Precision step for numeric field. */ + public int numericPrecisionStep() { + return numericPrecisionStep; + } + /** Prints a Field for human consumption. 
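FieldType's new numericType and numericPrecisionStep setters above pair with the NumericField(String, Number, FieldType) constructor added later in this patch (note that NumericField.getFieldType is marked @lucene.experimental). A hedged sketch of a stored long field with a non-default precision step; the class and field names are illustrative:

    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.FieldType;
    import org.apache.lucene.document.NumericField;

    public class CustomPrecisionStepSketch {
      public static Document timestampDoc(long timestamp) {
        // Start from the stored LONG type and widen the precision step.
        FieldType ft = new FieldType(NumericField.getFieldType(NumericField.DataType.LONG, true));
        ft.setNumericPrecisionStep(8);  // must also be used with NumericRangeQuery at search time
        ft.freeze();

        Document doc = new Document();
        doc.add(new NumericField("timestamp", Long.valueOf(timestamp), ft));
        return doc;
      }
    }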
*/ @Override public final String toString() { @@ -172,6 +216,16 @@ public class FieldType implements IndexableFieldType { result.append(",indexOptions="); result.append(indexOptions); } + if (numericType != null) { + result.append(",numericType="); + result.append(numericType); + result.append(",numericPrecisionStep="); + result.append(numericPrecisionStep); + } + } + if (docValueType != null) { + result.append(",docValueType="); + result.append(docValueType); } return result.toString(); diff --git a/lucene/src/java/org/apache/lucene/document/NumericField.java b/lucene/src/java/org/apache/lucene/document/NumericField.java index e0b52cf7259..1edb9e702a0 100644 --- a/lucene/src/java/org/apache/lucene/document/NumericField.java +++ b/lucene/src/java/org/apache/lucene/document/NumericField.java @@ -17,17 +17,14 @@ package org.apache.lucene.document; * limitations under the License. */ -import java.io.Reader; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.NumericTokenStream; -import org.apache.lucene.index.FieldInfo.IndexOptions; +import org.apache.lucene.analysis.NumericTokenStream; // javadocs import org.apache.lucene.document.NumericField.DataType; -import org.apache.lucene.util.NumericUtils; -import org.apache.lucene.search.NumericRangeQuery; // javadocs -import org.apache.lucene.search.NumericRangeFilter; // javadocs +import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.search.FieldCache; // javadocs +import org.apache.lucene.search.NumericRangeFilter; // javadocs +import org.apache.lucene.search.NumericRangeQuery; // javadocs +import org.apache.lucene.util.NumericUtils; /** *

@@ -36,20 +33,20 @@ import org.apache.lucene.search.FieldCache; // javadocs * int value: * *

- * document.add(new NumericField(name).setIntValue(value));
+ * document.add(new NumericField(name, value));
  * 
* * For optimal performance, re-use the NumericField and * {@link Document} instance for more than one document: * *
- *  NumericField field = new NumericField(name);
+ *  NumericField field = new NumericField(name, NumericField.DataType.INT);
  *  Document document = new Document();
  *  document.add(field);
  * 
  *  for(all documents) {
  *    ...
- *    field.setIntValue(value)
+ *    field.setValue(value)
  *    writer.addDocument(document);
  *    ...
  *  }
@@ -77,8 +74,8 @@ import org.apache.lucene.search.FieldCache; // javadocs
  *
  * 

By default, a NumericField's value is not stored but * is indexed for range filtering and sorting. You can use - * the {@link #NumericField(String, FieldType)} - * constructor if you need to change these defaults.

+ * {@link Field#Field(String,Number,FieldType)} + * if you need to change these defaults.

* *

You may add the same field name as a NumericField to * the same document more than once. Range querying and @@ -104,8 +101,8 @@ import org.apache.lucene.search.FieldCache; // javadocs * but may result in faster range search performance. The * default value, 4, was selected for a reasonable tradeoff * of disk space consumption versus performance. You can - * use the expert constructor {@link - * #NumericField(String,int, FieldType)} if you'd + * create a custom {@link FieldType} and invoke the {@link + * FieldType#setNumericPrecisionStep} method if you'd * like to change the value. Note that you must also * specify a congruent value when creating {@link * NumericRangeQuery} or {@link NumericRangeFilter}. @@ -137,244 +134,90 @@ public final class NumericField extends Field { /** Data type of the value in {@link NumericField}. * @since 3.2 */ - public static enum DataType { INT, LONG, FLOAT, DOUBLE } + public static enum DataType {INT, LONG, FLOAT, DOUBLE} - public static final FieldType TYPE_UNSTORED = new FieldType(); - public static final FieldType TYPE_STORED = new FieldType(); - static { - TYPE_UNSTORED.setIndexed(true); - TYPE_UNSTORED.setTokenized(true); - TYPE_UNSTORED.setOmitNorms(true); - TYPE_UNSTORED.setIndexOptions(IndexOptions.DOCS_ONLY); - TYPE_UNSTORED.freeze(); - - TYPE_STORED.setIndexed(true); - TYPE_STORED.setStored(true); - TYPE_STORED.setTokenized(true); - TYPE_STORED.setOmitNorms(true); - TYPE_STORED.setIndexOptions(IndexOptions.DOCS_ONLY); - TYPE_STORED.freeze(); + /** @lucene.experimental */ + public static FieldType getFieldType(DataType type, boolean stored) { + final FieldType ft = new FieldType(); + ft.setIndexed(true); + ft.setStored(stored); + ft.setTokenized(true); + ft.setOmitNorms(true); + ft.setIndexOptions(IndexOptions.DOCS_ONLY); + ft.setNumericType(type); + ft.freeze(); + return ft; } - //public static enum DataType { INT, LONG, FLOAT, DOUBLE } - - private DataType dataType; - private transient NumericTokenStream numericTS; - private final int precisionStep; - - /** - * Creates a field for numeric values using the default - * precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). - * The instance is not yet initialized with a numeric value, before indexing a - * document containing this field, set a value using the various set - * ???Value() methods. This constructor creates an indexed, but not - * stored field. - * - * @param name - * the field name - */ - public NumericField(String name) { - this(name, NumericUtils.PRECISION_STEP_DEFAULT, NumericField.TYPE_UNSTORED); - } - - /** - * Creates a field for numeric values using the default - * precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). - * The instance is not yet initialized with a numeric value, before indexing a - * document containing this field, set a value using the various set - * ???Value() methods. - * - * @param name - * the field name - * @param type - * if the defualt field should be altered, e.g. stored, - * {@link Document#getField} then returns {@code NumericField} - * instances on search results, or indexed using - * {@link NumericTokenStream} - */ - public NumericField(String name, FieldType type) { - this(name, NumericUtils.PRECISION_STEP_DEFAULT, type); - } - - /** - * Creates a field for numeric values with the specified - * precisionStep. The instance is not yet initialized with a - * numeric value, before indexing a document containing this field, set a - * value using the various set???Value() methods. 
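On the "congruent value" note above: the precisionStep used at search time has to match the one the field was indexed with. A hedged sketch using the existing NumericRangeQuery.newLongRange factory; the class and field names are illustrative:

    import org.apache.lucene.search.NumericRangeQuery;

    public class RangeQuerySketch {
      // The precisionStep here must match the one the field was indexed with:
      // 4 (the default) for the value-only NumericField constructors, or
      // whatever value FieldType.setNumericPrecisionStep was given.
      public static NumericRangeQuery<Long> timestampRange(long min, long max) {
        return NumericRangeQuery.newLongRange("timestamp", 4, min, max, true, true);
      }
    }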
This constructor - * creates an indexed, but not stored field. - * - * @param name - * the field name - * @param precisionStep - * the used precision step - */ - public NumericField(String name, int precisionStep) { - this(name, precisionStep, NumericField.TYPE_UNSTORED); - } - - /** - * Creates a field for numeric values with the specified - * precisionStep. The instance is not yet initialized with a - * numeric value, before indexing a document containing this field, set a - * value using the various set???Value() methods. - * - * @param name - * the field name - * @param precisionStep - * the used precision step - * @param type - * if the defualt field should be altered, e.g. stored, - * {@link Document#getField} then returns {@code NumericField} - * instances on search results, or indexed using - * {@link NumericTokenStream} - */ - public NumericField(String name, int precisionStep, FieldType type) { - super(name, type); - if (precisionStep < 1) - throw new IllegalArgumentException("precisionStep must be >=1"); - this.precisionStep = precisionStep; - } - - /** Returns a {@link NumericTokenStream} for indexing the numeric value. */ - public TokenStream tokenStream(Analyzer analyzer) { - if (!type.indexed()) return null; - if (numericTS == null) { - // lazy init the TokenStream as it is heavy to instantiate - // (attributes,...), - // if not needed (stored field loading) - numericTS = new NumericTokenStream(precisionStep); - // initialize value in TokenStream - if (fieldsData != null) { - assert dataType != null; - final Number val = (Number) fieldsData; - switch (dataType) { - case INT: - numericTS.setIntValue(val.intValue()); - break; - case LONG: - numericTS.setLongValue(val.longValue()); - break; - case FLOAT: - numericTS.setFloatValue(val.floatValue()); - break; - case DOUBLE: - numericTS.setDoubleValue(val.doubleValue()); - break; - default: - assert false : "Should never get here"; - } - } - } - return numericTS; - } - - /** Returns always null for numeric fields */ - public Reader readerValue() { - return null; - } - - /** - * Returns the numeric value as a string. It is recommended to - * use {@link Document#getField} instead that returns {@code NumericField} - * instances. You can then use {@link #numericValue} to return the stored - * value. - */ - @Override - public String stringValue() { - return (fieldsData == null) ? null : fieldsData.toString(); - } - - /** - * Returns the current numeric value as a subclass of {@link Number}, - * null if not yet initialized. - */ - @Override - public Number numericValue() { - return (Number) fieldsData; - } - - /** Returns the precision step. */ - public int getPrecisionStep() { - return precisionStep; - } - - /** - * Returns the data type of the current value, {@code null} if not yet set. - * - * @since 3.2 - */ - @Override - public DataType numericDataType() { - return dataType; - } + private static final FieldType INT_TYPE = getFieldType(DataType.INT, false); + private static final FieldType LONG_TYPE = getFieldType(DataType.LONG, false); + private static final FieldType FLOAT_TYPE = getFieldType(DataType.FLOAT, false); + private static final FieldType DOUBLE_TYPE = getFieldType(DataType.DOUBLE, false); - @Override - public boolean numeric() { - return true; - } - - /** - * Initializes the field with the supplied long value. 
- * - * @param value - * the numeric value - * @return this instance, because of this you can use it the following way: - * document.add(new NumericField(name, precisionStep).setLongValue(value)) - */ - public NumericField setLongValue(final long value) { - if (numericTS != null) numericTS.setLongValue(value); - fieldsData = Long.valueOf(value); - dataType = DataType.LONG; - return this; - } - - /** - * Initializes the field with the supplied int value. - * - * @param value - * the numeric value - * @return this instance, because of this you can use it the following way: - * document.add(new NumericField(name, precisionStep).setIntValue(value)) - */ - public NumericField setIntValue(final int value) { - if (numericTS != null) numericTS.setIntValue(value); + /** Creates an int NumericField with the provided value + * and default precisionStep {@link + * NumericUtils#PRECISION_STEP_DEFAULT} (4). */ + public NumericField(String name, int value) { + super(name, INT_TYPE); fieldsData = Integer.valueOf(value); - dataType = DataType.INT; - return this; } - - /** - * Initializes the field with the supplied double value. - * - * @param value - * the numeric value - * @return this instance, because of this you can use it the following way: - * document.add(new NumericField(name, precisionStep).setDoubleValue(value)) - */ - public NumericField setDoubleValue(final double value) { - if (numericTS != null) numericTS.setDoubleValue(value); - fieldsData = Double.valueOf(value); - dataType = DataType.DOUBLE; - return this; + + /** Creates a long NumericField with the provided value. + * and default precisionStep {@link + * NumericUtils#PRECISION_STEP_DEFAULT} (4). */ + public NumericField(String name, long value) { + super(name, LONG_TYPE); + fieldsData = Long.valueOf(value); } - - /** - * Initializes the field with the supplied float value. - * - * @param value - * the numeric value - * @return this instance, because of this you can use it the following way: - * document.add(new NumericField(name, precisionStep).setFloatValue(value)) - */ - public NumericField setFloatValue(final float value) { - if (numericTS != null) numericTS.setFloatValue(value); + + /** Creates a float NumericField with the provided value. + * and default precisionStep {@link + * NumericUtils#PRECISION_STEP_DEFAULT} (4). */ + public NumericField(String name, float value) { + super(name, FLOAT_TYPE); fieldsData = Float.valueOf(value); - dataType = DataType.FLOAT; - return this; + } + + /** Creates a double NumericField with the provided value. + * and default precisionStep {@link + * NumericUtils#PRECISION_STEP_DEFAULT} (4). 
*/ + public NumericField(String name, double value) { + super(name, DOUBLE_TYPE); + fieldsData = Double.valueOf(value); } + public NumericField(String name, Number value, FieldType type) { + super(name, type); + final NumericField.DataType numericType = type.numericType(); + if (numericType == null) { + throw new IllegalArgumentException("FieldType.numericType() cannot be null"); + } + + switch(numericType) { + case INT: + if (!(value instanceof Integer)) { + throw new IllegalArgumentException("value must be an Integer but got " + value); + } + break; + case LONG: + if (!(value instanceof Long)) { + throw new IllegalArgumentException("value must be a Long but got " + value); + } + break; + case FLOAT: + if (!(value instanceof Float)) { + throw new IllegalArgumentException("value must be a Float but got " + value); + } + break; + case DOUBLE: + if (!(value instanceof Double)) { + throw new IllegalArgumentException("value must be a Double but got " + value); + } + break; + default: + assert false : "Should never get here"; + } + + fieldsData = value; + } } diff --git a/lucene/src/java/org/apache/lucene/document/StoredField.java b/lucene/src/java/org/apache/lucene/document/StoredField.java new file mode 100644 index 00000000000..100980ad4df --- /dev/null +++ b/lucene/src/java/org/apache/lucene/document/StoredField.java @@ -0,0 +1,71 @@ +package org.apache.lucene.document; + +import org.apache.lucene.index.IndexReader; // javadocs +import org.apache.lucene.search.IndexSearcher; // javadocs +import org.apache.lucene.util.BytesRef; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** A field whose value is stored so that {@link + * IndexSearcher#doc} and {@link IndexReader#document} will + * return the field and its value. 
*/ +public final class StoredField extends Field { + + public final static FieldType TYPE; + static { + TYPE = new FieldType(); + TYPE.setStored(true); + TYPE.freeze(); + } + + public StoredField(String name, byte[] value) { + super(name, value, TYPE); + } + + public StoredField(String name, byte[] value, int offset, int length) { + super(name, value, offset, length, TYPE); + } + + public StoredField(String name, BytesRef value) { + super(name, value, TYPE); + } + + public StoredField(String name, String value) { + super(name, value, TYPE); + } + + public StoredField(String name, int value) { + super(name, TYPE); + fieldsData = value; + } + + public StoredField(String name, float value) { + super(name, TYPE); + fieldsData = value; + } + + public StoredField(String name, long value) { + super(name, TYPE); + fieldsData = value; + } + + public StoredField(String name, double value) { + super(name, TYPE); + fieldsData = value; + } +} diff --git a/lucene/src/java/org/apache/lucene/document/StringField.java b/lucene/src/java/org/apache/lucene/document/StringField.java index 3b66f5105c0..61272726eb9 100644 --- a/lucene/src/java/org/apache/lucene/document/StringField.java +++ b/lucene/src/java/org/apache/lucene/document/StringField.java @@ -51,12 +51,12 @@ public final class StringField extends Field { TYPE_STORED.setIndexOptions(IndexOptions.DOCS_ONLY); TYPE_STORED.freeze(); } - + /** Creates a new un-stored StringField */ public StringField(String name, String value) { super(name, value, TYPE_UNSTORED); } - + @Override public String stringValue() { return (fieldsData == null) ? null : fieldsData.toString(); diff --git a/lucene/src/java/org/apache/lucene/document/TextField.java b/lucene/src/java/org/apache/lucene/document/TextField.java index 8d2bf2bd517..7ab87375bcb 100644 --- a/lucene/src/java/org/apache/lucene/document/TextField.java +++ b/lucene/src/java/org/apache/lucene/document/TextField.java @@ -48,6 +48,8 @@ public final class TextField extends Field { TYPE_STORED.freeze(); } + // TODO: add sugar for term vectors...? + /** Creates a new un-stored TextField */ public TextField(String name, Reader reader) { super(name, reader, TextField.TYPE_UNSTORED); diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java index 50b4808f88d..b6dabbc197c 100644 --- a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java +++ b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java @@ -26,11 +26,9 @@ import java.util.Map; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.DocValuesConsumer; -import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.FieldInfosWriter; import org.apache.lucene.codecs.PerDocConsumer; import org.apache.lucene.index.DocumentsWriterPerThread.DocState; -import org.apache.lucene.index.DocValues; import org.apache.lucene.store.IOContext; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.IOUtils; @@ -82,17 +80,19 @@ final class DocFieldProcessor extends DocConsumer { fieldsWriter.flush(state); consumer.flush(childFields, state); + for (DocValuesConsumerAndDocID consumer : docValues.values()) { + consumer.docValuesConsumer.finish(state.numDocs); + } + // Important to save after asking consumer to flush so // consumer can alter the FieldInfo* if necessary. EG, // FreqProxTermsWriter does this with // FieldInfo.storePayload. 
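Since a NumericField is indexed but not stored by default, while StoredField (above) is stored but not indexed, one way to keep a numeric value both searchable and retrievable is to add one of each under the same name. A hedged sketch; the class and field names are illustrative:

    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.NumericField;
    import org.apache.lucene.document.StoredField;

    public class StoreAndIndexSketch {
      public static Document priceDoc(double price) {
        Document doc = new Document();
        doc.add(new NumericField("price", price));  // indexed for NumericRangeQuery, not stored
        doc.add(new StoredField("price", price));   // stored, so document loading returns it
        return doc;
      }
    }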
FieldInfosWriter infosWriter = codec.fieldInfosFormat().getFieldInfosWriter(); infosWriter.write(state.directory, state.segmentName, state.fieldInfos, IOContext.DEFAULT); - for (DocValuesConsumerAndDocID consumers : docValues.values()) { - consumers.docValuesConsumer.finish(state.numDocs); - } + // close perDocConsumer during flush to ensure all files are flushed due to PerCodec CFS - IOUtils.close(perDocConsumers.values()); + IOUtils.close(perDocConsumer); } @Override @@ -112,7 +112,7 @@ final class DocFieldProcessor extends DocConsumer { field = next; } } - IOUtils.closeWhileHandlingException(perDocConsumers.values()); + IOUtils.closeWhileHandlingException(perDocConsumer); // TODO add abort to PerDocConsumer! try { @@ -132,7 +132,6 @@ final class DocFieldProcessor extends DocConsumer { } try { - PerDocConsumer perDocConsumer = perDocConsumers.get(0); if (perDocConsumer != null) { perDocConsumer.abort(); } @@ -176,7 +175,7 @@ final class DocFieldProcessor extends DocConsumer { fieldHash = new DocFieldProcessorPerField[2]; hashMask = 1; totalFieldCount = 0; - perDocConsumers.clear(); + perDocConsumer = null; docValues.clear(); } @@ -270,9 +269,9 @@ final class DocFieldProcessor extends DocConsumer { if (field.fieldType().stored()) { fieldsWriter.addField(field, fp.fieldInfo); } - final DocValue docValue = field.docValue(); - if (docValue != null) { - docValuesConsumer(field.docValueType(), docState, fp.fieldInfo).add(docState.docID, docValue); + final DocValues.Type dvType = field.fieldType().docValueType(); + if (dvType != null) { + docValuesConsumer(dvType, docState, fp.fieldInfo).add(docState.docID, field); } } @@ -310,6 +309,8 @@ final class DocFieldProcessor extends DocConsumer { } private static class DocValuesConsumerAndDocID { + // Only used to enforce that same DV field name is never + // added more than once per doc: public int docID; final DocValuesConsumer docValuesConsumer; @@ -319,7 +320,7 @@ final class DocFieldProcessor extends DocConsumer { } final private Map docValues = new HashMap(); - final private Map perDocConsumers = new HashMap(); + private PerDocConsumer perDocConsumer; DocValuesConsumer docValuesConsumer(DocValues.Type valueType, DocState docState, FieldInfo fieldInfo) throws IOException { @@ -333,12 +334,9 @@ final class DocFieldProcessor extends DocConsumer { return docValuesConsumerAndDocID.docValuesConsumer; } - PerDocConsumer perDocConsumer = perDocConsumers.get(0); if (perDocConsumer == null) { PerDocWriteState perDocWriteState = docState.docWriter.newPerDocWriteState(""); - DocValuesFormat dvFormat = docState.docWriter.codec.docValuesFormat(); - perDocConsumer = dvFormat.docsConsumer(perDocWriteState); - perDocConsumers.put(0, perDocConsumer); + perDocConsumer = docState.docWriter.codec.docValuesFormat().docsConsumer(perDocWriteState); } DocValuesConsumer docValuesConsumer = perDocConsumer.addValuesField(valueType, fieldInfo); fieldInfo.setDocValuesType(valueType); diff --git a/lucene/src/java/org/apache/lucene/index/DocValue.java b/lucene/src/java/org/apache/lucene/index/DocValue.java deleted file mode 100644 index 63b69cd86e7..00000000000 --- a/lucene/src/java/org/apache/lucene/index/DocValue.java +++ /dev/null @@ -1,53 +0,0 @@ -package org.apache.lucene.index; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -import java.util.Comparator; - -import org.apache.lucene.codecs.DocValuesConsumer; -import org.apache.lucene.document.DocValuesField; -import org.apache.lucene.util.BytesRef; - -/** - * Per document and field values consumed by {@link DocValuesConsumer}. - * @see DocValuesField - * - * @lucene.experimental - */ -public interface DocValue { - - /** - * Returns the set {@link BytesRef} or null if not set. - */ - public BytesRef getBytes(); - - /** - * Returns the set {@link BytesRef} comparator or null if not set - */ - public Comparator bytesComparator(); - - /** - * Returns the set floating point value or 0.0d if not set. - */ - public double getFloat(); - - /** - * Returns the set long value of 0 if not set. - */ - public long getInt(); - -} diff --git a/lucene/src/java/org/apache/lucene/index/DocValues.java b/lucene/src/java/org/apache/lucene/index/DocValues.java index 2703cc11000..8ba6d3a3baa 100644 --- a/lucene/src/java/org/apache/lucene/index/DocValues.java +++ b/lucene/src/java/org/apache/lucene/index/DocValues.java @@ -493,6 +493,7 @@ public abstract class DocValues implements Closeable { *

*/ FIXED_INTS_64, + /** * A 32 bit floating point value. By default there is no compression * applied. To fit custom float values into less than 32bit either a custom @@ -507,6 +508,7 @@ public abstract class DocValues implements Closeable { *

*/ FLOAT_32, + /** * * A 64 bit floating point value. By default there is no compression * applied. To fit custom float values into less than 32bit either a custom @@ -613,7 +615,6 @@ public abstract class DocValues implements Closeable { * @see SortedSource */ BYTES_FIXED_SORTED - } /** diff --git a/lucene/src/java/org/apache/lucene/index/IndexableField.java b/lucene/src/java/org/apache/lucene/index/IndexableField.java index 54ffca44497..267de2e140d 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexableField.java +++ b/lucene/src/java/org/apache/lucene/index/IndexableField.java @@ -22,8 +22,6 @@ import java.io.Reader; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.document.NumericField; -import org.apache.lucene.index.DocValues; import org.apache.lucene.util.BytesRef; // TODO: how to handle versioning here...? @@ -37,17 +35,16 @@ import org.apache.lucene.util.BytesRef; public interface IndexableField { - // TODO: add attrs to this API? - /** Field name */ public String name(); - // NOTE: if doc/field impl has the notion of "doc level boost" - // it must be multiplied in w/ this field's boost - + /** {@link IndexableFieldType} describing the properties + * of this field. */ + public IndexableFieldType fieldType(); + /** Field boost (you must pre-multiply in any doc boost). */ public float boost(); - + /** Non-null if this field has a binary value */ public BytesRef binaryValue(); @@ -57,30 +54,9 @@ public interface IndexableField { /** Non-null if this field has a Reader value */ public Reader readerValue(); - // Numeric field: - /** True if this field is numeric */ - public boolean numeric(); - - /** Numeric {@link org.apache.lucene.document.NumericField.DataType}; only used if - * the field is numeric */ - public NumericField.DataType numericDataType(); - - /** Numeric value; only used if the field is numeric */ + /** Non-null if this field has a numeric value */ public Number numericValue(); - /** - * Returns the IndexableFieldType describing the properties of this field - * - * @return IndexableFieldType for this field - */ - public IndexableFieldType fieldType(); - - /** Non-null if doc values should be indexed */ - public DocValue docValue(); - - /** DocValues type; only used if docValue is non-null */ - public DocValues.Type docValueType(); - /** * Creates the TokenStream used for indexing this field. If appropriate, * implementations should use the given Analyzer to create the TokenStreams. 
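// ----------------------------------------------------------------------
// Editor's note: illustrative sketch, not part of the patch. With the
// slimmed-down IndexableField above, per-field indexing details come from
// fieldType(), and the removed numeric()/docValue() accessors are replaced
// by numericValue() and fieldType().docValueType(). The helper below is
// invented for illustration only.
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexableField;

class FieldInspector {
  static void describe(IndexableField field) {
    if (field.fieldType().stored()) {
      System.out.println(field.name() + " is stored");
    }
    DocValues.Type dvType = field.fieldType().docValueType();
    if (dvType != null) {
      System.out.println(field.name() + " carries doc values of type " + dvType);
    }
    if (field.numericValue() != null) {
      System.out.println(field.name() + " has numeric value " + field.numericValue());
    }
  }
}
// ----------------------------------------------------------------------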
diff --git a/lucene/src/java/org/apache/lucene/index/IndexableFieldType.java b/lucene/src/java/org/apache/lucene/index/IndexableFieldType.java index d5c1abbe047..620503689ad 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexableFieldType.java +++ b/lucene/src/java/org/apache/lucene/index/IndexableFieldType.java @@ -46,4 +46,8 @@ public interface IndexableFieldType { /** {@link IndexOptions}, describing what should be * recorded into the inverted index */ public IndexOptions indexOptions(); + + /** DocValues type; if non-null then the field's value + * will be indexed into docValues */ + public DocValues.Type docValueType(); } diff --git a/lucene/src/java/org/apache/lucene/index/NormsConsumer.java b/lucene/src/java/org/apache/lucene/index/NormsConsumer.java index e6e2f6454a2..1210611135c 100644 --- a/lucene/src/java/org/apache/lucene/index/NormsConsumer.java +++ b/lucene/src/java/org/apache/lucene/index/NormsConsumer.java @@ -39,11 +39,9 @@ import org.apache.lucene.util.IOUtils; final class NormsConsumer extends InvertedDocEndConsumer { private final NormsFormat normsFormat; private PerDocConsumer consumer; - private final DocumentsWriterPerThread dwpt; public NormsConsumer(DocumentsWriterPerThread dwpt) { normsFormat = dwpt.codec.normsFormat(); - this.dwpt = dwpt; } @Override @@ -75,8 +73,7 @@ final class NormsConsumer extends InvertedDocEndConsumer { } else if (fi.isIndexed) { anythingFlushed = true; final DocValuesConsumer valuesConsumer = newConsumer(new PerDocWriteState(state), fi); - final DocValuesField value = new DocValuesField(""); - value.setBytes(new BytesRef(new byte[] {0x00}), Type.BYTES_FIXED_STRAIGHT); + final DocValuesField value = new DocValuesField("", new BytesRef(new byte[] {0x0}), Type.BYTES_FIXED_STRAIGHT); valuesConsumer.add(state.numDocs-1, value); valuesConsumer.finish(state.numDocs); } diff --git a/lucene/src/java/org/apache/lucene/index/NormsConsumerPerField.java b/lucene/src/java/org/apache/lucene/index/NormsConsumerPerField.java index 09390372f78..136c0ce6bb2 100644 --- a/lucene/src/java/org/apache/lucene/index/NormsConsumerPerField.java +++ b/lucene/src/java/org/apache/lucene/index/NormsConsumerPerField.java @@ -29,8 +29,8 @@ public class NormsConsumerPerField extends InvertedDocEndConsumerPerField implem private final Similarity similarity; private final FieldInvertState fieldState; private DocValuesConsumer consumer; - private final DocValuesField value = new DocValuesField(""); private final BytesRef spare = new BytesRef(1); + private final DocValuesField value = new DocValuesField("", spare, Type.BYTES_FIXED_STRAIGHT); private final NormsConsumer parent; public NormsConsumerPerField(final DocInverterPerField docInverterPerField, final FieldInfo fieldInfo, NormsConsumer parent) { @@ -53,9 +53,7 @@ public class NormsConsumerPerField extends InvertedDocEndConsumerPerField implem if (fieldInfo.isIndexed && !fieldInfo.omitNorms) { DocValuesConsumer consumer = getConsumer(); spare.bytes[0] = similarity.computeNorm(fieldState); - value.setBytes(spare, Type.BYTES_FIXED_STRAIGHT); consumer.add(docState.docID, value); - } } diff --git a/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexNormsConsumer.java b/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexNormsConsumer.java index 0ec0cdef938..ecd7fb84f6e 100644 --- a/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexNormsConsumer.java +++ b/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexNormsConsumer.java @@ -22,12 
+22,12 @@ import java.util.Arrays; import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.codecs.PerDocConsumer; -import org.apache.lucene.index.DocValue; -import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValues.Source; import org.apache.lucene.index.DocValues.Type; +import org.apache.lucene.index.DocValues; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.MergeState; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; @@ -133,8 +133,8 @@ class PreFlexNormsConsumer extends PerDocConsumer { } @Override - public void add(int docID, DocValue docValue) throws IOException { - add(docID, docValue.getBytes()); + public void add(int docID, IndexableField docValue) throws IOException { + add(docID, docValue.binaryValue()); } protected void add(int docID, BytesRef value) throws IOException { diff --git a/lucene/src/test-framework/java/org/apache/lucene/index/DocHelper.java b/lucene/src/test-framework/java/org/apache/lucene/index/DocHelper.java index e0b30f0ce7f..ab4233ece6e 100644 --- a/lucene/src/test-framework/java/org/apache/lucene/index/DocHelper.java +++ b/lucene/src/test-framework/java/org/apache/lucene/index/DocHelper.java @@ -26,13 +26,13 @@ import java.util.Random; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenizer; -import org.apache.lucene.document.BinaryField; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.document.StringField; -import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; +import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.search.similarities.SimilarityProvider; import org.apache.lucene.store.Directory; @@ -197,7 +197,7 @@ class DocHelper { LAZY_FIELD_BINARY_BYTES = "These are some binary field bytes".getBytes("UTF8"); } catch (UnsupportedEncodingException e) { } - lazyFieldBinary = new BinaryField(LAZY_FIELD_BINARY_KEY, LAZY_FIELD_BINARY_BYTES); + lazyFieldBinary = new StoredField(LAZY_FIELD_BINARY_KEY, LAZY_FIELD_BINARY_BYTES); fields[fields.length - 2] = lazyFieldBinary; LARGE_LAZY_FIELD_TEXT = buffer.toString(); largeLazyField = new Field(LARGE_LAZY_FIELD_KEY, LARGE_LAZY_FIELD_TEXT, customType); diff --git a/lucene/src/test-framework/java/org/apache/lucene/index/RandomIndexWriter.java b/lucene/src/test-framework/java/org/apache/lucene/index/RandomIndexWriter.java index 4d126e3700d..a03836d086e 100644 --- a/lucene/src/test-framework/java/org/apache/lucene/index/RandomIndexWriter.java +++ b/lucene/src/test-framework/java/org/apache/lucene/index/RandomIndexWriter.java @@ -25,10 +25,10 @@ import java.util.Random; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.codecs.Codec; -import org.apache.lucene.document.DocValuesField; import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; import org.apache.lucene.index.IndexWriter; // javadoc -import org.apache.lucene.index.DocValues; import org.apache.lucene.search.Query; import org.apache.lucene.store.Directory; import 
org.apache.lucene.util.BytesRef; @@ -172,7 +172,10 @@ public class RandomIndexWriter implements Closeable { String name = "random_" + type.name() + "" + docValuesFieldPrefix; if ("Lucene3x".equals(codec.getName()) || doc.getField(name) != null) return; - DocValuesField docValuesField = new DocValuesField(name); + FieldType ft = new FieldType(); + ft.setDocValueType(type); + ft.freeze(); + final Field f; switch (type) { case BYTES_FIXED_DEREF: case BYTES_FIXED_STRAIGHT: @@ -186,40 +189,38 @@ public class RandomIndexWriter implements Closeable { fixedRef.grow(fixedBytesLength); fixedRef.length = fixedBytesLength; } - docValuesField.setBytes(fixedRef, type); + f = new Field(name, fixedRef, ft); break; case BYTES_VAR_DEREF: case BYTES_VAR_STRAIGHT: case BYTES_VAR_SORTED: - BytesRef ref = new BytesRef(_TestUtil.randomUnicodeString(random, 200)); - docValuesField.setBytes(ref, type); + f = new Field(name, new BytesRef(_TestUtil.randomUnicodeString(random, 200)), ft); break; case FLOAT_32: - docValuesField.setFloat(random.nextFloat()); + f = new Field(name, random.nextFloat(), ft); break; case FLOAT_64: - docValuesField.setFloat(random.nextDouble()); + f = new Field(name, random.nextDouble(), ft); break; case VAR_INTS: - docValuesField.setInt(random.nextLong()); + f = new Field(name, random.nextLong(), ft); break; case FIXED_INTS_16: - docValuesField.setInt(random.nextInt(Short.MAX_VALUE)); + f = new Field(name, random.nextInt(Short.MAX_VALUE), ft); break; case FIXED_INTS_32: - docValuesField.setInt(random.nextInt()); + f = new Field(name, random.nextInt(), ft); break; case FIXED_INTS_64: - docValuesField.setInt(random.nextLong()); + f = new Field(name, random.nextLong(), ft); break; case FIXED_INTS_8: - docValuesField.setInt(random.nextInt(128)); + f = new Field(name, random.nextInt(128), ft); break; default: throw new IllegalArgumentException("no such type: " + type); } - - doc.add(docValuesField); + doc.add(f); } private void maybeCommit() throws IOException { diff --git a/lucene/src/test/org/apache/lucene/codecs/lucene40/TestDocValues.java b/lucene/src/test/org/apache/lucene/codecs/lucene40/TestDocValues.java index 1d2f97c7eae..c72a1e21ea8 100644 --- a/lucene/src/test/org/apache/lucene/codecs/lucene40/TestDocValues.java +++ b/lucene/src/test/org/apache/lucene/codecs/lucene40/TestDocValues.java @@ -18,17 +18,21 @@ package org.apache.lucene.codecs.lucene40; */ import java.io.IOException; +import java.io.Reader; import java.util.Comparator; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.codecs.DocValuesConsumer; import org.apache.lucene.codecs.lucene40.values.Bytes; import org.apache.lucene.codecs.lucene40.values.Floats; import org.apache.lucene.codecs.lucene40.values.Ints; -import org.apache.lucene.index.DocValue; -import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValues.SortedSource; import org.apache.lucene.index.DocValues.Source; import org.apache.lucene.index.DocValues.Type; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.IndexableFieldType; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.Counter; @@ -175,9 +179,9 @@ public class TestDocValues extends LuceneTestCase { Directory dir = newDirectory(); final Counter trackBytes = Counter.newCounter(); DocValuesConsumer w = Ints.getWriter(dir, "test", trackBytes, Type.VAR_INTS, newIOContext(random)); - 
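// ----------------------------------------------------------------------
// Editor's note: illustrative sketch, not part of the patch. It mirrors the
// RandomIndexWriter change above: a doc-values field can be built "by hand"
// from a custom FieldType whose docValueType is set. The field name
// "weight" is invented for the example.
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.DocValues;

class CustomDocValuesFieldExample {
  static Document newDoc(float weight) {
    FieldType ft = new FieldType();
    ft.setDocValueType(DocValues.Type.FLOAT_32); // value is indexed as doc values
    ft.freeze();                                 // no further changes allowed

    Document doc = new Document();
    doc.add(new Field("weight", weight, ft));
    return doc;
  }
}
// ----------------------------------------------------------------------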
valueHolder.intValue = minMax[i][0]; + valueHolder.numberValue = minMax[i][0]; w.add(0, valueHolder); - valueHolder.intValue = minMax[i][1]; + valueHolder.numberValue = minMax[i][1]; w.add(1, valueHolder); w.finish(2); assertEquals(0, trackBytes.get()); @@ -212,7 +216,7 @@ public class TestDocValues extends LuceneTestCase { final Counter trackBytes = Counter.newCounter(); DocValuesConsumer w = Ints.getWriter(dir, "test", trackBytes, Type.FIXED_INTS_8, newIOContext(random)); for (int i = 0; i < sourceArray.length; i++) { - valueHolder.intValue = (long) sourceArray[i]; + valueHolder.numberValue = (long) sourceArray[i]; w.add(i, valueHolder); } w.finish(sourceArray.length); @@ -235,7 +239,7 @@ public class TestDocValues extends LuceneTestCase { final Counter trackBytes = Counter.newCounter(); DocValuesConsumer w = Ints.getWriter(dir, "test", trackBytes, Type.FIXED_INTS_16, newIOContext(random)); for (int i = 0; i < sourceArray.length; i++) { - valueHolder.intValue = (long) sourceArray[i]; + valueHolder.numberValue = (long) sourceArray[i]; w.add(i, valueHolder); } w.finish(sourceArray.length); @@ -258,7 +262,7 @@ public class TestDocValues extends LuceneTestCase { final Counter trackBytes = Counter.newCounter(); DocValuesConsumer w = Ints.getWriter(dir, "test", trackBytes, Type.FIXED_INTS_64, newIOContext(random)); for (int i = 0; i < sourceArray.length; i++) { - valueHolder.intValue = sourceArray[i]; + valueHolder.numberValue = sourceArray[i]; w.add(i, valueHolder); } w.finish(sourceArray.length); @@ -281,7 +285,7 @@ public class TestDocValues extends LuceneTestCase { final Counter trackBytes = Counter.newCounter(); DocValuesConsumer w = Ints.getWriter(dir, "test", trackBytes, Type.FIXED_INTS_32, newIOContext(random)); for (int i = 0; i < sourceArray.length; i++) { - valueHolder.intValue = (long) sourceArray[i]; + valueHolder.numberValue = (long) sourceArray[i]; w.add(i, valueHolder); } w.finish(sourceArray.length); @@ -304,7 +308,7 @@ public class TestDocValues extends LuceneTestCase { final Counter trackBytes = Counter.newCounter(); DocValuesConsumer w = Floats.getWriter(dir, "test", trackBytes, newIOContext(random), Type.FLOAT_32); for (int i = 0; i < sourceArray.length; i++) { - valueHolder.floatValue = sourceArray[i]; + valueHolder.numberValue = sourceArray[i]; w.add(i, valueHolder); } w.finish(sourceArray.length); @@ -327,7 +331,7 @@ public class TestDocValues extends LuceneTestCase { final Counter trackBytes = Counter.newCounter(); DocValuesConsumer w = Floats.getWriter(dir, "test", trackBytes, newIOContext(random), Type.FLOAT_64); for (int i = 0; i < sourceArray.length; i++) { - valueHolder.floatValue = sourceArray[i]; + valueHolder.numberValue = sourceArray[i]; w.add(i, valueHolder); } w.finish(sourceArray.length); @@ -354,7 +358,7 @@ public class TestDocValues extends LuceneTestCase { DocValuesConsumer w = Ints.getWriter(dir, "test", trackBytes, type, newIOContext(random)); for (int i = 0; i < NUM_VALUES; i++) { final long v = random.nextLong() % (1 + maxV); - valueHolder.intValue = values[i] = v; + valueHolder.numberValue = values[i] = v; w.add(i, valueHolder); } final int additionalDocs = 1 + random.nextInt(9); @@ -377,20 +381,20 @@ public class TestDocValues extends LuceneTestCase { } public void testFloats4() throws IOException { - runTestFloats(Type.FLOAT_32, 0.00001); + runTestFloats(Type.FLOAT_32); } - private void runTestFloats(Type type, double delta) throws IOException { + private void runTestFloats(Type type) throws IOException { DocValueHolder valueHolder = new 
DocValueHolder(); Directory dir = newDirectory(); final Counter trackBytes = Counter.newCounter(); DocValuesConsumer w = Floats.getWriter(dir, "test", trackBytes, newIOContext(random), type); - final int NUM_VALUES = 777 + random.nextInt(777);; + final int NUM_VALUES = 777 + random.nextInt(777); final double[] values = new double[NUM_VALUES]; for (int i = 0; i < NUM_VALUES; i++) { final double v = type == Type.FLOAT_32 ? random.nextFloat() : random .nextDouble(); - valueHolder.floatValue = values[i] = v; + valueHolder.numberValue = values[i] = v; w.add(i, valueHolder); } final int additionalValues = 1 + random.nextInt(10); @@ -409,7 +413,7 @@ public class TestDocValues extends LuceneTestCase { } public void testFloats8() throws IOException { - runTestFloats(Type.FLOAT_64, 0.0); + runTestFloats(Type.FLOAT_64); } @@ -431,31 +435,49 @@ public class TestDocValues extends LuceneTestCase { return getSource(values).asSortedSource(); } - public static class DocValueHolder implements DocValue { + public static class DocValueHolder implements IndexableField { BytesRef bytes; - long intValue; - double floatValue; + Number numberValue; Comparator comp; + @Override - public BytesRef getBytes() { + public TokenStream tokenStream(Analyzer a) { + return null; + } + + @Override + public float boost() { + return 0.0f; + } + + @Override + public String name() { + return "test"; + } + + @Override + public BytesRef binaryValue() { return bytes; } @Override - public Comparator bytesComparator() { - return comp; + public Number numericValue() { + return numberValue; } @Override - public double getFloat() { - return floatValue; + public String stringValue() { + return null; } @Override - public long getInt() { - return intValue; + public Reader readerValue() { + return null; + } + + @Override + public IndexableFieldType fieldType() { + return null; } - } - } diff --git a/lucene/src/test/org/apache/lucene/document/TestBinaryDocument.java b/lucene/src/test/org/apache/lucene/document/TestBinaryDocument.java index 2f41c208d9a..7e8b29cf0c5 100644 --- a/lucene/src/test/org/apache/lucene/document/TestBinaryDocument.java +++ b/lucene/src/test/org/apache/lucene/document/TestBinaryDocument.java @@ -37,7 +37,7 @@ public class TestBinaryDocument extends LuceneTestCase { { FieldType ft = new FieldType(); ft.setStored(true); - IndexableField binaryFldStored = new BinaryField("binaryStored", binaryValStored.getBytes()); + IndexableField binaryFldStored = new StoredField("binaryStored", binaryValStored.getBytes()); IndexableField stringFldStored = new Field("stringStored", binaryValStored, ft); Document doc = new Document(); @@ -75,8 +75,8 @@ public class TestBinaryDocument extends LuceneTestCase { } public void testCompressionTools() throws Exception { - IndexableField binaryFldCompressed = new BinaryField("binaryCompressed", CompressionTools.compress(binaryValCompressed.getBytes())); - IndexableField stringFldCompressed = new BinaryField("stringCompressed", CompressionTools.compressString(binaryValCompressed)); + IndexableField binaryFldCompressed = new StoredField("binaryCompressed", CompressionTools.compress(binaryValCompressed.getBytes())); + IndexableField stringFldCompressed = new StoredField("stringCompressed", CompressionTools.compressString(binaryValCompressed)); Document doc = new Document(); diff --git a/lucene/src/test/org/apache/lucene/document/TestDocument.java b/lucene/src/test/org/apache/lucene/document/TestDocument.java index 2deeaa316d3..1e977ec125b 100644 --- 
a/lucene/src/test/org/apache/lucene/document/TestDocument.java +++ b/lucene/src/test/org/apache/lucene/document/TestDocument.java @@ -1,17 +1,5 @@ package org.apache.lucene.document; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.IndexableField; -import org.apache.lucene.index.RandomIndexWriter; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.TermQuery; -import org.apache.lucene.store.Directory; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.LuceneTestCase; - /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -29,6 +17,26 @@ import org.apache.lucene.util.LuceneTestCase; * limitations under the License. */ +import java.io.StringReader; + +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.Fields; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LuceneTestCase; + + /** * Tests {@link Document} class. */ @@ -43,8 +51,8 @@ public class TestDocument extends LuceneTestCase { FieldType ft = new FieldType(); ft.setStored(true); IndexableField stringFld = new Field("string", binaryVal, ft); - IndexableField binaryFld = new BinaryField("binary", binaryVal.getBytes()); - IndexableField binaryFld2 = new BinaryField("binary", binaryVal2.getBytes()); + IndexableField binaryFld = new StoredField("binary", binaryVal.getBytes()); + IndexableField binaryFld2 = new StoredField("binary", binaryVal2.getBytes()); doc.add(stringFld); doc.add(binaryFld); @@ -274,20 +282,82 @@ public class TestDocument extends LuceneTestCase { assertEquals("did not see all IDs", 7, result); } - public void testFieldSetValueChangeBinary() { - Field field1 = new BinaryField("field1", new byte[0]); - Field field2 = new Field("field2", "", TextField.TYPE_STORED); + // LUCENE-3616 + public void testInvalidFields() { try { - field1.setValue("abc"); - fail("did not hit expected exception"); - } catch (IllegalArgumentException iae) { - // expected - } - try { - field2.setValue(new byte[0]); - fail("did not hit expected exception"); + new Field("foo", new Tokenizer() { + @Override + public boolean incrementToken() { + return false; + }}, StringField.TYPE_STORED); + fail("did not hit expected exc"); } catch (IllegalArgumentException iae) { // expected } } + + // LUCENE-3682 + public void testTransitionAPI() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random, dir); + + Document doc = new Document(); + doc.add(new Field("stored", "abc", Field.Store.YES, Field.Index.NO)); + doc.add(new Field("stored_indexed", "abc xyz", Field.Store.YES, Field.Index.NOT_ANALYZED)); + doc.add(new Field("stored_tokenized", "abc xyz", Field.Store.YES, Field.Index.ANALYZED)); + doc.add(new Field("indexed", "abc xyz", Field.Store.NO, 
Field.Index.NOT_ANALYZED)); + doc.add(new Field("tokenized", "abc xyz", Field.Store.NO, Field.Index.ANALYZED)); + doc.add(new Field("tokenized_reader", new StringReader("abc xyz"))); + doc.add(new Field("tokenized_tokenstream", w.w.getAnalyzer().tokenStream("tokenized_tokenstream", new StringReader("abc xyz")))); + doc.add(new Field("binary", new byte[10])); + doc.add(new Field("tv", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES)); + doc.add(new Field("tv_pos", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS)); + doc.add(new Field("tv_off", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS)); + doc.add(new Field("tv_pos_off", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); + w.addDocument(doc); + IndexReader r = w.getReader(); + w.close(); + + doc = r.document(0); + // 4 stored fields + assertEquals(4, doc.getFields().size()); + assertEquals("abc", doc.get("stored")); + assertEquals("abc xyz", doc.get("stored_indexed")); + assertEquals("abc xyz", doc.get("stored_tokenized")); + final BytesRef br = doc.getBinaryValue("binary"); + assertNotNull(br); + assertEquals(10, br.length); + + IndexSearcher s = new IndexSearcher(r); + assertEquals(1, s.search(new TermQuery(new Term("stored_indexed", "abc xyz")), 1).totalHits); + assertEquals(1, s.search(new TermQuery(new Term("stored_tokenized", "abc")), 1).totalHits); + assertEquals(1, s.search(new TermQuery(new Term("stored_tokenized", "xyz")), 1).totalHits); + assertEquals(1, s.search(new TermQuery(new Term("indexed", "abc xyz")), 1).totalHits); + assertEquals(1, s.search(new TermQuery(new Term("tokenized", "abc")), 1).totalHits); + assertEquals(1, s.search(new TermQuery(new Term("tokenized", "xyz")), 1).totalHits); + assertEquals(1, s.search(new TermQuery(new Term("tokenized_reader", "abc")), 1).totalHits); + assertEquals(1, s.search(new TermQuery(new Term("tokenized_reader", "xyz")), 1).totalHits); + assertEquals(1, s.search(new TermQuery(new Term("tokenized_tokenstream", "abc")), 1).totalHits); + assertEquals(1, s.search(new TermQuery(new Term("tokenized_tokenstream", "xyz")), 1).totalHits); + + for(String field : new String[] {"tv", "tv_pos", "tv_off", "tv_pos_off"}) { + Fields tvFields = r.getTermVectors(0); + Terms tvs = tvFields.terms(field); + assertNotNull(tvs); + assertEquals(2, tvs.getUniqueTermCount()); + TermsEnum tvsEnum = tvs.iterator(null); + assertEquals(new BytesRef("abc"), tvsEnum.next()); + final DocsAndPositionsEnum dpEnum = tvsEnum.docsAndPositions(null, null); + if (field.equals("tv")) { + assertNull(dpEnum); + } else { + assertNotNull(dpEnum); + } + assertEquals(new BytesRef("xyz"), tvsEnum.next()); + assertNull(tvsEnum.next()); + } + + r.close(); + dir.close(); + } } diff --git a/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java b/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java index a42055d7086..9758b773157 100755 --- a/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java +++ b/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java @@ -1256,9 +1256,7 @@ public class TestAddIndexes extends LuceneTestCase { RandomIndexWriter w = new RandomIndexWriter(random, d1); Document doc = new Document(); doc.add(newField("id", "1", StringField.TYPE_STORED)); - DocValuesField dv = new DocValuesField("dv"); - dv.setInt(1); - doc.add(dv); + doc.add(new DocValuesField("dv", 1, DocValues.Type.VAR_INTS)); w.addDocument(doc); IndexReader r1 = w.getReader(); w.close(); @@ -1267,9 
+1265,7 @@ public class TestAddIndexes extends LuceneTestCase { w = new RandomIndexWriter(random, d2); doc = new Document(); doc.add(newField("id", "2", StringField.TYPE_STORED)); - dv = new DocValuesField("dv"); - dv.setInt(2); - doc.add(dv); + doc.add(new DocValuesField("dv", 2, DocValues.Type.VAR_INTS)); w.addDocument(doc); IndexReader r2 = w.getReader(); w.close(); diff --git a/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java b/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java index 9660b940a0f..6ee51656d88 100644 --- a/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java +++ b/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java @@ -29,8 +29,6 @@ import java.util.Map; import java.util.Random; import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.codecs.Codec; -import org.apache.lucene.codecs.FieldInfosReader; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; @@ -45,9 +43,7 @@ import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.NumericRangeQuery; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TermQuery; -import org.apache.lucene.store.CompoundFileDirectory; import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IOContext; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; @@ -544,8 +540,8 @@ public class TestBackwardsCompatibility extends LuceneTestCase { doc.add(new Field("content2", "here is more content with aaa aaa aaa", customType2)); doc.add(new Field("fie\u2C77ld", "field with non-ascii name", customType2)); // add numeric fields, to test if flex preserves encoding - doc.add(new NumericField("trieInt", 4).setIntValue(id)); - doc.add(new NumericField("trieLong", 4).setLongValue(id)); + doc.add(new NumericField("trieInt", id)); + doc.add(new NumericField("trieLong", (long) id)); writer.addDocument(doc); } diff --git a/lucene/src/test/org/apache/lucene/index/TestConsistentFieldNumbers.java b/lucene/src/test/org/apache/lucene/index/TestConsistentFieldNumbers.java index ca48ff2eb42..92931011afa 100644 --- a/lucene/src/test/org/apache/lucene/index/TestConsistentFieldNumbers.java +++ b/lucene/src/test/org/apache/lucene/index/TestConsistentFieldNumbers.java @@ -20,10 +20,10 @@ package org.apache.lucene.index; import java.io.IOException; import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.document.BinaryField; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.StoredField; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; import org.apache.lucene.store.Directory; @@ -189,7 +189,7 @@ public class TestConsistentFieldNumbers extends LuceneTestCase { : NoMergePolicy.COMPOUND_FILES)); Document d = new Document(); d.add(new Field("f1", "d2 first field", TextField.TYPE_STORED)); - d.add(new BinaryField("f3", new byte[] { 1, 2, 3 })); + d.add(new StoredField("f3", new byte[] { 1, 2, 3 })); writer.addDocument(d); writer.close(); SegmentInfos sis = new SegmentInfos(); @@ -212,7 +212,7 @@ public class TestConsistentFieldNumbers extends LuceneTestCase { Document d = new Document(); d.add(new Field("f1", "d3 first field", TextField.TYPE_STORED)); d.add(new Field("f2", "d3 second field", 
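// ----------------------------------------------------------------------
// Editor's note: illustrative sketch, not part of the patch. As in the
// TestAddIndexes change above, DocValuesField now takes its value and
// DocValues.Type directly in the constructor. The field names "ord" and
// "label" are invented for the example.
import org.apache.lucene.document.DocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.util.BytesRef;

class DocValuesFieldExample {
  static Document newDoc(long ord, BytesRef label) {
    Document doc = new Document();
    doc.add(new DocValuesField("ord", ord, DocValues.Type.VAR_INTS));
    doc.add(new DocValuesField("label", label, DocValues.Type.BYTES_VAR_SORTED));
    return doc;
  }
}
// ----------------------------------------------------------------------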
TextField.TYPE_STORED)); - d.add(new BinaryField("f3", new byte[] { 1, 2, 3, 4, 5 })); + d.add(new StoredField("f3", new byte[] { 1, 2, 3, 4, 5 })); writer.addDocument(d); writer.close(); SegmentInfos sis = new SegmentInfos(); diff --git a/lucene/src/test/org/apache/lucene/index/TestDocTermOrds.java b/lucene/src/test/org/apache/lucene/index/TestDocTermOrds.java index 7fe0df8cecb..1723553f7c3 100644 --- a/lucene/src/test/org/apache/lucene/index/TestDocTermOrds.java +++ b/lucene/src/test/org/apache/lucene/index/TestDocTermOrds.java @@ -124,8 +124,7 @@ public class TestDocTermOrds extends LuceneTestCase { for(int id=0;id valueVarList, boolean withDeletions, int bytesSize) throws CorruptIndexException, IOException { - final boolean isNumeric = NUMERICS.contains(value); + final boolean isNumeric = NUMERICS.contains(valueType); FixedBitSet deleted = new FixedBitSet(numValues); Document doc = new Document(); - DocValuesField valField = new DocValuesField(value.name()); + final DocValuesField valField; + if (isNumeric) { + switch (valueType) { + case VAR_INTS: + valField = new DocValuesField(valueType.name(), (long) 0, valueType); + break; + case FIXED_INTS_16: + valField = new DocValuesField(valueType.name(), (short) 0, valueType); + break; + case FIXED_INTS_32: + valField = new DocValuesField(valueType.name(), 0, valueType); + break; + case FIXED_INTS_64: + valField = new DocValuesField(valueType.name(), (long) 0, valueType); + break; + case FIXED_INTS_8: + valField = new DocValuesField(valueType.name(), (byte) 0, valueType); + break; + case FLOAT_32: + valField = new DocValuesField(valueType.name(), (float) 0, valueType); + break; + case FLOAT_64: + valField = new DocValuesField(valueType.name(), (double) 0, valueType); + break; + default: + valField = null; + fail("unhandled case"); + } + } else { + valField = new DocValuesField(valueType.name(), new BytesRef(), valueType); + } doc.add(valField); final BytesRef bytesRef = new BytesRef(); - final String idBase = value.name() + "_"; + final String idBase = valueType.name() + "_"; final byte[] b = new byte[bytesSize]; if (bytesRef != null) { bytesRef.bytes = b; @@ -596,38 +612,37 @@ public class TestDocValuesIndexing extends LuceneTestCase { byte upto = 0; for (int i = 0; i < numValues; i++) { if (isNumeric) { - switch (value) { + switch (valueType) { case VAR_INTS: - valField.setInt((long)i); + valField.setValue((long)i); break; case FIXED_INTS_16: - valField.setInt((short)i, random.nextInt(10) != 0); + valField.setValue((short)i); break; case FIXED_INTS_32: - valField.setInt(i, random.nextInt(10) != 0); + valField.setValue(i); break; case FIXED_INTS_64: - valField.setInt((long)i, random.nextInt(10) != 0); + valField.setValue((long)i); break; case FIXED_INTS_8: - valField.setInt((byte)(0xFF & (i % 128)), random.nextInt(10) != 0); + valField.setValue((byte)(0xFF & (i % 128))); break; case FLOAT_32: - valField.setFloat(2.0f * i); + valField.setValue(2.0f * i); break; case FLOAT_64: - valField.setFloat(2.0d * i); + valField.setValue(2.0d * i); break; - default: - fail("unexpected value " + value); + fail("unexpected value " + valueType); } } else { for (int j = 0; j < b.length; j++) { b[j] = upto++; } if (bytesRef != null) { - valField.setBytes(bytesRef, value); + valField.setValue(bytesRef); } } doc.removeFields("id"); @@ -637,11 +652,11 @@ public class TestDocValuesIndexing extends LuceneTestCase { if (i % 7 == 0) { if (withDeletions && random.nextBoolean()) { Type val = valueVarList.get(random.nextInt(1 + valueVarList - .indexOf(value))); - 
final int randInt = val == value ? random.nextInt(1 + i) : random + .indexOf(valueType))); + final int randInt = val == valueType ? random.nextInt(1 + i) : random .nextInt(numValues); w.deleteDocuments(new Term("id", val.name() + "_" + randInt)); - if (val == value) { + if (val == valueType) { deleted.set(randInt); } } @@ -663,8 +678,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { Directory d = newDirectory(); RandomIndexWriter w = new RandomIndexWriter(random, d); Document doc = new Document(); - DocValuesField f = new DocValuesField("field"); - f.setInt(17); + DocValuesField f = new DocValuesField("field", 17, Type.VAR_INTS); // Index doc values are single-valued so we should not // be able to add same field more than once: doc.add(f); @@ -691,14 +705,11 @@ public class TestDocValuesIndexing extends LuceneTestCase { Directory d = newDirectory(); RandomIndexWriter w = new RandomIndexWriter(random, d); Document doc = new Document(); - DocValuesField f = new DocValuesField("field"); - f.setInt(17); // Index doc values are single-valued so we should not // be able to add same field more than once: - doc.add(f); - DocValuesField f2 = new DocValuesField("field"); - f2.setFloat(22.0); - doc.add(f2); + Field f; + doc.add(f = new DocValuesField("field", 17, Type.VAR_INTS)); + doc.add(new DocValuesField("field", 22.0, Type.FLOAT_32)); try { w.addDocument(doc); fail("didn't hit expected exception"); @@ -725,7 +736,6 @@ public class TestDocValuesIndexing extends LuceneTestCase { IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)); IndexWriter w = new IndexWriter(d, cfg); - Comparator comp = BytesRef.getUTF8SortedAsUnicodeComparator(); int numDocs = atLeast(100); BytesRefHash hash = new BytesRefHash(); Map docToString = new HashMap(); @@ -733,14 +743,12 @@ public class TestDocValuesIndexing extends LuceneTestCase { for (int i = 0; i < numDocs; i++) { Document doc = new Document(); doc.add(newField("id", "" + i, TextField.TYPE_STORED)); - DocValuesField f = new DocValuesField("field"); String string =fixed ? _TestUtil.randomFixedByteLengthUnicodeString(random, len) : _TestUtil.randomRealisticUnicodeString(random, 1, len); - hash.add(new BytesRef(string)); + BytesRef br = new BytesRef(string); + doc.add(new DocValuesField("field", br, type)); + hash.add(br); docToString.put("" + i, string); - - f.setBytes(new BytesRef(string), type, comp); - doc.add(f); w.addDocument(doc); } if (rarely()) { @@ -763,13 +771,12 @@ public class TestDocValuesIndexing extends LuceneTestCase { Document doc = new Document(); String id = "" + i + numDocs; doc.add(newField("id", id, TextField.TYPE_STORED)); - DocValuesField f = new DocValuesField("field"); String string = fixed ? 
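// ----------------------------------------------------------------------
// Editor's note: illustrative sketch, not part of the patch. Like the test
// above, a DocValuesField can be created once with an initial value and
// type and then updated via setValue() for each added document. The field
// name "count" and the helper class are invented for the example.
import java.io.IOException;

import org.apache.lucene.document.DocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexWriter;

class ReusedDocValuesFieldExample {
  static void indexCounts(IndexWriter writer, long[] counts) throws IOException {
    Document doc = new Document();
    DocValuesField count = new DocValuesField("count", 0L, DocValues.Type.FIXED_INTS_64);
    doc.add(count);
    for (long c : counts) {
      count.setValue(c);        // update the reused field instance
      writer.addDocument(doc);  // re-add the same Document
    }
  }
}
// ----------------------------------------------------------------------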
_TestUtil.randomFixedByteLengthUnicodeString(random, len) : _TestUtil.randomRealisticUnicodeString(random, 1, len); - hash.add(new BytesRef(string)); + BytesRef br = new BytesRef(string); + hash.add(br); docToString.put(id, string); - f.setBytes(new BytesRef(string), type, comp); - doc.add(f); + doc.add( new DocValuesField("field", br, type)); w.addDocument(doc); } w.commit(); @@ -777,7 +784,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { DocValues docValues = MultiDocValues.getDocValues(reader, "field"); Source source = getSource(docValues); SortedSource asSortedSource = source.asSortedSource(); - int[] sort = hash.sort(comp); + int[] sort = hash.sort(BytesRef.getUTF8SortedAsUnicodeComparator()); BytesRef expected = new BytesRef(); BytesRef actual = new BytesRef(); assertEquals(hash.size(), asSortedSource.getValueCount()); diff --git a/lucene/src/test/org/apache/lucene/index/TestDuelingCodecs.java b/lucene/src/test/org/apache/lucene/index/TestDuelingCodecs.java index 358487a157f..8ce19dfe823 100644 --- a/lucene/src/test/org/apache/lucene/index/TestDuelingCodecs.java +++ b/lucene/src/test/org/apache/lucene/index/TestDuelingCodecs.java @@ -500,8 +500,6 @@ public class TestDuelingCodecs extends LuceneTestCase { assertEquals(info, leftField.binaryValue(), rightField.binaryValue()); assertEquals(info, leftField.stringValue(), rightField.stringValue()); assertEquals(info, leftField.numericValue(), rightField.numericValue()); - assertEquals(info, leftField.numeric(), rightField.numeric()); - assertEquals(info, leftField.numericDataType(), rightField.numericDataType()); // TODO: should we check the FT at all? } diff --git a/lucene/src/test/org/apache/lucene/index/TestFieldsReader.java b/lucene/src/test/org/apache/lucene/index/TestFieldsReader.java index cd1a48dec5f..3bb525f53d9 100644 --- a/lucene/src/test/org/apache/lucene/index/TestFieldsReader.java +++ b/lucene/src/test/org/apache/lucene/index/TestFieldsReader.java @@ -27,6 +27,7 @@ import org.apache.lucene.document.DocumentStoredFieldVisitor; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.NumericField; +import org.apache.lucene.document.StoredField; import org.apache.lucene.document.StringField; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.IndexWriterConfig.OpenMode; @@ -237,40 +238,42 @@ public class TestFieldsReader extends LuceneTestCase { final NumericField.DataType[] typeAnswers = new NumericField.DataType[numDocs]; for(int id=0;id terms, Map termToID, int id) throws IOException { Document doc = new Document(); - doc.add(new NumericField("id").setIntValue(id)); + doc.add(new NumericField("id", id)); if (VERBOSE) { System.out.println("TEST: addDoc id:" + id + " terms=" + terms); } diff --git a/lucene/src/test/org/apache/lucene/index/TestTypePromotion.java b/lucene/src/test/org/apache/lucene/index/TestTypePromotion.java index b7028932513..f66a4c24b7f 100644 --- a/lucene/src/test/org/apache/lucene/index/TestTypePromotion.java +++ b/lucene/src/test/org/apache/lucene/index/TestTypePromotion.java @@ -1,31 +1,5 @@ package org.apache.lucene.index; -import java.io.IOException; -import java.util.EnumSet; -import java.util.Random; - -import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.codecs.Codec; -import org.apache.lucene.codecs.lucene40.values.BytesRefUtils; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.DocValuesField; 
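// ----------------------------------------------------------------------
// Editor's note: illustrative sketch, not part of the patch. NumericField
// now takes its value in the constructor, as the TestTermsEnum and
// TestBackwardsCompatibility changes above show; the value's type selects
// int vs. long indexing. Field names are invented for the example.
import org.apache.lucene.document.Document;
import org.apache.lucene.document.NumericField;

class NumericFieldExample {
  static Document newDoc(int id, long timestampMillis) {
    Document doc = new Document();
    doc.add(new NumericField("id", id));                     // indexed as an int
    doc.add(new NumericField("timestamp", timestampMillis)); // indexed as a long
    return doc;
  }
}
// ----------------------------------------------------------------------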
-import org.apache.lucene.document.TextField; -import org.apache.lucene.index.CorruptIndexException; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.IndexReader.ReaderContext; -import org.apache.lucene.index.DocValues; -import org.apache.lucene.index.DocValues.Source; -import org.apache.lucene.index.DocValues.Type; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.NoMergePolicy; -import org.apache.lucene.index.SlowMultiReaderWrapper; -import org.apache.lucene.store.Directory; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.LuceneTestCase; -import org.junit.Before; - /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with this @@ -42,6 +16,26 @@ import org.junit.Before; * License for the specific language governing permissions and limitations under * the License. */ + +import java.io.IOException; +import java.util.EnumSet; +import java.util.Random; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.lucene40.values.BytesRefUtils; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.DocValuesField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.IndexReader.ReaderContext; +import org.apache.lucene.index.DocValues.Source; +import org.apache.lucene.index.DocValues.Type; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LuceneTestCase; +import org.junit.Before; + public class TestTypePromotion extends LuceneTestCase { @Before public void setUp() throws Exception { @@ -81,11 +75,11 @@ public class TestTypePromotion extends LuceneTestCase { int num_2 = atLeast(200); int num_3 = atLeast(200); long[] values = new long[num_1 + num_2 + num_3]; - index(writer, new DocValuesField("promote"), + index(writer, randomValueType(types, random), values, 0, num_1); writer.commit(); - index(writer, new DocValuesField("promote"), + index(writer, randomValueType(types, random), values, num_1, num_2); writer.commit(); @@ -96,7 +90,7 @@ public class TestTypePromotion extends LuceneTestCase { Directory dir_2 = newDirectory() ; IndexWriter writer_2 = new IndexWriter(dir_2, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); - index(writer_2, new DocValuesField("promote"), + index(writer_2, randomValueType(types, random), values, num_1 + num_2, num_3); writer_2.commit(); writer_2.close(); @@ -110,7 +104,7 @@ public class TestTypePromotion extends LuceneTestCase { } dir_2.close(); } else { - index(writer, new DocValuesField("promote"), + index(writer, randomValueType(types, random), values, num_1 + num_2, num_3); } @@ -172,9 +166,45 @@ public class TestTypePromotion extends LuceneTestCase { reader.close(); } - public void index(IndexWriter writer, DocValuesField valField, + public void index(IndexWriter writer, Type valueType, long[] values, int offset, int num) throws CorruptIndexException, IOException { + final DocValuesField valField; + switch (valueType) { + case FIXED_INTS_8: + valField = new DocValuesField("promote", (byte) 0, valueType); + break; + case FIXED_INTS_16: + valField = new DocValuesField("promote", (short) 0, valueType); + break; + case FIXED_INTS_32: + valField = new DocValuesField("promote", 0, valueType); + break; + case VAR_INTS: + 
valField = new DocValuesField("promote", 0L, valueType); + break; + case FIXED_INTS_64: + valField = new DocValuesField("promote", (long) 0, valueType); + break; + case FLOAT_64: + valField = new DocValuesField("promote", (double) 0, valueType); + break; + case FLOAT_32: + valField = new DocValuesField("promote", (float) 0, valueType); + break; + case BYTES_FIXED_DEREF: + case BYTES_FIXED_SORTED: + case BYTES_FIXED_STRAIGHT: + case BYTES_VAR_DEREF: + case BYTES_VAR_SORTED: + case BYTES_VAR_STRAIGHT: + valField = new DocValuesField("promote", new BytesRef(), valueType); + break; + default: + fail("unexpected value " + valueType); + valField = null; + } + BytesRef ref = new BytesRef(new byte[] { 1, 2, 3, 4 }); for (int i = offset; i < offset + num; i++) { Document doc = new Document(); @@ -182,40 +212,40 @@ public class TestTypePromotion extends LuceneTestCase { switch (valueType) { case VAR_INTS: values[i] = random.nextInt(); - valField.setInt(values[i]); + valField.setValue(values[i]); break; case FIXED_INTS_16: values[i] = random.nextInt(Short.MAX_VALUE); - valField.setInt((short) values[i], true); + valField.setValue((short) values[i]); break; case FIXED_INTS_32: values[i] = random.nextInt(); - valField.setInt((int) values[i], true); + valField.setValue((int) values[i]); break; case FIXED_INTS_64: values[i] = random.nextLong(); - valField.setInt(values[i], true); + valField.setValue(values[i]); break; case FLOAT_64: double nextDouble = random.nextDouble(); values[i] = Double.doubleToRawLongBits(nextDouble); - valField.setFloat(nextDouble); + valField.setValue(nextDouble); break; case FLOAT_32: final float nextFloat = random.nextFloat(); values[i] = Double.doubleToRawLongBits(nextFloat); - valField.setFloat(nextFloat); + valField.setValue(nextFloat); break; case FIXED_INTS_8: - values[i] = (byte) i; - valField.setInt((byte)values[i], true); + values[i] = (byte) i; + valField.setValue((byte)values[i]); break; case BYTES_FIXED_DEREF: case BYTES_FIXED_SORTED: case BYTES_FIXED_STRAIGHT: values[i] = random.nextLong(); BytesRefUtils.copyLong(ref, values[i]); - valField.setBytes(ref, valueType); + valField.setValue(ref); break; case BYTES_VAR_DEREF: case BYTES_VAR_SORTED: @@ -227,12 +257,11 @@ public class TestTypePromotion extends LuceneTestCase { BytesRefUtils.copyLong(ref, random.nextLong()); values[i] = BytesRefUtils.asLong(ref); } - valField.setBytes(ref, valueType); + valField.setValue(ref); break; default: fail("unexpected value " + valueType); - } doc.add(valField); writer.addDocument(doc); @@ -267,7 +296,7 @@ public class TestTypePromotion extends LuceneTestCase { int num_1 = atLeast(200); int num_2 = atLeast(200); long[] values = new long[num_1 + num_2]; - index(writer, new DocValuesField("promote"), + index(writer, randomValueType(INTEGERS, random), values, 0, num_1); writer.commit(); @@ -275,8 +304,8 @@ public class TestTypePromotion extends LuceneTestCase { // once in a while use addIndexes Directory dir_2 = newDirectory() ; IndexWriter writer_2 = new IndexWriter(dir_2, - newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); - index(writer_2, new DocValuesField("promote"), + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); + index(writer_2, randomValueType(random.nextBoolean() ? 
UNSORTED_BYTES : SORTED_BYTES, random), values, num_1, num_2); writer_2.commit(); writer_2.close(); @@ -290,7 +319,7 @@ public class TestTypePromotion extends LuceneTestCase { } dir_2.close(); } else { - index(writer, new DocValuesField("promote"), + index(writer, randomValueType(random.nextBoolean() ? UNSORTED_BYTES : SORTED_BYTES, random), values, num_1, num_2); writer.commit(); } diff --git a/lucene/src/test/org/apache/lucene/search/TestDocValuesScoring.java b/lucene/src/test/org/apache/lucene/search/TestDocValuesScoring.java index 1a20de526c9..2e207cdb381 100644 --- a/lucene/src/test/org/apache/lucene/search/TestDocValuesScoring.java +++ b/lucene/src/test/org/apache/lucene/search/TestDocValuesScoring.java @@ -20,16 +20,17 @@ package org.apache.lucene.search; import java.io.IOException; import org.apache.lucene.codecs.Codec; +import org.apache.lucene.document.DocValuesField; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.document.DocValuesField; import org.apache.lucene.document.TextField; +import org.apache.lucene.index.DocValues.Source; +import org.apache.lucene.index.DocValues; import org.apache.lucene.index.FieldInvertState; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; -import org.apache.lucene.index.DocValues.Source; -import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.SimilarityProvider; import org.apache.lucene.store.Directory; @@ -54,18 +55,18 @@ public class TestDocValuesScoring extends LuceneTestCase { Document doc = new Document(); Field field = newField("foo", "", TextField.TYPE_UNSTORED); doc.add(field); - DocValuesField dvField = new DocValuesField("foo_boost"); + DocValuesField dvField = new DocValuesField("foo_boost", 0.0f, DocValues.Type.FLOAT_32); doc.add(dvField); Field field2 = newField("bar", "", TextField.TYPE_UNSTORED); doc.add(field2); field.setValue("quick brown fox"); field2.setValue("quick brown fox"); - dvField.setFloat(2f); // boost x2 + dvField.setValue(2f); // boost x2 iw.addDocument(doc); field.setValue("jumps over lazy brown dog"); field2.setValue("jumps over lazy brown dog"); - dvField.setFloat(4f); // boost x4 + dvField.setValue(4f); // boost x4 iw.addDocument(doc); IndexReader ir = iw.getReader(); iw.close(); diff --git a/lucene/src/test/org/apache/lucene/search/TestFieldCache.java b/lucene/src/test/org/apache/lucene/search/TestFieldCache.java index 9f5d4c28a47..95bfd4c30b0 100644 --- a/lucene/src/test/org/apache/lucene/search/TestFieldCache.java +++ b/lucene/src/test/org/apache/lucene/search/TestFieldCache.java @@ -78,7 +78,7 @@ public class TestFieldCache extends LuceneTestCase { } if (i%2 == 0) { - doc.add(new NumericField("numInt").setIntValue(i)); + doc.add(new NumericField("numInt", i)); } // sometimes skip the field: diff --git a/lucene/src/test/org/apache/lucene/search/TestMultiValuedNumericRangeQuery.java b/lucene/src/test/org/apache/lucene/search/TestMultiValuedNumericRangeQuery.java index 382d974aab4..b9fb1920a55 100644 --- a/lucene/src/test/org/apache/lucene/search/TestMultiValuedNumericRangeQuery.java +++ b/lucene/src/test/org/apache/lucene/search/TestMultiValuedNumericRangeQuery.java @@ -52,7 +52,7 @@ public class TestMultiValuedNumericRangeQuery extends LuceneTestCase { for (int m=0, c=random.nextInt(10); m<=c; 
m++) { int value = random.nextInt(Integer.MAX_VALUE); doc.add(newField("asc", format.format(value), StringField.TYPE_UNSTORED)); - doc.add(new NumericField("trie").setIntValue(value)); + doc.add(new NumericField("trie", value)); } writer.addDocument(doc); } diff --git a/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java b/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java index 02f00fbc16d..75fe5124f3e 100644 --- a/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java +++ b/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java @@ -19,9 +19,10 @@ package org.apache.lucene.search; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; +import org.apache.lucene.document.FieldType; import org.apache.lucene.document.NumericField; -import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.SlowMultiReaderWrapper; @@ -58,15 +59,40 @@ public class TestNumericRangeQuery32 extends LuceneTestCase { newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setMaxBufferedDocs(_TestUtil.nextInt(random, 100, 1000)) .setMergePolicy(newLogMergePolicy())); - + + final FieldType storedInt = NumericField.getFieldType(NumericField.DataType.INT, true); + + final FieldType storedInt8 = new FieldType(storedInt); + storedInt8.setNumericPrecisionStep(8); + + final FieldType storedInt4 = new FieldType(storedInt); + storedInt4.setNumericPrecisionStep(4); + + final FieldType storedInt2 = new FieldType(storedInt); + storedInt2.setNumericPrecisionStep(2); + + final FieldType storedIntNone = new FieldType(storedInt); + storedIntNone.setNumericPrecisionStep(Integer.MAX_VALUE); + + final FieldType unstoredInt = NumericField.getFieldType(NumericField.DataType.INT, false); + + final FieldType unstoredInt8 = new FieldType(unstoredInt); + unstoredInt8.setNumericPrecisionStep(8); + + final FieldType unstoredInt4 = new FieldType(unstoredInt); + unstoredInt4.setNumericPrecisionStep(4); + + final FieldType unstoredInt2 = new FieldType(unstoredInt); + unstoredInt2.setNumericPrecisionStep(2); + NumericField - field8 = new NumericField("field8", 8, NumericField.TYPE_STORED), - field4 = new NumericField("field4", 4, NumericField.TYPE_STORED), - field2 = new NumericField("field2", 2, NumericField.TYPE_STORED), - fieldNoTrie = new NumericField("field"+Integer.MAX_VALUE, Integer.MAX_VALUE, rarely() ? 
NumericField.TYPE_STORED : NumericField.TYPE_UNSTORED), - ascfield8 = new NumericField("ascfield8", 8, NumericField.TYPE_UNSTORED), - ascfield4 = new NumericField("ascfield4", 4, NumericField.TYPE_UNSTORED), - ascfield2 = new NumericField("ascfield2", 2, NumericField.TYPE_UNSTORED); + field8 = new NumericField("field8", 0, storedInt8), + field4 = new NumericField("field4", 0, storedInt4), + field2 = new NumericField("field2", 0, storedInt2), + fieldNoTrie = new NumericField("field"+Integer.MAX_VALUE, 0, storedIntNone), + ascfield8 = new NumericField("ascfield8", 0, unstoredInt8), + ascfield4 = new NumericField("ascfield4", 0, unstoredInt4), + ascfield2 = new NumericField("ascfield2", 0, unstoredInt2); Document doc = new Document(); // add fields, that have a distance to test general functionality @@ -77,15 +103,15 @@ public class TestNumericRangeQuery32 extends LuceneTestCase { // Add a series of noDocs docs with increasing int values for (int l=0; lact ); last=act; } diff --git a/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java b/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java index 979406abe88..70d3611de91 100644 --- a/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java +++ b/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java @@ -19,14 +19,15 @@ package org.apache.lucene.search; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; +import org.apache.lucene.document.FieldType; import org.apache.lucene.document.NumericField; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.SlowMultiReaderWrapper; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; @@ -58,18 +59,49 @@ public class TestNumericRangeQuery64 extends LuceneTestCase { newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setMaxBufferedDocs(_TestUtil.nextInt(random, 100, 1000)) .setMergePolicy(newLogMergePolicy())); - + + final FieldType storedLong = NumericField.getFieldType(NumericField.DataType.LONG, true); + + final FieldType storedLong8 = new FieldType(storedLong); + storedLong8.setNumericPrecisionStep(8); + + final FieldType storedLong4 = new FieldType(storedLong); + storedLong4.setNumericPrecisionStep(4); + + final FieldType storedLong6 = new FieldType(storedLong); + storedLong6.setNumericPrecisionStep(6); + + final FieldType storedLong2 = new FieldType(storedLong); + storedLong2.setNumericPrecisionStep(2); + + final FieldType storedLongNone = new FieldType(storedLong); + storedLongNone.setNumericPrecisionStep(Integer.MAX_VALUE); + + final FieldType unstoredLong = NumericField.getFieldType(NumericField.DataType.LONG, false); + + final FieldType unstoredLong8 = new FieldType(unstoredLong); + unstoredLong8.setNumericPrecisionStep(8); + + final FieldType unstoredLong6 = new FieldType(unstoredLong); + unstoredLong6.setNumericPrecisionStep(6); + + final FieldType unstoredLong4 = new FieldType(unstoredLong); + unstoredLong4.setNumericPrecisionStep(4); + + final FieldType unstoredLong2 = new FieldType(unstoredLong); + unstoredLong2.setNumericPrecisionStep(2); + NumericField - field8 = new 
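// ----------------------------------------------------------------------
// Editor's note: illustrative sketch, not part of the patch. As in the
// updated range-query tests above, a non-default precision step is now
// configured through a FieldType obtained from NumericField.getFieldType().
// The field name "price" is invented for the example.
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.NumericField;

class PrecisionStepExample {
  static NumericField newPriceField(long priceCents) {
    FieldType t = new FieldType(NumericField.getFieldType(NumericField.DataType.LONG, true));
    t.setNumericPrecisionStep(8); // coarser trie terms than the default step
    return new NumericField("price", priceCents, t);
  }
}
// ----------------------------------------------------------------------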
NumericField("field8", 8, NumericField.TYPE_STORED), - field6 = new NumericField("field6", 6, NumericField.TYPE_STORED), - field4 = new NumericField("field4", 4, NumericField.TYPE_STORED), - field2 = new NumericField("field2", 2, NumericField.TYPE_STORED), - fieldNoTrie = new NumericField("field"+Integer.MAX_VALUE, Integer.MAX_VALUE, rarely() ? NumericField.TYPE_STORED : NumericField.TYPE_UNSTORED), - ascfield8 = new NumericField("ascfield8", 8, NumericField.TYPE_UNSTORED), - ascfield6 = new NumericField("ascfield6", 6, NumericField.TYPE_UNSTORED), - ascfield4 = new NumericField("ascfield4", 4, NumericField.TYPE_UNSTORED), - ascfield2 = new NumericField("ascfield2", 2, NumericField.TYPE_UNSTORED); - + field8 = new NumericField("field8", 0L, storedLong8), + field6 = new NumericField("field6", 0L, storedLong6), + field4 = new NumericField("field4", 0L, storedLong4), + field2 = new NumericField("field2", 0L, storedLong2), + fieldNoTrie = new NumericField("field"+Integer.MAX_VALUE, 0L, storedLongNone), + ascfield8 = new NumericField("ascfield8", 0L, unstoredLong8), + ascfield6 = new NumericField("ascfield6", 0L, unstoredLong6), + ascfield4 = new NumericField("ascfield4", 0L, unstoredLong4), + ascfield2 = new NumericField("ascfield2", 0L, unstoredLong2); + Document doc = new Document(); // add fields, that have a distance to test general functionality doc.add(field8); doc.add(field6); doc.add(field4); doc.add(field2); doc.add(fieldNoTrie); @@ -79,17 +111,17 @@ public class TestNumericRangeQuery64 extends LuceneTestCase { // Add a series of noDocs docs with increasing long values, by updating the fields for (int l=0; lact ); last=act; } diff --git a/lucene/src/test/org/apache/lucene/search/TestSort.java b/lucene/src/test/org/apache/lucene/search/TestSort.java index a7c1bf9079f..ea674c20144 100644 --- a/lucene/src/test/org/apache/lucene/search/TestSort.java +++ b/lucene/src/test/org/apache/lucene/search/TestSort.java @@ -36,11 +36,9 @@ import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.RandomIndexWriter; -import org.apache.lucene.index.SlowMultiReaderWrapper; import org.apache.lucene.index.Term; import org.apache.lucene.index.DocValues; import org.apache.lucene.search.BooleanClause.Occur; @@ -146,35 +144,31 @@ public class TestSort extends LuceneTestCase { doc.add (new Field ("tracer", data[i][0], ft1)); doc.add (new TextField ("contents", data[i][1])); if (data[i][2] != null) { - Field f = new StringField ("int", data[i][2]); + doc.add(new StringField ("int", data[i][2])); if (supportsDocValues) { - f = DocValuesField.build(f, DocValues.Type.VAR_INTS); + doc.add(new DocValuesField("int", Integer.parseInt(data[i][2]), DocValues.Type.VAR_INTS)); } - doc.add(f); } if (data[i][3] != null) { - Field f = new StringField ("float", data[i][3]); + doc.add(new StringField ("float", data[i][3])); if (supportsDocValues) { - f = DocValuesField.build(f, DocValues.Type.FLOAT_32); + doc.add(new DocValuesField("float", Float.parseFloat(data[i][3]), DocValues.Type.FLOAT_32)); } - doc.add(f); } if (data[i][4] != null) { - Field f = new StringField ("string", data[i][4]); + doc.add(new StringField ("string", data[i][4])); if (supportsDocValues) { - f = DocValuesField.build(f, stringDVType); + 
doc.add(new DocValuesField("string", new BytesRef(data[i][4]), stringDVType)); } - doc.add(f); } if (data[i][5] != null) doc.add (new StringField ("custom", data[i][5])); if (data[i][6] != null) doc.add (new StringField ("i18n", data[i][6])); if (data[i][7] != null) doc.add (new StringField ("long", data[i][7])); if (data[i][8] != null) { - Field f = new StringField ("double", data[i][8]); + doc.add(new StringField ("double", data[i][8])); if (supportsDocValues) { - f = DocValuesField.build(f, DocValues.Type.FLOAT_64); + doc.add(new DocValuesField("double", Double.parseDouble(data[i][8]), DocValues.Type.FLOAT_64)); } - doc.add(f); } if (data[i][9] != null) doc.add (new StringField ("short", data[i][9])); if (data[i][10] != null) doc.add (new StringField ("byte", data[i][10])); @@ -216,17 +210,15 @@ public class TestSort extends LuceneTestCase { String num = getRandomCharString(getRandomNumber(2, 8), 48, 52); doc.add (new Field ("tracer", num, onlyStored)); //doc.add (new Field ("contents", Integer.toString(i), Field.Store.NO, Field.Index.ANALYZED)); - Field f = new StringField("string", num); + doc.add(new StringField("string", num)); if (supportsDocValues) { - f = DocValuesField.build(f, DocValues.Type.BYTES_VAR_SORTED); + doc.add(new DocValuesField("string", new BytesRef(num), DocValues.Type.BYTES_VAR_SORTED)); } - doc.add (f); String num2 = getRandomCharString(getRandomNumber(1, 4), 48, 50); - f = new StringField ("string2", num2); + doc.add(new StringField ("string2", num2)); if (supportsDocValues) { - f = DocValuesField.build(f, DocValues.Type.BYTES_VAR_SORTED); + doc.add(new DocValuesField("string2", new BytesRef(num2), DocValues.Type.BYTES_VAR_SORTED)); } - doc.add (f); doc.add (new Field ("tracer2", num2, onlyStored)); for(IndexableField f2 : doc.getFields()) { ((Field) f2).setBoost(2.0f); @@ -235,17 +227,15 @@ public class TestSort extends LuceneTestCase { String numFixed = getRandomCharString(fixedLen, 48, 52); doc.add (new Field ("fixed_tracer", numFixed, onlyStored)); //doc.add (new Field ("contents", Integer.toString(i), Field.Store.NO, Field.Index.ANALYZED)); - f = new StringField("string_fixed", numFixed); + doc.add(new StringField("string_fixed", numFixed)); if (supportsDocValues) { - f = DocValuesField.build(f, DocValues.Type.BYTES_FIXED_SORTED); + doc.add(new DocValuesField("string_fixed", new BytesRef(numFixed), DocValues.Type.BYTES_FIXED_SORTED)); } - doc.add (f); String num2Fixed = getRandomCharString(fixedLen2, 48, 52); - f = new StringField ("string2_fixed", num2Fixed); + doc.add(new StringField ("string2_fixed", num2Fixed)); if (supportsDocValues) { - f = DocValuesField.build(f, DocValues.Type.BYTES_FIXED_SORTED); + doc.add(new DocValuesField("string2_fixed", new BytesRef(num2Fixed), DocValues.Type.BYTES_FIXED_SORTED)); } - doc.add (f); doc.add (new Field ("tracer2_fixed", num2Fixed, onlyStored)); for(IndexableField f2 : doc.getFields()) { @@ -387,7 +377,7 @@ public class TestSort extends LuceneTestCase { sort.setSort (useDocValues(new SortField ("int", SortField.Type.INT)), SortField.FIELD_DOC ); assertMatches (full, queryX, sort, "IGAEC"); assertMatches (full, queryY, sort, "DHFJB"); - + sort.setSort (useDocValues(new SortField ("float", SortField.Type.FLOAT)), SortField.FIELD_DOC ); assertMatches (full, queryX, sort, "GCIEA"); assertMatches (full, queryY, sort, "DHJFB"); diff --git a/lucene/src/test/org/apache/lucene/search/TestTopDocsMerge.java b/lucene/src/test/org/apache/lucene/search/TestTopDocsMerge.java index cb1ca7315d0..b8359397e3b 100644 --- 
a/lucene/src/test/org/apache/lucene/search/TestTopDocsMerge.java +++ b/lucene/src/test/org/apache/lucene/search/TestTopDocsMerge.java @@ -93,7 +93,7 @@ public class TestTopDocsMerge extends LuceneTestCase { final Document doc = new Document(); doc.add(newField("string", _TestUtil.randomRealisticUnicodeString(random), StringField.TYPE_UNSTORED)); doc.add(newField("text", content[random.nextInt(content.length)], TextField.TYPE_UNSTORED)); - doc.add(new NumericField("float").setFloatValue(random.nextFloat())); + doc.add(new NumericField("float", random.nextFloat())); final int intValue; if (random.nextInt(100) == 17) { intValue = Integer.MIN_VALUE; @@ -102,7 +102,7 @@ public class TestTopDocsMerge extends LuceneTestCase { } else { intValue = random.nextInt(); } - doc.add(new NumericField("int").setIntValue(intValue)); + doc.add(new NumericField("int", intValue)); if (VERBOSE) { System.out.println(" doc=" + doc); } diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java index 609b2313268..1eb031d3877 100644 --- a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java @@ -109,8 +109,8 @@ public class DocMaker implements Closeable { fields.put(ID_FIELD, new Field(ID_FIELD, "", StringField.TYPE_STORED)); fields.put(NAME_FIELD, new Field(NAME_FIELD, "", ft)); - numericFields.put(DATE_MSEC_FIELD, new NumericField(DATE_MSEC_FIELD)); - numericFields.put(TIME_SEC_FIELD, new NumericField(TIME_SEC_FIELD)); + numericFields.put(DATE_MSEC_FIELD, new NumericField(DATE_MSEC_FIELD, 0L)); + numericFields.put(TIME_SEC_FIELD, new NumericField(TIME_SEC_FIELD, 0)); doc = new Document(); } else { @@ -138,15 +138,34 @@ public class DocMaker implements Closeable { return f; } - NumericField getNumericField(String name) { - if (!reuseFields) { - return new NumericField(name); + NumericField getNumericField(String name, NumericField.DataType type) { + NumericField f; + if (reuseFields) { + f = numericFields.get(name); + } else { + f = null; } - - NumericField f = numericFields.get(name); + if (f == null) { - f = new NumericField(name); - numericFields.put(name, f); + switch(type) { + case INT: + f = new NumericField(name, 0); + break; + case LONG: + f = new NumericField(name, 0L); + break; + case FLOAT: + f = new NumericField(name, 0.0f); + break; + case DOUBLE: + f = new NumericField(name, 0.0); + break; + default: + assert false; + } + if (reuseFields) { + numericFields.put(name, f); + } } return f; } @@ -249,15 +268,15 @@ public class DocMaker implements Closeable { date = new Date(); } - NumericField dateField = ds.getNumericField(DATE_MSEC_FIELD); - dateField.setLongValue(date.getTime()); + NumericField dateField = ds.getNumericField(DATE_MSEC_FIELD, NumericField.DataType.LONG); + dateField.setValue(date.getTime()); doc.add(dateField); util.cal.setTime(date); final int sec = util.cal.get(Calendar.HOUR_OF_DAY)*3600 + util.cal.get(Calendar.MINUTE)*60 + util.cal.get(Calendar.SECOND); - NumericField timeSecField = ds.getNumericField(TIME_SEC_FIELD); - timeSecField.setIntValue(sec); + NumericField timeSecField = ds.getNumericField(TIME_SEC_FIELD, NumericField.DataType.INT); + timeSecField.setValue(sec); doc.add(timeSecField); // Set TITLE_FIELD diff --git a/modules/grouping/src/test/org/apache/lucene/search/grouping/AllGroupHeadsCollectorTest.java 
b/modules/grouping/src/test/org/apache/lucene/search/grouping/AllGroupHeadsCollectorTest.java index 369c0314397..2558eff7bda 100644 --- a/modules/grouping/src/test/org/apache/lucene/search/grouping/AllGroupHeadsCollectorTest.java +++ b/modules/grouping/src/test/org/apache/lucene/search/grouping/AllGroupHeadsCollectorTest.java @@ -211,7 +211,7 @@ public class AllGroupHeadsCollectorTest extends LuceneTestCase { doc.add(group); DocValuesField valuesField = null; if (canUseIDV) { - valuesField = new DocValuesField("group"); + valuesField = new DocValuesField("group", new BytesRef(), valueType); doc.add(valuesField); } Field sort1 = newField("sort1", "", StringField.TYPE_UNSTORED); @@ -226,7 +226,7 @@ public class AllGroupHeadsCollectorTest extends LuceneTestCase { Field content = newField("content", "", TextField.TYPE_UNSTORED); doc.add(content); docNoGroup.add(content); - NumericField id = new NumericField("id"); + NumericField id = new NumericField("id", 0); doc.add(id); docNoGroup.add(id); final GroupDoc[] groupDocs = new GroupDoc[numDocs]; @@ -257,14 +257,14 @@ public class AllGroupHeadsCollectorTest extends LuceneTestCase { if (groupDoc.group != null) { group.setValue(groupDoc.group.utf8ToString()); if (canUseIDV) { - valuesField.setBytes(new BytesRef(groupDoc.group.utf8ToString()), valueType); + valuesField.setValue(new BytesRef(groupDoc.group.utf8ToString())); } } sort1.setValue(groupDoc.sort1.utf8ToString()); sort2.setValue(groupDoc.sort2.utf8ToString()); sort3.setValue(groupDoc.sort3.utf8ToString()); content.setValue(groupDoc.content); - id.setIntValue(groupDoc.id); + id.setValue(groupDoc.id); if (groupDoc.group == null) { w.addDocument(docNoGroup); } else { @@ -527,9 +527,7 @@ public class AllGroupHeadsCollectorTest extends LuceneTestCase { private void addGroupField(Document doc, String groupField, String value, boolean canUseIDV, Type valueType) { doc.add(new Field(groupField, value, TextField.TYPE_STORED)); if (canUseIDV) { - DocValuesField valuesField = new DocValuesField(groupField); - valuesField.setBytes(new BytesRef(value), valueType); - doc.add(valuesField); + doc.add(new DocValuesField(groupField, new BytesRef(value), valueType)); } } diff --git a/modules/grouping/src/test/org/apache/lucene/search/grouping/AllGroupsCollectorTest.java b/modules/grouping/src/test/org/apache/lucene/search/grouping/AllGroupsCollectorTest.java index adb3cfcbc9e..719fd66220e 100644 --- a/modules/grouping/src/test/org/apache/lucene/search/grouping/AllGroupsCollectorTest.java +++ b/modules/grouping/src/test/org/apache/lucene/search/grouping/AllGroupsCollectorTest.java @@ -123,9 +123,7 @@ public class AllGroupsCollectorTest extends LuceneTestCase { private void addGroupField(Document doc, String groupField, String value, boolean canUseIDV) { doc.add(new Field(groupField, value, TextField.TYPE_STORED)); if (canUseIDV) { - DocValuesField valuesField = new DocValuesField(groupField); - valuesField.setBytes(new BytesRef(value), Type.BYTES_VAR_SORTED); - doc.add(valuesField); + doc.add(new DocValuesField(groupField, new BytesRef(value), Type.BYTES_VAR_SORTED)); } } diff --git a/modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java b/modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java index fa68d7e32f4..8b622036e6c 100644 --- a/modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java +++ b/modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java @@ -171,9 +171,7 @@ public class TestGrouping extends LuceneTestCase { 
   private void addGroupField(Document doc, String groupField, String value, boolean canUseIDV) {
     doc.add(new Field(groupField, value, TextField.TYPE_STORED));
     if (canUseIDV) {
-      DocValuesField valuesField = new DocValuesField(groupField);
-      valuesField.setBytes(new BytesRef(value), Type.BYTES_VAR_SORTED);
-      doc.add(valuesField);
+      doc.add(new DocValuesField(groupField, new BytesRef(value), Type.BYTES_VAR_SORTED));
     }
   }
@@ -593,7 +591,7 @@
       }
       doc.add(newField("sort1", groupValue.sort1.utf8ToString(), StringField.TYPE_UNSTORED));
       doc.add(newField("sort2", groupValue.sort2.utf8ToString(), StringField.TYPE_UNSTORED));
-      doc.add(new NumericField("id").setIntValue(groupValue.id));
+      doc.add(new NumericField("id", groupValue.id));
       doc.add(newField("content", groupValue.content, TextField.TYPE_UNSTORED));
       //System.out.println("TEST: doc content=" + groupValue.content + " group=" + (groupValue.group == null ? "null" : groupValue.group.utf8ToString()) + " sort1=" + groupValue.sort1.utf8ToString() + " id=" + groupValue.id);
     }
@@ -705,7 +703,7 @@
     Document doc = new Document();
     Document docNoGroup = new Document();
-    DocValuesField idvGroupField = new DocValuesField("group");
+    DocValuesField idvGroupField = new DocValuesField("group", new BytesRef(), Type.BYTES_VAR_SORTED);
     if (canUseIDV) {
       doc.add(idvGroupField);
     }
@@ -721,7 +719,7 @@
     Field content = newField("content", "", TextField.TYPE_UNSTORED);
     doc.add(content);
     docNoGroup.add(content);
-    NumericField id = new NumericField("id");
+    NumericField id = new NumericField("id", 0);
     doc.add(id);
     docNoGroup.add(id);
     final GroupDoc[] groupDocs = new GroupDoc[numDocs];
@@ -747,13 +745,13 @@
       if (groupDoc.group != null) {
         group.setValue(groupDoc.group.utf8ToString());
         if (canUseIDV) {
-          idvGroupField.setBytes(BytesRef.deepCopyOf(groupDoc.group), Type.BYTES_VAR_SORTED);
+          idvGroupField.setValue(BytesRef.deepCopyOf(groupDoc.group));
         }
       }
       sort1.setValue(groupDoc.sort1.utf8ToString());
       sort2.setValue(groupDoc.sort2.utf8ToString());
       content.setValue(groupDoc.content);
-      id.setIntValue(groupDoc.id);
+      id.setValue(groupDoc.id);
       if (groupDoc.group == null) {
         w.addDocument(docNoGroup);
       } else {
diff --git a/modules/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java b/modules/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java
index 9905d940df3..869a21aa5ea 100644
--- a/modules/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java
+++ b/modules/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java
@@ -26,6 +26,7 @@ import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.NumericField;
+import org.apache.lucene.document.StoredField;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.index.IndexReader;
@@ -58,7 +59,8 @@ public class TestBlockJoin extends LuceneTestCase {
   private Document makeJob(String skill, int year) {
     Document job = new Document();
     job.add(newField("skill", skill, StringField.TYPE_STORED));
-    job.add(new NumericField("year", NumericField.TYPE_STORED).setIntValue(year));
+    job.add(new NumericField("year", year));
+    job.add(new StoredField("year", year));
     return job;
   }
@@ -66,7 +68,7 @@ public class TestBlockJoin extends LuceneTestCase {
   private Document makeQualification(String qualification, int year) {
     Document job = new Document();
     job.add(newField("qualification", qualification, StringField.TYPE_STORED));
-    job.add(new NumericField("year").setIntValue(year));
+    job.add(new NumericField("year", year));
     return job;
   }
@@ -147,7 +149,7 @@ public class TestBlockJoin extends LuceneTestCase {
     childDoc = s.doc(hits.scoreDocs[0].doc);
     //System.out.println("CHILD = " + childDoc + " docID=" + hits.scoreDocs[0].doc);
     assertEquals("java", childDoc.get("skill"));
-    assertEquals(2007, ((NumericField) childDoc.getField("year")).numericValue());
+    assertEquals(2007, ((StoredField) childDoc.getField("year")).numericValue());
     assertEquals("Lisa", getParentDoc(r, parentsFilter, hits.scoreDocs[0].doc).get("name"));
     r.close();
     dir.close();
diff --git a/modules/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestNumericQueryParser.java b/modules/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestNumericQueryParser.java
index ccd8a8b084e..c257896468e 100644
--- a/modules/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestNumericQueryParser.java
+++ b/modules/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestNumericQueryParser.java
@@ -33,6 +33,7 @@ import java.util.TimeZone;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
+import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.NumericField;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.RandomIndexWriter;
@@ -192,17 +193,37 @@ public class TestNumericQueryParser extends LuceneTestCase {
     for (NumericField.DataType type : NumericField.DataType.values()) {
       numericConfigMap.put(type.name(), new NumericConfig(PRECISION_STEP, NUMBER_FORMAT, type));
-
-      NumericField field = new NumericField(type.name(), PRECISION_STEP, NumericField.TYPE_STORED);
-
+
+      FieldType ft = new FieldType(NumericField.getFieldType(type, true));
+      ft.setNumericPrecisionStep(PRECISION_STEP);
+      final NumericField field;
+
+      switch(type) {
+        case INT:
+          field = new NumericField(type.name(), 0, ft);
+          break;
+        case FLOAT:
+          field = new NumericField(type.name(), 0.0f, ft);
+          break;
+        case LONG:
+          field = new NumericField(type.name(), 0l, ft);
+          break;
+        case DOUBLE:
+          field = new NumericField(type.name(), 0.0, ft);
+          break;
+        default:
+          assert false;
+          field = null;
+      }
       numericFieldMap.put(type.name(), field);
       doc.add(field);
-
     }
     numericConfigMap.put(DATE_FIELD_NAME, new NumericConfig(PRECISION_STEP, DATE_FORMAT, NumericField.DataType.LONG));
-    NumericField dateField = new NumericField(DATE_FIELD_NAME, PRECISION_STEP, NumericField.TYPE_STORED);
+    FieldType ft = new FieldType(NumericField.getFieldType(NumericField.DataType.LONG, true));
+    ft.setNumericPrecisionStep(PRECISION_STEP);
+    NumericField dateField = new NumericField(DATE_FIELD_NAME, 0l, ft);
     numericFieldMap.put(DATE_FIELD_NAME, dateField);
     doc.add(dateField);
@@ -264,24 +285,23 @@ public class TestNumericQueryParser extends LuceneTestCase {
     Number number = getNumberType(numberType, NumericField.DataType.DOUBLE.name());
-    numericFieldMap.get(NumericField.DataType.DOUBLE.name()).setDoubleValue(
+    numericFieldMap.get(NumericField.DataType.DOUBLE.name()).setValue(
         number.doubleValue());
     number = getNumberType(numberType, NumericField.DataType.INT.name());
-    numericFieldMap.get(NumericField.DataType.INT.name()).setIntValue(
+    numericFieldMap.get(NumericField.DataType.INT.name()).setValue(
         number.intValue());
     number = getNumberType(numberType, NumericField.DataType.LONG.name());
-    numericFieldMap.get(NumericField.DataType.LONG.name()).setLongValue(
+    numericFieldMap.get(NumericField.DataType.LONG.name()).setValue(
        number.longValue());
     number = getNumberType(numberType, NumericField.DataType.FLOAT.name());
-    numericFieldMap.get(NumericField.DataType.FLOAT.name()).setFloatValue(
+    numericFieldMap.get(NumericField.DataType.FLOAT.name()).setValue(
        number.floatValue());
     number = getNumberType(numberType, DATE_FIELD_NAME);
-    numericFieldMap.get(DATE_FIELD_NAME).setLongValue(number.longValue());
-
+    numericFieldMap.get(DATE_FIELD_NAME).setValue(number.longValue());
   }
   private static int randomDateStyle(Random random) {
diff --git a/modules/queryparser/src/test/org/apache/lucene/queryparser/xml/TestParser.java b/modules/queryparser/src/test/org/apache/lucene/queryparser/xml/TestParser.java
index 7d92978b27c..56b9b13f602 100644
--- a/modules/queryparser/src/test/org/apache/lucene/queryparser/xml/TestParser.java
+++ b/modules/queryparser/src/test/org/apache/lucene/queryparser/xml/TestParser.java
@@ -68,9 +68,7 @@ public class TestParser extends LuceneTestCase {
       Document doc = new Document();
       doc.add(newField("date", date, TextField.TYPE_STORED));
       doc.add(newField("contents", content, TextField.TYPE_STORED));
-      NumericField numericField = new NumericField("date2");
-      numericField.setIntValue(Integer.valueOf(date));
-      doc.add(numericField);
+      doc.add(new NumericField("date2", Integer.valueOf(date)));
       writer.addDocument(doc);
       line = d.readLine();
     }
diff --git a/solr/core/src/java/org/apache/solr/response/transform/BaseEditorialTransformer.java b/solr/core/src/java/org/apache/solr/response/transform/BaseEditorialTransformer.java
index 6f36adef1d6..1d8aa458fc1 100644
--- a/solr/core/src/java/org/apache/solr/response/transform/BaseEditorialTransformer.java
+++ b/solr/core/src/java/org/apache/solr/response/transform/BaseEditorialTransformer.java
@@ -19,9 +19,7 @@ package org.apache.solr.response.transform;
  */
 import org.apache.lucene.document.Field;
-import org.apache.lucene.document.NumericField;
 import org.apache.solr.common.SolrDocument;
-import org.apache.solr.handler.component.QueryElevationComponent;
 import org.apache.solr.schema.FieldType;
 import java.util.Set;
@@ -66,8 +64,14 @@ public abstract class BaseEditorialTransformer extends TransformerWithContext {
   protected String getKey(SolrDocument doc) {
     String key;
     Object field = doc.get(idFieldName);
-    if (field instanceof NumericField){
-      key = ((Field)field).stringValue();
+    final Number n;
+    if (field instanceof Field) {
+      n = ((Field) field).numericValue();
+    } else {
+      n = null;
+    }
+    if (n != null) {
+      key = n.toString();
       key = ft.readableToIndexed(key);
     } else if (field instanceof Field){
       key = ((Field)field).stringValue();
diff --git a/solr/core/src/java/org/apache/solr/schema/BinaryField.java b/solr/core/src/java/org/apache/solr/schema/BinaryField.java
index 0223ed51571..77e1a1caf09 100644
--- a/solr/core/src/java/org/apache/solr/schema/BinaryField.java
+++ b/solr/core/src/java/org/apache/solr/schema/BinaryField.java
@@ -81,7 +81,7 @@ public class BinaryField extends FieldType {
       len = buf.length;
     }
-    Field f = new org.apache.lucene.document.BinaryField(field.getName(), buf, offset, len);
+    Field f = new org.apache.lucene.document.StoredField(field.getName(), buf, offset, len);
     f.setBoost(boost);
     return f;
   }
diff --git a/solr/core/src/java/org/apache/solr/schema/TrieField.java b/solr/core/src/java/org/apache/solr/schema/TrieField.java
index 57246f27756..705f7e3e433 100644
--- a/solr/core/src/java/org/apache/solr/schema/TrieField.java
+++ b/solr/core/src/java/org/apache/solr/schema/TrieField.java
@@ -104,9 +104,8 @@ public class TrieField extends org.apache.solr.schema.FieldType {
   @Override
   public Object toObject(IndexableField f) {
-    if (f.numeric()) {
-      final Number val = f.numericValue();
-      if (val==null) return badFieldString(f);
+    final Number val = f.numericValue();
+    if (val != null) {
       return (type == TrieTypes.DATE) ? new Date(val.longValue()) : val;
     } else {
       // the following code is "deprecated" and only to support pre-3.2 indexes using the old BinaryField encoding:
@@ -405,10 +404,8 @@ public class TrieField extends org.apache.solr.schema.FieldType {
   @Override
   public String storedToIndexed(IndexableField f) {
     final BytesRef bytes = new BytesRef(NumericUtils.BUF_SIZE_LONG);
-    if (f instanceof org.apache.lucene.document.NumericField) {
-      final Number val = ((org.apache.lucene.document.NumericField) f).numericValue();
-      if (val==null)
-        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Invalid field contents: "+f.name());
+    final Number val = f.numericValue();
+    if (val != null) {
       switch (type) {
         case INTEGER:
           NumericUtils.intToPrefixCoded(val.intValue(), 0, bytes);
@@ -481,38 +478,60 @@ public class TrieField extends org.apache.solr.schema.FieldType {
     ft.setIndexed(indexed);
     ft.setOmitNorms(field.omitNorms());
     ft.setIndexOptions(getIndexOptions(field, value.toString()));
-
-    final org.apache.lucene.document.NumericField f = new org.apache.lucene.document.NumericField(field.getName(), precisionStep, ft);
+
+    switch (type) {
+      case INTEGER:
+        ft.setNumericType(NumericField.DataType.INT);
+        break;
+      case FLOAT:
+        ft.setNumericType(NumericField.DataType.FLOAT);
+        break;
+      case LONG:
+        ft.setNumericType(NumericField.DataType.LONG);
+        break;
+      case DOUBLE:
+        ft.setNumericType(NumericField.DataType.DOUBLE);
+        break;
+      case DATE:
+        ft.setNumericType(NumericField.DataType.LONG);
+        break;
+      default:
+        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + type);
+    }
+    ft.setNumericPrecisionStep(precisionStep);
+
+    final org.apache.lucene.document.NumericField f;
+
     switch (type) {
       case INTEGER:
         int i = (value instanceof Number) ? ((Number)value).intValue() : Integer.parseInt(value.toString());
-        f.setIntValue(i);
+        f = new org.apache.lucene.document.NumericField(field.getName(), i, ft);
         break;
       case FLOAT:
         float fl = (value instanceof Number) ? ((Number)value).floatValue() : Float.parseFloat(value.toString());
-        f.setFloatValue(fl);
+        f = new org.apache.lucene.document.NumericField(field.getName(), fl, ft);
         break;
       case LONG:
         long l = (value instanceof Number) ? ((Number)value).longValue() : Long.parseLong(value.toString());
-        f.setLongValue(l);
+        f = new org.apache.lucene.document.NumericField(field.getName(), l, ft);
        break;
      case DOUBLE:
        double d = (value instanceof Number) ? ((Number)value).doubleValue() : Double.parseDouble(value.toString());
-        f.setDoubleValue(d);
+        f = new org.apache.lucene.document.NumericField(field.getName(), d, ft);
        break;
      case DATE:
        Date date = (value instanceof Date) ? ((Date)value) : dateField.parseMath(null, value.toString());
-        f.setLongValue(date.getTime());
+        f = new org.apache.lucene.document.NumericField(field.getName(), date.getTime(), ft);
        break;
      default:
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + type);
diff --git a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
index 37bba854820..c15e3be8f98 100644
--- a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
+++ b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
@@ -23,7 +23,7 @@ import java.net.URL;
 import java.util.*;
 import java.util.concurrent.atomic.AtomicLong;
-import org.apache.lucene.document.BinaryField;
+import org.apache.lucene.document.StoredField;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
@@ -420,7 +420,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
     @Override
     public void binaryField(FieldInfo fieldInfo, byte[] value, int offset, int length) throws IOException {
-      doc.add(new BinaryField(fieldInfo.name, value));
+      doc.add(new StoredField(fieldInfo.name, value));
     }
     @Override
@@ -436,30 +436,30 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
     @Override
     public void intField(FieldInfo fieldInfo, int value) {
-      FieldType ft = new FieldType(NumericField.TYPE_STORED);
+      FieldType ft = new FieldType(NumericField.getFieldType(NumericField.DataType.INT, true));
       ft.setIndexed(fieldInfo.isIndexed);
-      doc.add(new NumericField(fieldInfo.name, ft).setIntValue(value));
+      doc.add(new NumericField(fieldInfo.name, value, ft));
     }
     @Override
     public void longField(FieldInfo fieldInfo, long value) {
-      FieldType ft = new FieldType(NumericField.TYPE_STORED);
+      FieldType ft = new FieldType(NumericField.getFieldType(NumericField.DataType.LONG, true));
       ft.setIndexed(fieldInfo.isIndexed);
-      doc.add(new NumericField(fieldInfo.name, ft).setLongValue(value));
+      doc.add(new NumericField(fieldInfo.name, value, ft));
     }
     @Override
     public void floatField(FieldInfo fieldInfo, float value) {
-      FieldType ft = new FieldType(NumericField.TYPE_STORED);
+      FieldType ft = new FieldType(NumericField.getFieldType(NumericField.DataType.FLOAT, true));
       ft.setIndexed(fieldInfo.isIndexed);
-      doc.add(new NumericField(fieldInfo.name, ft).setFloatValue(value));
+      doc.add(new NumericField(fieldInfo.name, value, ft));
     }
     @Override
     public void doubleField(FieldInfo fieldInfo, double value) {
-      FieldType ft = new FieldType(NumericField.TYPE_STORED);
+      FieldType ft = new FieldType(NumericField.getFieldType(NumericField.DataType.DOUBLE, true));
       ft.setIndexed(fieldInfo.isIndexed);
-      doc.add(new NumericField(fieldInfo.name, ft).setDoubleValue(value));
+      doc.add(new NumericField(fieldInfo.name, value, ft));
     }
   }
diff --git a/solr/core/src/test/org/apache/solr/schema/PolyFieldTest.java b/solr/core/src/test/org/apache/solr/schema/PolyFieldTest.java
index b9873084858..fa4df0f3dee 100644
--- a/solr/core/src/test/org/apache/solr/schema/PolyFieldTest.java
+++ b/solr/core/src/test/org/apache/solr/schema/PolyFieldTest.java
@@ -88,7 +88,8 @@ public class PolyFieldTest extends SolrTestCaseJ4 {
     //first two fields contain the values, third is just stored and contains the original
     for (int i = 0; i < 3; i++) {
       boolean hasValue = fields[i].binaryValue() != null
-          || fields[i].stringValue() != null;
+          || fields[i].stringValue() != null
+          || fields[i].numericValue() != null;
       assertTrue("Doesn't have a value: " + fields[i], hasValue);
     }
     /*assertTrue("first field " + fields[0].tokenStreamValue() + " is not 35.0", pt.getSubType().toExternal(fields[0]).equals(String.valueOf(xy[0])));
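
The hunks above all apply the same NumericField migration: the value (and, when a stored variant or a non-default precision step is needed, a FieldType derived from NumericField.getFieldType) is passed to the constructor, and a reused instance is updated with setValue instead of setIntValue/setLongValue/setFloatValue/setDoubleValue. A minimal sketch of that pattern, using an illustrative "price" field rather than anything taken from the patch:

  // 4.0-style numeric field: name, value and (optionally) FieldType at construction time.
  FieldType priceType = new FieldType(NumericField.getFieldType(NumericField.DataType.INT, true));
  priceType.setNumericPrecisionStep(4);

  NumericField price = new NumericField("price", 42, priceType);
  doc.add(price);

  // A reused instance changes its value with setValue(...), not setIntValue(...).
  price.setValue(43);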
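
The DocValues and stored-only changes follow the same shape: DocValuesField now takes its value and DocValues.Type up front (again with setValue for reused instances), and stored-only byte[] values use StoredField in place of the removed BinaryField. Another sketch with made-up field names, not code from the patch:

  // DocValues: value and type are supplied in the constructor.
  DocValuesField group = new DocValuesField("group", new BytesRef("group-1"), DocValues.Type.BYTES_VAR_SORTED);
  doc.add(group);
  group.setValue(new BytesRef("group-2"));

  // Stored-only binary value: StoredField replaces BinaryField.
  doc.add(new StoredField("thumbnail", new byte[] {0x12, 0x34}));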