From 54a5e34d358fa36ebd8b7393490e66aa4e7d4335 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Thu, 10 May 2012 11:45:16 +0000 Subject: [PATCH] LUCENE-3077: fix trapping DV sugar field ctors git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1336617 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/codecs/DocValuesConsumer.java | 68 +++++++-- .../values/FixedStraightBytesImpl.java | 4 +- .../lucene/document/ByteDocValuesField.java | 49 +++++++ .../document/DerefBytesDocValuesField.java | 66 +++++++++ .../lucene/document/DocValuesField.java | 131 ------------------ .../lucene/document/DoubleDocValuesField.java | 49 +++++++ .../org/apache/lucene/document/Field.java | 30 +++- .../lucene/document/FloatDocValuesField.java | 49 +++++++ .../lucene/document/IntDocValuesField.java | 49 +++++++ .../lucene/document/LongDocValuesField.java | 49 +++++++ .../document/PackedLongDocValuesField.java | 52 +++++++ .../lucene/document/ShortDocValuesField.java | 49 +++++++ .../document/SortedBytesDocValuesField.java | 61 ++++++++ .../document/StraightBytesDocValuesField.java | 65 +++++++++ .../org/apache/lucene/index/DocValues.java | 22 ++- .../java/org/apache/lucene/index/Norm.java | 68 ++++++--- .../org/apache/lucene/index/TypePromoter.java | 2 +- .../search/similarities/Similarity.java | 8 +- .../apache/lucene/index/TestAddIndexes.java | 6 +- .../lucene/index/TestDocValuesIndexing.java | 81 +++++++---- .../lucene/index/TestTypePromotion.java | 109 +++++++++------ .../lucene/search/TestDocValuesScoring.java | 5 +- .../org/apache/lucene/search/TestSort.java | 48 +++++-- .../search/grouping/GroupingSearch.java | 11 +- .../grouping/AllGroupHeadsCollectorTest.java | 40 +++++- .../grouping/AllGroupsCollectorTest.java | 2 +- .../grouping/DistinctValuesCollectorTest.java | 16 ++- .../grouping/GroupFacetCollectorTest.java | 6 +- .../search/grouping/GroupingSearchTest.java | 2 +- .../lucene/search/grouping/TestGrouping.java | 4 +- .../lucene/index/RandomIndexWriter.java | 
74 ++++++---- .../org/apache/lucene/util/LineFileDocs.java | 5 +- .../org/apache/lucene/util/_TestUtil.java | 65 ++++++--- 33 files changed, 1011 insertions(+), 334 deletions(-) create mode 100644 lucene/core/src/java/org/apache/lucene/document/ByteDocValuesField.java create mode 100644 lucene/core/src/java/org/apache/lucene/document/DerefBytesDocValuesField.java delete mode 100644 lucene/core/src/java/org/apache/lucene/document/DocValuesField.java create mode 100644 lucene/core/src/java/org/apache/lucene/document/DoubleDocValuesField.java create mode 100644 lucene/core/src/java/org/apache/lucene/document/FloatDocValuesField.java create mode 100644 lucene/core/src/java/org/apache/lucene/document/IntDocValuesField.java create mode 100644 lucene/core/src/java/org/apache/lucene/document/LongDocValuesField.java create mode 100644 lucene/core/src/java/org/apache/lucene/document/PackedLongDocValuesField.java create mode 100644 lucene/core/src/java/org/apache/lucene/document/ShortDocValuesField.java create mode 100644 lucene/core/src/java/org/apache/lucene/document/SortedBytesDocValuesField.java create mode 100644 lucene/core/src/java/org/apache/lucene/document/StraightBytesDocValuesField.java diff --git a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java index 5b621395512..94665faced1 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java @@ -18,11 +18,20 @@ package org.apache.lucene.codecs; */ import java.io.IOException; -import org.apache.lucene.document.DocValuesField; +import org.apache.lucene.document.ByteDocValuesField; +import org.apache.lucene.document.DerefBytesDocValuesField; +import org.apache.lucene.document.DoubleDocValuesField; import org.apache.lucene.document.Field; +import org.apache.lucene.document.FloatDocValuesField; +import 
org.apache.lucene.document.IntDocValuesField; +import org.apache.lucene.document.LongDocValuesField; +import org.apache.lucene.document.PackedLongDocValuesField; +import org.apache.lucene.document.ShortDocValuesField; +import org.apache.lucene.document.SortedBytesDocValuesField; +import org.apache.lucene.document.StraightBytesDocValuesField; import org.apache.lucene.index.DocValues.Source; -import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValues.Type; +import org.apache.lucene.index.DocValues; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.MergeState; import org.apache.lucene.util.Bits; @@ -116,27 +125,46 @@ public abstract class DocValuesConsumer { final Field scratchField; switch(type) { case VAR_INTS: - case FIXED_INTS_16: - case FIXED_INTS_32: - case FIXED_INTS_64: + scratchField = new PackedLongDocValuesField("", (long) 0); + break; case FIXED_INTS_8: - scratchField = new DocValuesField("", (long) 0, type); + scratchField = new ByteDocValuesField("", (byte) 0); + break; + case FIXED_INTS_16: + scratchField = new ShortDocValuesField("", (short) 0); + break; + case FIXED_INTS_32: + scratchField = new IntDocValuesField("", 0); + break; + case FIXED_INTS_64: + scratchField = new LongDocValuesField("", (long) 0); break; case FLOAT_32: + scratchField = new FloatDocValuesField("", 0f); + break; case FLOAT_64: - scratchField = new DocValuesField("", (double) 0, type); + scratchField = new DoubleDocValuesField("", 0d); break; case BYTES_FIXED_STRAIGHT: - case BYTES_FIXED_DEREF: - case BYTES_FIXED_SORTED: + scratchField = new StraightBytesDocValuesField("", new BytesRef(), true); + break; case BYTES_VAR_STRAIGHT: + scratchField = new StraightBytesDocValuesField("", new BytesRef(), false); + break; + case BYTES_FIXED_DEREF: + scratchField = new DerefBytesDocValuesField("", new BytesRef(), true); + break; case BYTES_VAR_DEREF: + scratchField = new DerefBytesDocValuesField("", new BytesRef(), false); + break; + case 
BYTES_FIXED_SORTED: + scratchField = new SortedBytesDocValuesField("", new BytesRef(), true); + break; case BYTES_VAR_SORTED: - scratchField = new DocValuesField("", new BytesRef(), type); + scratchField = new SortedBytesDocValuesField("", new BytesRef(), false); break; default: - assert false; - scratchField = null; + throw new IllegalStateException("unknown Type: " + type); } for (int i = 0; i < docCount; i++) { if (liveDocs == null || liveDocs.get(i)) { @@ -171,14 +199,24 @@ public abstract class DocValuesConsumer { case BYTES_VAR_STRAIGHT: scratchField.setBytesValue(source.getBytes(sourceDoc, spare)); break; - case FIXED_INTS_16: - case FIXED_INTS_32: - case FIXED_INTS_64: case FIXED_INTS_8: + scratchField.setByteValue((byte) source.getInt(sourceDoc)); + break; + case FIXED_INTS_16: + scratchField.setShortValue((short) source.getInt(sourceDoc)); + break; + case FIXED_INTS_32: + scratchField.setIntValue((int) source.getInt(sourceDoc)); + break; + case FIXED_INTS_64: + scratchField.setLongValue(source.getInt(sourceDoc)); + break; case VAR_INTS: scratchField.setLongValue(source.getInt(sourceDoc)); break; case FLOAT_32: + scratchField.setFloatValue((float) source.getFloat(sourceDoc)); + break; case FLOAT_64: scratchField.setDoubleValue(source.getFloat(sourceDoc)); break; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedStraightBytesImpl.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedStraightBytesImpl.java index 4ea4b46e257..fd779aed034 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedStraightBytesImpl.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedStraightBytesImpl.java @@ -22,7 +22,7 @@ import java.io.IOException; import org.apache.lucene.codecs.lucene40.values.Bytes.BytesReaderBase; import org.apache.lucene.codecs.lucene40.values.Bytes.BytesSourceBase; import org.apache.lucene.codecs.lucene40.values.Bytes.BytesWriterBase; -import 
org.apache.lucene.document.DocValuesField; +import org.apache.lucene.document.StraightBytesDocValuesField; import org.apache.lucene.document.Field; import org.apache.lucene.index.DocValues.Source; import org.apache.lucene.index.DocValues.Type; @@ -54,7 +54,7 @@ class FixedStraightBytesImpl { static final int VERSION_CURRENT = VERSION_START; static abstract class FixedBytesWriterBase extends BytesWriterBase { - protected final DocValuesField bytesSpareField = new DocValuesField("", new BytesRef(), Type.BYTES_FIXED_STRAIGHT); + protected final StraightBytesDocValuesField bytesSpareField = new StraightBytesDocValuesField("", new BytesRef(), true); protected int lastDocID = -1; // start at -1 if the first added value is > 0 protected int size = -1; diff --git a/lucene/core/src/java/org/apache/lucene/document/ByteDocValuesField.java b/lucene/core/src/java/org/apache/lucene/document/ByteDocValuesField.java new file mode 100644 index 00000000000..8291286f3b3 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/document/ByteDocValuesField.java @@ -0,0 +1,49 @@ +package org.apache.lucene.document; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.DocValues; + +/** + *

+ * This class provides a {@link Field} that enables storing + * of a per-document byte value for scoring, sorting or value retrieval. Here's an + * example usage: + * + *

+ *   document.add(new ByteDocValuesField(name, (byte) 22));
+ * 
+ * + *

+ * If you also need to store the value, you should add a + * separate {@link StoredField} instance. + * */ + +public class ByteDocValuesField extends Field { + + public static final FieldType TYPE = new FieldType(); + static { + TYPE.setDocValueType(DocValues.Type.FIXED_INTS_8); + TYPE.freeze(); + } + + public ByteDocValuesField(String name, byte value) { + super(name, TYPE); + fieldsData = Byte.valueOf(value); + } +} diff --git a/lucene/core/src/java/org/apache/lucene/document/DerefBytesDocValuesField.java b/lucene/core/src/java/org/apache/lucene/document/DerefBytesDocValuesField.java new file mode 100644 index 00000000000..1e9e68a7d6a --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/document/DerefBytesDocValuesField.java @@ -0,0 +1,66 @@ +package org.apache.lucene.document; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.DocValues; +import org.apache.lucene.util.BytesRef; + +/** + *

+ * This class provides a {@link Field} that enables storing + * of a per-document {@link BytesRef} value. The values are + * stored indirectly, such that many documents sharing the + * same value all point to a single copy of the value, which + * is a good fit when the fields share values. If values + * are (mostly) unique it's better to use {@link + * StraightBytesDocValuesField}. Here's an example usage: + * + *

+ *   document.add(new DerefBytesDocValuesField(name, new BytesRef("hello")));
+ * 
+ * + *

+ * If you also need to store the value, you should add a + * separate {@link StoredField} instance. + * */ + +public class DerefBytesDocValuesField extends Field { + + // TODO: ideally indexer figures out var vs fixed on its own!? + public static final FieldType TYPE_FIXED_LEN = new FieldType(); + static { + TYPE_FIXED_LEN.setDocValueType(DocValues.Type.BYTES_FIXED_DEREF); + TYPE_FIXED_LEN.freeze(); + } + + public static final FieldType TYPE_VAR_LEN = new FieldType(); + static { + TYPE_VAR_LEN.setDocValueType(DocValues.Type.BYTES_VAR_DEREF); + TYPE_VAR_LEN.freeze(); + } + + public DerefBytesDocValuesField(String name, BytesRef bytes) { + super(name, TYPE_VAR_LEN); + fieldsData = bytes; + } + + public DerefBytesDocValuesField(String name, BytesRef bytes, boolean isFixedLength) { + super(name, isFixedLength ? TYPE_FIXED_LEN : TYPE_VAR_LEN); + fieldsData = bytes; + } +} diff --git a/lucene/core/src/java/org/apache/lucene/document/DocValuesField.java b/lucene/core/src/java/org/apache/lucene/document/DocValuesField.java deleted file mode 100644 index 46a101ac189..00000000000 --- a/lucene/core/src/java/org/apache/lucene/document/DocValuesField.java +++ /dev/null @@ -1,131 +0,0 @@ -package org.apache.lucene.document; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.util.EnumSet; -import java.util.HashMap; -import java.util.Map; - -import org.apache.lucene.index.DocValues.Type; // javadocs -import org.apache.lucene.index.DocValues; -import org.apache.lucene.util.BytesRef; - -/** - *

- * This class provides a {@link Field} that enables storing of typed - * per-document values for scoring, sorting or value retrieval. Here's an - * example usage, adding an int value (22): - * - *

- *   document.add(new DocValuesField(name, 22, DocValues.Type.VAR_INTS));
- * 
- * - * For optimal performance, re-use the DocValuesField and - * {@link Document} instance for more than one document: - * - *
- *  DocValuesField field = new DocValuesField(name, 0, DocValues.Type.VAR_INTS);
- *  Document document = new Document();
- *  document.add(field);
- * 
- *  for(all documents) {
- *    ...
- *    field.setValue(value)
- *    writer.addDocument(document);
- *    ...
- *  }
- * 
- * - *

- * If you also need to store the value, you should add a - * separate {@link StoredField} instance. - * */ - -public class DocValuesField extends Field { - - private static final Map types = new HashMap(); - static { - for(DocValues.Type type : DocValues.Type.values()) { - final FieldType ft = new FieldType(); - ft.setDocValueType(type); - ft.freeze(); - types.put(type, ft); - } - } - - private static EnumSet BYTES = EnumSet.of( - Type.BYTES_FIXED_DEREF, - Type.BYTES_FIXED_STRAIGHT, - Type.BYTES_VAR_DEREF, - Type.BYTES_VAR_STRAIGHT, - Type.BYTES_FIXED_SORTED, - Type.BYTES_VAR_SORTED); - - private static EnumSet INTS = EnumSet.of( - Type.VAR_INTS, - Type.FIXED_INTS_8, - Type.FIXED_INTS_16, - Type.FIXED_INTS_32, - Type.FIXED_INTS_64); - - public static FieldType getFieldType(DocValues.Type type) { - return types.get(type); - } - - public DocValuesField(String name, BytesRef bytes, DocValues.Type docValueType) { - super(name, getFieldType(docValueType)); - if (!BYTES.contains(docValueType)) { - throw new IllegalArgumentException("docValueType must be one of: " + BYTES + "; got " + docValueType); - } - fieldsData = bytes; - } - - public DocValuesField(String name, int value, DocValues.Type docValueType) { - super(name, getFieldType(docValueType)); - if (!INTS.contains(docValueType)) { - throw new IllegalArgumentException("docValueType must be one of: " + INTS +"; got " + docValueType); - } - fieldsData = Integer.valueOf(value); - } - - public DocValuesField(String name, long value, DocValues.Type docValueType) { - super(name, getFieldType(docValueType)); - if (!INTS.contains(docValueType)) { - throw new IllegalArgumentException("docValueType must be one of: " + INTS +"; got " + docValueType); - } - fieldsData = Long.valueOf(value); - } - - public DocValuesField(String name, float value, DocValues.Type docValueType) { - super(name, getFieldType(docValueType)); - if (docValueType != DocValues.Type.FLOAT_32 && - docValueType != DocValues.Type.FLOAT_64) { - throw new 
IllegalArgumentException("docValueType must be FLOAT_32/64; got " + docValueType); - } - fieldsData = Float.valueOf(value); - } - - public DocValuesField(String name, double value, DocValues.Type docValueType) { - super(name, getFieldType(docValueType)); - if (docValueType != DocValues.Type.FLOAT_32 && - docValueType != DocValues.Type.FLOAT_64) { - throw new IllegalArgumentException("docValueType must be FLOAT_32/64; got " + docValueType); - } - fieldsData = Double.valueOf(value); - } -} diff --git a/lucene/core/src/java/org/apache/lucene/document/DoubleDocValuesField.java b/lucene/core/src/java/org/apache/lucene/document/DoubleDocValuesField.java new file mode 100644 index 00000000000..b0021b69262 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/document/DoubleDocValuesField.java @@ -0,0 +1,49 @@ +package org.apache.lucene.document; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.DocValues; + +/** + *

+ * This class provides a {@link Field} that enables storing + * of a per-document double value for scoring, sorting or value retrieval. Here's an + * example usage: + * + *

+ *   document.add(new DoubleDocValuesField(name, 22.0));
+ * 
+ * + *

+ * If you also need to store the value, you should add a + * separate {@link StoredField} instance. + * */ + +public class DoubleDocValuesField extends Field { + + public static final FieldType TYPE = new FieldType(); + static { + TYPE.setDocValueType(DocValues.Type.FLOAT_64); + TYPE.freeze(); + } + + public DoubleDocValuesField(String name, double value) { + super(name, TYPE); + fieldsData = Double.valueOf(value); + } +} diff --git a/lucene/core/src/java/org/apache/lucene/document/Field.java b/lucene/core/src/java/org/apache/lucene/document/Field.java index 6e5840eae5d..9c50dd3c76f 100644 --- a/lucene/core/src/java/org/apache/lucene/document/Field.java +++ b/lucene/core/src/java/org/apache/lucene/document/Field.java @@ -30,7 +30,7 @@ import org.apache.lucene.document.FieldType.NumericType; import org.apache.lucene.index.IndexWriter; // javadocs import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.IndexableFieldType; -import org.apache.lucene.index.Norm; +import org.apache.lucene.index.Norm; // javadocs import org.apache.lucene.util.BytesRef; import org.apache.lucene.index.FieldInvertState; // javadocs @@ -38,7 +38,13 @@ import org.apache.lucene.index.FieldInvertState; // javadocs * Expert: directly create a field for a document. Most * users should use one of the sugar subclasses: {@link * IntField}, {@link LongField}, {@link FloatField}, {@link - * DoubleField}, {@link DocValuesField}, {@link + * DoubleField}, {@link ByteDocValuesField}, {@link + * ShortDocValuesField}, {@link IntDocValuesField}, {@link + * LongDocValuesField}, {@link PackedLongDocValuesField}, + * {@link FloatDocValuesField}, {@link + * DoubleDocValuesField}, {@link SortedBytesDocValuesField}, + * {@link DerefBytesDocValuesField}, {@link + * StraightBytesDocValuesField}, {@link * StringField}, {@link TextField}, {@link StoredField}. * *

A field is a section of a Document. Each field has three @@ -273,6 +279,26 @@ public class Field implements IndexableField { fieldsData = value; } + public void setByteValue(byte value) { + if (!(fieldsData instanceof Byte)) { + throw new IllegalArgumentException("cannot change value type from " + fieldsData.getClass().getSimpleName() + " to Byte"); + } + if (numericTokenStream != null) { + numericTokenStream.setIntValue(value); + } + fieldsData = Byte.valueOf(value); + } + + public void setShortValue(short value) { + if (!(fieldsData instanceof Short)) { + throw new IllegalArgumentException("cannot change value type from " + fieldsData.getClass().getSimpleName() + " to Short"); + } + if (numericTokenStream != null) { + numericTokenStream.setIntValue(value); + } + fieldsData = Short.valueOf(value); + } + public void setIntValue(int value) { if (!(fieldsData instanceof Integer)) { throw new IllegalArgumentException("cannot change value type from " + fieldsData.getClass().getSimpleName() + " to Integer"); diff --git a/lucene/core/src/java/org/apache/lucene/document/FloatDocValuesField.java b/lucene/core/src/java/org/apache/lucene/document/FloatDocValuesField.java new file mode 100644 index 00000000000..3cee1fafe20 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/document/FloatDocValuesField.java @@ -0,0 +1,49 @@ +package org.apache.lucene.document; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.DocValues; + +/** + *

+ * This class provides a {@link Field} that enables storing + * of a per-document float value for scoring, sorting or value retrieval. Here's an + * example usage: + * + *

+ *   document.add(new FloatDocValuesField(name, 22f));
+ * 
+ * + *

+ * If you also need to store the value, you should add a + * separate {@link StoredField} instance. + * */ + +public class FloatDocValuesField extends Field { + + public static final FieldType TYPE = new FieldType(); + static { + TYPE.setDocValueType(DocValues.Type.FLOAT_32); + TYPE.freeze(); + } + + public FloatDocValuesField(String name, float value) { + super(name, TYPE); + fieldsData = Float.valueOf(value); + } +} diff --git a/lucene/core/src/java/org/apache/lucene/document/IntDocValuesField.java b/lucene/core/src/java/org/apache/lucene/document/IntDocValuesField.java new file mode 100644 index 00000000000..f769af3bf84 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/document/IntDocValuesField.java @@ -0,0 +1,49 @@ +package org.apache.lucene.document; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.DocValues; + +/** + *

+ * This class provides a {@link Field} that enables storing + * of a per-document int value for scoring, sorting or value retrieval. Here's an + * example usage: + * + *

+ *   document.add(new IntDocValuesField(name, 22));
+ * 
+ * + *

+ * If you also need to store the value, you should add a + * separate {@link StoredField} instance. + * */ + +public class IntDocValuesField extends Field { + + public static final FieldType TYPE = new FieldType(); + static { + TYPE.setDocValueType(DocValues.Type.FIXED_INTS_32); + TYPE.freeze(); + } + + public IntDocValuesField(String name, int value) { + super(name, TYPE); + fieldsData = Integer.valueOf(value); + } +} diff --git a/lucene/core/src/java/org/apache/lucene/document/LongDocValuesField.java b/lucene/core/src/java/org/apache/lucene/document/LongDocValuesField.java new file mode 100644 index 00000000000..7b629c07f5c --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/document/LongDocValuesField.java @@ -0,0 +1,49 @@ +package org.apache.lucene.document; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.DocValues; + +/** + *

+ * This class provides a {@link Field} that enables storing + * of a per-document long value for scoring, sorting or value retrieval. Here's an + * example usage: + * + *

+ *   document.add(new LongDocValuesField(name, 22L));
+ * 
+ * + *

+ * If you also need to store the value, you should add a + * separate {@link StoredField} instance. + * */ + +public class LongDocValuesField extends Field { + + public static final FieldType TYPE = new FieldType(); + static { + TYPE.setDocValueType(DocValues.Type.FIXED_INTS_64); + TYPE.freeze(); + } + + public LongDocValuesField(String name, long value) { + super(name, TYPE); + fieldsData = Long.valueOf(value); + } +} diff --git a/lucene/core/src/java/org/apache/lucene/document/PackedLongDocValuesField.java b/lucene/core/src/java/org/apache/lucene/document/PackedLongDocValuesField.java new file mode 100644 index 00000000000..c29e0d956a5 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/document/PackedLongDocValuesField.java @@ -0,0 +1,52 @@ +package org.apache.lucene.document; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.AtomicReader; // javadocs + +/** + *

+ * This class provides a {@link Field} that enables storing + * of a per-document long value for scoring, sorting or + * value retrieval. The values are encoded in the index and + * in RAM (when loaded via {@link AtomicReader#docValues}) + * using packed ints. Here's an example usage: + * + *

+ *   document.add(new PackedLongDocValuesField(name, 22L));
+ * 
+ * + *

+ * If you also need to store the value, you should add a + * separate {@link StoredField} instance. + * */ + +public class PackedLongDocValuesField extends Field { + + public static final FieldType TYPE = new FieldType(); + static { + TYPE.setDocValueType(DocValues.Type.VAR_INTS); + TYPE.freeze(); + } + + public PackedLongDocValuesField(String name, long value) { + super(name, TYPE); + fieldsData = Long.valueOf(value); + } +} diff --git a/lucene/core/src/java/org/apache/lucene/document/ShortDocValuesField.java b/lucene/core/src/java/org/apache/lucene/document/ShortDocValuesField.java new file mode 100644 index 00000000000..6d50f6e24a8 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/document/ShortDocValuesField.java @@ -0,0 +1,49 @@ +package org.apache.lucene.document; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.DocValues; + +/** + *

+ * This class provides a {@link Field} that enables storing + * of a per-document short value for scoring, sorting or value retrieval. Here's an + * example usage: + * + *

+ *   document.add(new ShortDocValuesField(name, (short) 22));
+ * 
+ * + *

+ * If you also need to store the value, you should add a + * separate {@link StoredField} instance. + * */ + +public class ShortDocValuesField extends Field { + + public static final FieldType TYPE = new FieldType(); + static { + TYPE.setDocValueType(DocValues.Type.FIXED_INTS_16); + TYPE.freeze(); + } + + public ShortDocValuesField(String name, short value) { + super(name, TYPE); + fieldsData = Short.valueOf(value); + } +} diff --git a/lucene/core/src/java/org/apache/lucene/document/SortedBytesDocValuesField.java b/lucene/core/src/java/org/apache/lucene/document/SortedBytesDocValuesField.java new file mode 100644 index 00000000000..70c0a80494d --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/document/SortedBytesDocValuesField.java @@ -0,0 +1,61 @@ +package org.apache.lucene.document; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.DocValues; +import org.apache.lucene.util.BytesRef; + +/** + *

+ * This class provides a {@link Field} that enables storing + * of a per-document {@link BytesRef} value, indexed for + * sorting. Here's an example usage: + * + * <pre>
+ *   document.add(new SortedBytesDocValuesField(name, new BytesRef("hello")));
+ * </pre>
+ * + * <p>
+ * If you also need to store the value, you should add a + * separate {@link StoredField} instance. + * */ + +public class SortedBytesDocValuesField extends Field { + + // TODO: ideally indexer figures out var vs fixed on its own!? + public static final FieldType TYPE_FIXED_LEN = new FieldType(); + static { + TYPE_FIXED_LEN.setDocValueType(DocValues.Type.BYTES_FIXED_SORTED); + TYPE_FIXED_LEN.freeze(); + } + + public static final FieldType TYPE_VAR_LEN = new FieldType(); + static { + TYPE_VAR_LEN.setDocValueType(DocValues.Type.BYTES_VAR_SORTED); + TYPE_VAR_LEN.freeze(); + } + + public SortedBytesDocValuesField(String name, BytesRef bytes) { + this(name, bytes, false); + } + + public SortedBytesDocValuesField(String name, BytesRef bytes, boolean isFixedLength) { + super(name, isFixedLength ? TYPE_FIXED_LEN : TYPE_VAR_LEN); + fieldsData = bytes; + } +} diff --git a/lucene/core/src/java/org/apache/lucene/document/StraightBytesDocValuesField.java b/lucene/core/src/java/org/apache/lucene/document/StraightBytesDocValuesField.java new file mode 100644 index 00000000000..50f7931ef03 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/document/StraightBytesDocValuesField.java @@ -0,0 +1,65 @@ +package org.apache.lucene.document; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.DocValues; +import org.apache.lucene.util.BytesRef; + +/** + *

+ * This class provides a {@link Field} that enables storing + * of a per-document {@link BytesRef} value. The values are + * stored directly with no sharing, which is a good fit when + * the fields don't share (many) values, such as a title + * field. If values may be shared it's better to use {@link + * DerefBytesDocValuesField}. Here's an example usage: + * + * <pre>
+ *   document.add(new StraightBytesDocValuesField(name, new BytesRef("hello")));
+ * </pre>
+ * + * <p>
+ * If you also need to store the value, you should add a + * separate {@link StoredField} instance. + * */ + +public class StraightBytesDocValuesField extends Field { + + // TODO: ideally indexer figures out var vs fixed on its own!? + public static final FieldType TYPE_FIXED_LEN = new FieldType(); + static { + TYPE_FIXED_LEN.setDocValueType(DocValues.Type.BYTES_FIXED_STRAIGHT); + TYPE_FIXED_LEN.freeze(); + } + + public static final FieldType TYPE_VAR_LEN = new FieldType(); + static { + TYPE_VAR_LEN.setDocValueType(DocValues.Type.BYTES_VAR_STRAIGHT); + TYPE_VAR_LEN.freeze(); + } + + public StraightBytesDocValuesField(String name, BytesRef bytes) { + super(name, TYPE_VAR_LEN); + fieldsData = bytes; + } + + public StraightBytesDocValuesField(String name, BytesRef bytes, boolean isFixedLength) { + super(name, isFixedLength ? TYPE_FIXED_LEN : TYPE_VAR_LEN); + fieldsData = bytes; + } +} diff --git a/lucene/core/src/java/org/apache/lucene/index/DocValues.java b/lucene/core/src/java/org/apache/lucene/index/DocValues.java index ebf296385fc..d0af6136360 100644 --- a/lucene/core/src/java/org/apache/lucene/index/DocValues.java +++ b/lucene/core/src/java/org/apache/lucene/index/DocValues.java @@ -21,7 +21,16 @@ import java.io.IOException; import java.util.Comparator; import org.apache.lucene.codecs.DocValuesFormat; -import org.apache.lucene.document.DocValuesField; +import org.apache.lucene.document.ByteDocValuesField; // javadocs +import org.apache.lucene.document.DerefBytesDocValuesField; // javadocs +import org.apache.lucene.document.DoubleDocValuesField; // javadocs +import org.apache.lucene.document.FloatDocValuesField; // javadocs +import org.apache.lucene.document.IntDocValuesField; // javadocs +import org.apache.lucene.document.LongDocValuesField; // javadocs +import org.apache.lucene.document.PackedLongDocValuesField; // javadocs +import org.apache.lucene.document.ShortDocValuesField; // javadocs +import org.apache.lucene.document.SortedBytesDocValuesField; // 
javadocs +import org.apache.lucene.document.StraightBytesDocValuesField; // javadocs import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.packed.PackedInts; @@ -40,7 +49,16 @@ import org.apache.lucene.util.packed.PackedInts; * {@link DocValues} are fully integrated into the {@link DocValuesFormat} API. * * @see Type for limitations and default implementation documentation - * @see DocValuesField for adding values to the index + * @see ByteDocValuesField for adding byte values to the index + * @see ShortDocValuesField for adding short values to the index + * @see IntDocValuesField for adding int values to the index + * @see LongDocValuesField for adding long values to the index + * @see FloatDocValuesField for adding float values to the index + * @see DoubleDocValuesField for adding double values to the index + * @see PackedLongDocValuesField for adding packed long values to the index + * @see SortedBytesDocValuesField for adding sorted {@link BytesRef} values to the index + * @see StraightBytesDocValuesField for adding straight {@link BytesRef} values to the index + * @see DerefBytesDocValuesField for adding deref {@link BytesRef} values to the index * @see DocValuesFormat#docsConsumer(org.apache.lucene.index.PerDocWriteState) for * customization * @lucene.experimental diff --git a/lucene/core/src/java/org/apache/lucene/index/Norm.java b/lucene/core/src/java/org/apache/lucene/index/Norm.java index b99f9827188..9c3c3b28bea 100644 --- a/lucene/core/src/java/org/apache/lucene/index/Norm.java +++ b/lucene/core/src/java/org/apache/lucene/index/Norm.java @@ -16,7 +16,17 @@ package org.apache.lucene.index; * See the License for the specific language governing permissions and * limitations under the License. 
*/ -import org.apache.lucene.document.DocValuesField; +import org.apache.lucene.document.ByteDocValuesField; +import org.apache.lucene.document.DerefBytesDocValuesField; +import org.apache.lucene.document.DoubleDocValuesField; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FloatDocValuesField; +import org.apache.lucene.document.IntDocValuesField; +import org.apache.lucene.document.LongDocValuesField; +import org.apache.lucene.document.PackedLongDocValuesField; +import org.apache.lucene.document.ShortDocValuesField; +import org.apache.lucene.document.SortedBytesDocValuesField; +import org.apache.lucene.document.StraightBytesDocValuesField; import org.apache.lucene.index.DocValues.Type; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.util.BytesRef; @@ -33,7 +43,7 @@ import org.apache.lucene.util.BytesRef; * @lucene.internal */ public final class Norm { - private DocValuesField field; + private Field field; private BytesRef spare; /** @@ -81,7 +91,7 @@ public final class Norm { */ public void setShort(short norm) { setType(Type.FIXED_INTS_16); - this.field.setIntValue(norm); + this.field.setShortValue(norm); } @@ -106,7 +116,7 @@ public final class Norm { */ public void setByte(byte norm) { setType(Type.FIXED_INTS_8); - this.field.setIntValue(norm); + this.field.setByteValue(norm); } /** @@ -124,26 +134,46 @@ public final class Norm { throw new IllegalArgumentException("FieldType missmatch - expected "+type+" but was " + field.fieldType().docValueType()); } } else { - switch (type) { - case BYTES_FIXED_DEREF: - case BYTES_FIXED_SORTED: - case BYTES_FIXED_STRAIGHT: - case BYTES_VAR_DEREF: - case BYTES_VAR_SORTED: - case BYTES_VAR_STRAIGHT: - this.field = new DocValuesField("", new BytesRef(), type); - break; - case FIXED_INTS_16: - case FIXED_INTS_32: - case FIXED_INTS_64: - case FIXED_INTS_8: + switch(type) { case VAR_INTS: - this.field = new DocValuesField("", 0, type); + field = new 
PackedLongDocValuesField("", (long) 0); + break; + case FIXED_INTS_8: + field = new ByteDocValuesField("", (byte) 0); + break; + case FIXED_INTS_16: + field = new ShortDocValuesField("", (short) 0); + break; + case FIXED_INTS_32: + field = new IntDocValuesField("", 0); + break; + case FIXED_INTS_64: + field = new LongDocValuesField("", (byte) 0); break; case FLOAT_32: + field = new FloatDocValuesField("", 0f); + break; case FLOAT_64: - this.field = new DocValuesField("", 0f, type); + field = new DoubleDocValuesField("", 0d); + break; + case BYTES_FIXED_STRAIGHT: + field = new StraightBytesDocValuesField("", new BytesRef(), true); + break; + case BYTES_VAR_STRAIGHT: + field = new StraightBytesDocValuesField("", new BytesRef(), false); + break; + case BYTES_FIXED_DEREF: + field = new DerefBytesDocValuesField("", new BytesRef(), true); + break; + case BYTES_VAR_DEREF: + field = new DerefBytesDocValuesField("", new BytesRef(), false); + break; + case BYTES_FIXED_SORTED: + field = new SortedBytesDocValuesField("", new BytesRef(), true); + break; + case BYTES_VAR_SORTED: + field = new SortedBytesDocValuesField("", new BytesRef(), false); break; default: throw new IllegalArgumentException("unknown Type: " + type); diff --git a/lucene/core/src/java/org/apache/lucene/index/TypePromoter.java b/lucene/core/src/java/org/apache/lucene/index/TypePromoter.java index baed1b94c4f..8c9fec9383c 100644 --- a/lucene/core/src/java/org/apache/lucene/index/TypePromoter.java +++ b/lucene/core/src/java/org/apache/lucene/index/TypePromoter.java @@ -114,8 +114,8 @@ class TypePromoter { return create(FLAGS_MAP.get(promoted.flags & PROMOTE_TO_VAR_SIZE_MASK), VAR_TYPE_VALUE_SIZE); } - return promoted; + return promoted; } /** diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/Similarity.java b/lucene/core/src/java/org/apache/lucene/search/similarities/Similarity.java index 94a58546211..05a28f9a0b9 100644 --- 
a/lucene/core/src/java/org/apache/lucene/search/similarities/Similarity.java +++ b/lucene/core/src/java/org/apache/lucene/search/similarities/Similarity.java @@ -19,7 +19,8 @@ package org.apache.lucene.search.similarities; import java.io.IOException; -import org.apache.lucene.document.DocValuesField; // javadoc +import org.apache.lucene.document.ByteDocValuesField; // javadoc +import org.apache.lucene.document.FloatDocValuesField; // javadoc import org.apache.lucene.index.AtomicReader; // javadoc import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.FieldInvertState; @@ -29,7 +30,6 @@ import org.apache.lucene.search.CollectionStatistics; import org.apache.lucene.search.Explanation; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.PhraseQuery; -import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermStatistics; import org.apache.lucene.search.spans.SpanQuery; // javadoc @@ -68,7 +68,9 @@ import org.apache.lucene.util.SmallFloat; // javadoc * {@link CollectionStatistics#maxDoc()} or {@link CollectionStatistics#docCount()}, * depending upon whether the average should reflect field sparsity. *

- * Additional scoring factors can be stored in named {@link DocValuesField}s, and accessed + * Additional scoring factors can be stored in named + * *DocValuesFields (such as {@link + * ByteDocValuesField} or {@link FloatDocValuesField}), and accessed * at query-time with {@link AtomicReader#docValues(String)}. *

* Finally, using index-time boosts (either via folding into the normalization byte or diff --git a/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java b/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java index 6284669de12..4d282591a39 100755 --- a/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java @@ -45,7 +45,7 @@ import org.apache.lucene.codecs.pulsing.Pulsing40PostingsFormat; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; -import org.apache.lucene.document.DocValuesField; +import org.apache.lucene.document.PackedLongDocValuesField; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexWriterConfig.OpenMode; @@ -1263,7 +1263,7 @@ public class TestAddIndexes extends LuceneTestCase { RandomIndexWriter w = new RandomIndexWriter(random(), d1); Document doc = new Document(); doc.add(newField("id", "1", StringField.TYPE_STORED)); - doc.add(new DocValuesField("dv", 1, DocValues.Type.VAR_INTS)); + doc.add(new PackedLongDocValuesField("dv", 1)); w.addDocument(doc); IndexReader r1 = w.getReader(); w.close(); @@ -1272,7 +1272,7 @@ public class TestAddIndexes extends LuceneTestCase { w = new RandomIndexWriter(random(), d2); doc = new Document(); doc.add(newField("id", "2", StringField.TYPE_STORED)); - doc.add(new DocValuesField("dv", 2, DocValues.Type.VAR_INTS)); + doc.add(new PackedLongDocValuesField("dv", 2)); w.addDocument(doc); IndexReader r2 = w.getReader(); w.close(); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java b/lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java index 807943f8d0d..462b3b44326 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java +++ 
b/lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java @@ -20,13 +20,21 @@ import java.io.Closeable; import java.io.IOException; import java.util.*; import java.util.Map.Entry; -import java.util.*; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.codecs.Codec; -import org.apache.lucene.document.DocValuesField; +import org.apache.lucene.document.ByteDocValuesField; +import org.apache.lucene.document.DerefBytesDocValuesField; import org.apache.lucene.document.Document; +import org.apache.lucene.document.DoubleDocValuesField; import org.apache.lucene.document.Field; +import org.apache.lucene.document.FloatDocValuesField; +import org.apache.lucene.document.IntDocValuesField; +import org.apache.lucene.document.LongDocValuesField; +import org.apache.lucene.document.PackedLongDocValuesField; +import org.apache.lucene.document.ShortDocValuesField; +import org.apache.lucene.document.SortedBytesDocValuesField; +import org.apache.lucene.document.StraightBytesDocValuesField; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.DocValues.SortedSource; @@ -67,7 +75,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { IndexWriter writer = new IndexWriter(dir, writerConfig(false)); for (int i = 0; i < 5; i++) { Document doc = new Document(); - doc.add(new DocValuesField("docId", i, DocValues.Type.VAR_INTS)); + doc.add(new PackedLongDocValuesField("docId", i)); doc.add(new TextField("docId", "" + i)); writer.addDocument(doc); } @@ -416,14 +424,9 @@ public class TestDocValuesIndexing extends LuceneTestCase { indexValues(w, numValues, val, numVariantList, false, 7); DirectoryReader r = DirectoryReader.open(w, true); - if (val == Type.VAR_INTS) { - DocValues docValues = getDocValues(r, val.name()); - } DocValues docValues = getDocValues(r, val.name()); assertNotNull(docValues); // make sure we don't get a direct source since they don't support getArray() - if 
(val == Type.VAR_INTS) { - } Source source = docValues.getSource(); switch (source.getType()) { case FIXED_INTS_8: @@ -567,36 +570,58 @@ public class TestDocValuesIndexing extends LuceneTestCase { final boolean isNumeric = NUMERICS.contains(valueType); FixedBitSet deleted = new FixedBitSet(numValues); Document doc = new Document(); - final DocValuesField valField; + final Field valField; if (isNumeric) { switch (valueType) { case VAR_INTS: - valField = new DocValuesField(valueType.name(), (long) 0, valueType); + valField = new PackedLongDocValuesField(valueType.name(), (long) 0); break; case FIXED_INTS_16: - valField = new DocValuesField(valueType.name(), (short) 0, valueType); + valField = new ShortDocValuesField(valueType.name(), (short) 0); break; case FIXED_INTS_32: - valField = new DocValuesField(valueType.name(), 0, valueType); + valField = new IntDocValuesField(valueType.name(), 0); break; case FIXED_INTS_64: - valField = new DocValuesField(valueType.name(), (long) 0, valueType); + valField = new LongDocValuesField(valueType.name(), (long) 0); break; case FIXED_INTS_8: - valField = new DocValuesField(valueType.name(), (byte) 0, valueType); + valField = new ByteDocValuesField(valueType.name(), (byte) 0); break; case FLOAT_32: - valField = new DocValuesField(valueType.name(), (float) 0, valueType); + valField = new FloatDocValuesField(valueType.name(), (float) 0); break; case FLOAT_64: - valField = new DocValuesField(valueType.name(), (double) 0, valueType); + valField = new DoubleDocValuesField(valueType.name(), (double) 0); break; default: valField = null; fail("unhandled case"); } } else { - valField = new DocValuesField(valueType.name(), new BytesRef(), valueType); + switch (valueType) { + case BYTES_FIXED_STRAIGHT: + valField = new StraightBytesDocValuesField(valueType.name(), new BytesRef(), true); + break; + case BYTES_VAR_STRAIGHT: + valField = new StraightBytesDocValuesField(valueType.name(), new BytesRef(), false); + break; + case BYTES_FIXED_DEREF: 
+ valField = new DerefBytesDocValuesField(valueType.name(), new BytesRef(), true); + break; + case BYTES_VAR_DEREF: + valField = new DerefBytesDocValuesField(valueType.name(), new BytesRef(), false); + break; + case BYTES_FIXED_SORTED: + valField = new SortedBytesDocValuesField(valueType.name(), new BytesRef(), true); + break; + case BYTES_VAR_SORTED: + valField = new SortedBytesDocValuesField(valueType.name(), new BytesRef(), false); + break; + default: + valField = null; + fail("unhandled case"); + } } doc.add(valField); final BytesRef bytesRef = new BytesRef(); @@ -616,7 +641,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { valField.setLongValue((long)i); break; case FIXED_INTS_16: - valField.setIntValue((short)i); + valField.setShortValue((short)i); break; case FIXED_INTS_32: valField.setIntValue(i); @@ -625,7 +650,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { valField.setLongValue((long)i); break; case FIXED_INTS_8: - valField.setIntValue((byte)(0xFF & (i % 128))); + valField.setByteValue((byte)(0xFF & (i % 128))); break; case FLOAT_32: valField.setFloatValue(2.0f * i); @@ -677,7 +702,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { Directory d = newDirectory(); RandomIndexWriter w = new RandomIndexWriter(random(), d); Document doc = new Document(); - DocValuesField f = new DocValuesField("field", 17, Type.VAR_INTS); + Field f = new PackedLongDocValuesField("field", 17); // Index doc values are single-valued so we should not // be able to add same field more than once: doc.add(f); @@ -707,8 +732,8 @@ public class TestDocValuesIndexing extends LuceneTestCase { // Index doc values are single-valued so we should not // be able to add same field more than once: Field f; - doc.add(f = new DocValuesField("field", 17, Type.VAR_INTS)); - doc.add(new DocValuesField("field", 22.0, Type.FLOAT_32)); + doc.add(f = new PackedLongDocValuesField("field", 17)); + doc.add(new FloatDocValuesField("field", 22.0f)); try { 
w.addDocument(doc); fail("didn't hit expected exception"); @@ -742,10 +767,10 @@ public class TestDocValuesIndexing extends LuceneTestCase { for (int i = 0; i < numDocs; i++) { Document doc = new Document(); doc.add(newField("id", "" + i, TextField.TYPE_STORED)); - String string =fixed ? _TestUtil.randomFixedByteLengthUnicodeString(random(), + String string = fixed ? _TestUtil.randomFixedByteLengthUnicodeString(random(), len) : _TestUtil.randomRealisticUnicodeString(random(), 1, len); BytesRef br = new BytesRef(string); - doc.add(new DocValuesField("field", br, type)); + doc.add(new SortedBytesDocValuesField("field", br, type == Type.BYTES_FIXED_SORTED)); hash.add(br); docToString.put("" + i, string); w.addDocument(doc); @@ -775,7 +800,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { BytesRef br = new BytesRef(string); hash.add(br); docToString.put(id, string); - doc.add( new DocValuesField("field", br, type)); + doc.add(new SortedBytesDocValuesField("field", br, type == Type.BYTES_FIXED_SORTED)); w.addDocument(doc); } w.commit(); @@ -853,8 +878,8 @@ public class TestDocValuesIndexing extends LuceneTestCase { } final Document doc = new Document(); - doc.add(new DocValuesField("stringdv", br, DocValues.Type.BYTES_VAR_SORTED)); - doc.add(new DocValuesField("id", numDocs, DocValues.Type.VAR_INTS)); + doc.add(new SortedBytesDocValuesField("stringdv", br)); + doc.add(new PackedLongDocValuesField("id", numDocs)); docValues.add(br); writer.addDocument(doc); numDocs++; @@ -935,7 +960,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { BytesRef b = new BytesRef(); b.bytes = bytes; b.length = bytes.length; - doc.add(new DocValuesField("field", b, DocValues.Type.BYTES_VAR_DEREF)); + doc.add(new DerefBytesDocValuesField("field", b)); w.addDocument(doc); bytes[0] = 1; w.addDocument(doc); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestTypePromotion.java b/lucene/core/src/test/org/apache/lucene/index/TestTypePromotion.java index 
3137b95bc87..f92d489b2c6 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestTypePromotion.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestTypePromotion.java @@ -23,9 +23,18 @@ import java.util.Random; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.codecs.Codec; +import org.apache.lucene.document.ByteDocValuesField; +import org.apache.lucene.document.DerefBytesDocValuesField; import org.apache.lucene.document.Document; +import org.apache.lucene.document.DoubleDocValuesField; import org.apache.lucene.document.Field; -import org.apache.lucene.document.DocValuesField; +import org.apache.lucene.document.FloatDocValuesField; +import org.apache.lucene.document.IntDocValuesField; +import org.apache.lucene.document.LongDocValuesField; +import org.apache.lucene.document.PackedLongDocValuesField; +import org.apache.lucene.document.ShortDocValuesField; +import org.apache.lucene.document.SortedBytesDocValuesField; +import org.apache.lucene.document.StraightBytesDocValuesField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.DocValues.Source; import org.apache.lucene.index.DocValues.Type; @@ -155,7 +164,7 @@ public class TestTypePromotion extends LuceneTestCase { assertEquals(msg + " byteSize: " + bytes.length, values[id], value); break; case Float: - assertEquals(msg, values[id], Double.doubleToRawLongBits(directSource.getFloat(i))); + assertEquals(msg, values[id], Double.doubleToRawLongBits(directSource.getFloat(i))); break; case Int: assertEquals(msg, values[id], directSource.getInt(i)); @@ -172,54 +181,69 @@ public class TestTypePromotion extends LuceneTestCase { public void index(IndexWriter writer, Type valueType, long[] values, int offset, int num) throws CorruptIndexException, IOException { - final DocValuesField valField; - switch (valueType) { - case FIXED_INTS_8: - valField = new DocValuesField("promote", (byte) 0, valueType); - break; - case FIXED_INTS_16: - valField = new 
DocValuesField("promote", (short) 0, valueType); - break; - case FIXED_INTS_32: - valField = new DocValuesField("promote", 0, valueType); - break; - case VAR_INTS: - valField = new DocValuesField("promote", 0L, valueType); - break; - case FIXED_INTS_64: - valField = new DocValuesField("promote", (long) 0, valueType); - break; - case FLOAT_64: - valField = new DocValuesField("promote", (double) 0, valueType); - break; - case FLOAT_32: - valField = new DocValuesField("promote", (float) 0, valueType); - break; - case BYTES_FIXED_DEREF: - case BYTES_FIXED_SORTED: - case BYTES_FIXED_STRAIGHT: - case BYTES_VAR_DEREF: - case BYTES_VAR_SORTED: - case BYTES_VAR_STRAIGHT: - valField = new DocValuesField("promote", new BytesRef(), valueType); - break; - default: - fail("unexpected value " + valueType); - valField = null; + final Field valField; + + if (VERBOSE) { + System.out.println("TEST: add docs " + offset + "-" + (offset+num) + " valType=" + valueType); + } + + switch(valueType) { + case VAR_INTS: + valField = new PackedLongDocValuesField("promote", (long) 0); + break; + case FIXED_INTS_8: + valField = new ByteDocValuesField("promote", (byte) 0); + break; + case FIXED_INTS_16: + valField = new ShortDocValuesField("promote", (short) 0); + break; + case FIXED_INTS_32: + valField = new IntDocValuesField("promote", 0); + break; + case FIXED_INTS_64: + valField = new LongDocValuesField("promote", (byte) 0); + break; + case FLOAT_32: + valField = new FloatDocValuesField("promote", 0f); + break; + case FLOAT_64: + valField = new DoubleDocValuesField("promote", 0d); + break; + case BYTES_FIXED_STRAIGHT: + valField = new StraightBytesDocValuesField("promote", new BytesRef(), true); + break; + case BYTES_VAR_STRAIGHT: + valField = new StraightBytesDocValuesField("promote", new BytesRef(), false); + break; + case BYTES_FIXED_DEREF: + valField = new DerefBytesDocValuesField("promote", new BytesRef(), true); + break; + case BYTES_VAR_DEREF: + valField = new 
DerefBytesDocValuesField("promote", new BytesRef(), false); + break; + case BYTES_FIXED_SORTED: + valField = new SortedBytesDocValuesField("promote", new BytesRef(), true); + break; + case BYTES_VAR_SORTED: + valField = new SortedBytesDocValuesField("promote", new BytesRef(), false); + break; + default: + throw new IllegalStateException("unknown Type: " + valueType); } - BytesRef ref = new BytesRef(new byte[] { 1, 2, 3, 4 }); for (int i = offset; i < offset + num; i++) { Document doc = new Document(); doc.add(new Field("id", i + "", TextField.TYPE_STORED)); switch (valueType) { case VAR_INTS: + // TODO: can we do nextLong()? values[i] = random().nextInt(); valField.setLongValue(values[i]); break; case FIXED_INTS_16: + // TODO: negatives too? values[i] = random().nextInt(Short.MAX_VALUE); - valField.setIntValue((short) values[i]); + valField.setShortValue((short) values[i]); break; case FIXED_INTS_32: values[i] = random().nextInt(); @@ -230,7 +254,7 @@ public class TestTypePromotion extends LuceneTestCase { valField.setLongValue(values[i]); break; case FLOAT_64: - double nextDouble = random().nextDouble(); + final double nextDouble = random().nextDouble(); values[i] = Double.doubleToRawLongBits(nextDouble); valField.setDoubleValue(nextDouble); break; @@ -241,7 +265,7 @@ public class TestTypePromotion extends LuceneTestCase { break; case FIXED_INTS_8: values[i] = (byte) i; - valField.setIntValue((byte)values[i]); + valField.setByteValue((byte)values[i]); break; case BYTES_FIXED_DEREF: case BYTES_FIXED_SORTED: @@ -273,6 +297,9 @@ public class TestTypePromotion extends LuceneTestCase { default: fail("unexpected value " + valueType); } + if (VERBOSE) { + System.out.println(" doc " + i + " has val=" + valField); + } doc.add(valField); writer.addDocument(doc); if (random().nextInt(10) == 0) { @@ -289,7 +316,7 @@ public class TestTypePromotion extends LuceneTestCase { runTest(SORTED_BYTES, TestType.Byte); } - public void testPromotInteger() throws IOException { + public 
void testPromoteInteger() throws IOException { runTest(INTEGERS, TestType.Int); } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestDocValuesScoring.java b/lucene/core/src/test/org/apache/lucene/search/TestDocValuesScoring.java index 70d5dfff620..2aa4b1a7df1 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestDocValuesScoring.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestDocValuesScoring.java @@ -20,13 +20,12 @@ package org.apache.lucene.search; import java.io.IOException; import org.apache.lucene.codecs.Codec; -import org.apache.lucene.document.DocValuesField; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.FloatDocValuesField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.DocValues.Source; -import org.apache.lucene.index.DocValues; import org.apache.lucene.index.FieldInvertState; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Norm; @@ -56,7 +55,7 @@ public class TestDocValuesScoring extends LuceneTestCase { Document doc = new Document(); Field field = newField("foo", "", TextField.TYPE_UNSTORED); doc.add(field); - DocValuesField dvField = new DocValuesField("foo_boost", 0.0f, DocValues.Type.FLOAT_32); + Field dvField = new FloatDocValuesField("foo_boost", 0.0f); doc.add(dvField); Field field2 = newField("bar", "", TextField.TYPE_UNSTORED); doc.add(field2); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestSort.java b/lucene/core/src/test/org/apache/lucene/search/TestSort.java index df469992af0..84c55af7b2a 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestSort.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestSort.java @@ -31,10 +31,15 @@ import java.util.concurrent.TimeUnit; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.codecs.Codec; -import 
org.apache.lucene.document.DocValuesField; +import org.apache.lucene.document.DerefBytesDocValuesField; import org.apache.lucene.document.Document; +import org.apache.lucene.document.DoubleDocValuesField; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.FloatDocValuesField; +import org.apache.lucene.document.PackedLongDocValuesField; +import org.apache.lucene.document.SortedBytesDocValuesField; +import org.apache.lucene.document.StraightBytesDocValuesField; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.AtomicReaderContext; @@ -152,19 +157,40 @@ public class TestSort extends LuceneTestCase { if (data[i][2] != null) { doc.add(new StringField ("int", data[i][2])); if (supportsDocValues) { - doc.add(new DocValuesField("int", Integer.parseInt(data[i][2]), DocValues.Type.VAR_INTS)); + doc.add(new PackedLongDocValuesField("int", Integer.parseInt(data[i][2]))); } } if (data[i][3] != null) { doc.add(new StringField ("float", data[i][3])); if (supportsDocValues) { - doc.add(new DocValuesField("float", Float.parseFloat(data[i][3]), DocValues.Type.FLOAT_32)); + doc.add(new FloatDocValuesField("float", Float.parseFloat(data[i][3]))); } } if (data[i][4] != null) { doc.add(new StringField ("string", data[i][4])); if (supportsDocValues) { - doc.add(new DocValuesField("string", new BytesRef(data[i][4]), stringDVType)); + switch(stringDVType) { + case BYTES_FIXED_SORTED: + doc.add(new SortedBytesDocValuesField("string", new BytesRef(data[i][4]), true)); + break; + case BYTES_VAR_SORTED: + doc.add(new SortedBytesDocValuesField("string", new BytesRef(data[i][4]), false)); + break; + case BYTES_FIXED_STRAIGHT: + doc.add(new StraightBytesDocValuesField("string", new BytesRef(data[i][4]), true)); + break; + case BYTES_VAR_STRAIGHT: + doc.add(new StraightBytesDocValuesField("string", new BytesRef(data[i][4]), false)); + break; + case 
BYTES_FIXED_DEREF: + doc.add(new DerefBytesDocValuesField("string", new BytesRef(data[i][4]), true)); + break; + case BYTES_VAR_DEREF: + doc.add(new DerefBytesDocValuesField("string", new BytesRef(data[i][4]), false)); + break; + default: + throw new IllegalStateException("unknown type " + stringDVType); + } } } if (data[i][5] != null) doc.add (new StringField ("custom", data[i][5])); @@ -173,7 +199,7 @@ public class TestSort extends LuceneTestCase { if (data[i][8] != null) { doc.add(new StringField ("double", data[i][8])); if (supportsDocValues) { - doc.add(new DocValuesField("double", Double.parseDouble(data[i][8]), DocValues.Type.FLOAT_64)); + doc.add(new DoubleDocValuesField("double", Double.parseDouble(data[i][8]))); } } if (data[i][9] != null) doc.add (new StringField ("short", data[i][9])); @@ -220,12 +246,12 @@ public class TestSort extends LuceneTestCase { //doc.add (new Field ("contents", Integer.toString(i), Field.Store.NO, Field.Index.ANALYZED)); doc.add(new StringField("string", num)); if (supportsDocValues) { - doc.add(new DocValuesField("string", new BytesRef(num), DocValues.Type.BYTES_VAR_SORTED)); + doc.add(new SortedBytesDocValuesField("string", new BytesRef(num))); } String num2 = getRandomCharString(getRandomNumber(1, 4), 48, 50); doc.add(new StringField ("string2", num2)); if (supportsDocValues) { - doc.add(new DocValuesField("string2", new BytesRef(num2), DocValues.Type.BYTES_VAR_SORTED)); + doc.add(new SortedBytesDocValuesField("string2", new BytesRef(num2))); } doc.add (new Field ("tracer2", num2, onlyStored)); for(IndexableField f2 : doc.getFields()) { @@ -239,12 +265,12 @@ public class TestSort extends LuceneTestCase { //doc.add (new Field ("contents", Integer.toString(i), Field.Store.NO, Field.Index.ANALYZED)); doc.add(new StringField("string_fixed", numFixed)); if (supportsDocValues) { - doc.add(new DocValuesField("string_fixed", new BytesRef(numFixed), DocValues.Type.BYTES_FIXED_SORTED)); + doc.add(new 
SortedBytesDocValuesField("string_fixed", new BytesRef(numFixed), true)); } String num2Fixed = getRandomCharString(fixedLen2, 48, 52); doc.add(new StringField ("string2_fixed", num2Fixed)); if (supportsDocValues) { - doc.add(new DocValuesField("string2_fixed", new BytesRef(num2Fixed), DocValues.Type.BYTES_FIXED_SORTED)); + doc.add(new SortedBytesDocValuesField("string2_fixed", new BytesRef(num2Fixed), true)); } doc.add (new Field ("tracer2_fixed", num2Fixed, onlyStored)); @@ -1371,9 +1397,9 @@ public class TestSort extends LuceneTestCase { } final Document doc = new Document(); - doc.add(new DocValuesField("stringdv", br, DocValues.Type.BYTES_VAR_SORTED)); + doc.add(new SortedBytesDocValuesField("stringdv", br)); doc.add(newField("string", s, StringField.TYPE_UNSTORED)); - doc.add(new DocValuesField("id", numDocs, DocValues.Type.VAR_INTS)); + doc.add(new PackedLongDocValuesField("id", numDocs)); docValues.add(br); writer.addDocument(doc); numDocs++; diff --git a/lucene/grouping/src/java/org/apache/lucene/search/grouping/GroupingSearch.java b/lucene/grouping/src/java/org/apache/lucene/search/grouping/GroupingSearch.java index 07162d91464..222bbf44f27 100644 --- a/lucene/grouping/src/java/org/apache/lucene/search/grouping/GroupingSearch.java +++ b/lucene/grouping/src/java/org/apache/lucene/search/grouping/GroupingSearch.java @@ -17,7 +17,10 @@ package org.apache.lucene.search.grouping; * limitations under the License. 
*/ -import org.apache.lucene.document.DocValuesField; +import java.io.IOException; +import java.util.*; + +import org.apache.lucene.document.DerefBytesDocValuesField; import org.apache.lucene.index.DocValues; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.search.*; @@ -37,9 +40,6 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.mutable.MutableValue; -import java.io.IOException; -import java.util.*; - /** * Convenience class to perform grouping in a non distributed environment. * @@ -85,7 +85,8 @@ public class GroupingSearch { /** * Constructs a GroupingSearch instance that groups documents by doc values. - * This constructor can only be used when the groupField is a {@link DocValuesField}. + * This constructor can only be used when the groupField + * is a *DocValuesField (e.g., {@link DerefBytesDocValuesField}). * * @param groupField The name of the field to group by that contains doc values * @param docValuesType The doc values type of the specified groupField diff --git a/lucene/grouping/src/test/org/apache/lucene/search/grouping/AllGroupHeadsCollectorTest.java b/lucene/grouping/src/test/org/apache/lucene/search/grouping/AllGroupHeadsCollectorTest.java index e94a84f0434..e4c04e2435f 100644 --- a/lucene/grouping/src/test/org/apache/lucene/search/grouping/AllGroupHeadsCollectorTest.java +++ b/lucene/grouping/src/test/org/apache/lucene/search/grouping/AllGroupHeadsCollectorTest.java @@ -17,14 +17,17 @@ package org.apache.lucene.search.grouping; * limitations under the License.
*/ +import java.io.IOException; +import java.util.*; + import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.*; import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.DocValues.Type; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.SlowCompositeReaderWrapper; import org.apache.lucene.index.Term; -import org.apache.lucene.index.DocValues.Type; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.valuesource.BytesRefFieldSource; import org.apache.lucene.search.*; @@ -37,9 +40,6 @@ import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util._TestUtil; -import java.io.IOException; -import java.util.*; - public class AllGroupHeadsCollectorTest extends LuceneTestCase { private static final Type[] vts = new Type[]{ @@ -210,9 +210,21 @@ public class AllGroupHeadsCollectorTest extends LuceneTestCase { Document docNoGroup = new Document(); Field group = newField("group", "", StringField.TYPE_UNSTORED); doc.add(group); - DocValuesField valuesField = null; + Field valuesField = null; if (canUseIDV) { - valuesField = new DocValuesField("group", new BytesRef(), valueType); + switch(valueType) { + case BYTES_VAR_DEREF: + valuesField = new DerefBytesDocValuesField("group", new BytesRef()); + break; + case BYTES_VAR_STRAIGHT: + valuesField = new StraightBytesDocValuesField("group", new BytesRef()); + break; + case BYTES_VAR_SORTED: + valuesField = new SortedBytesDocValuesField("group", new BytesRef()); + break; + default: + fail("unhandled type"); + } doc.add(valuesField); } Field sort1 = newField("sort1", "", StringField.TYPE_UNSTORED); @@ -529,7 +541,21 @@ public class AllGroupHeadsCollectorTest extends LuceneTestCase { private void addGroupField(Document doc, String groupField, String value, boolean canUseIDV, Type valueType) { doc.add(new 
Field(groupField, value, TextField.TYPE_STORED)); if (canUseIDV) { - doc.add(new DocValuesField(groupField, new BytesRef(value), valueType)); + Field valuesField = null; + switch(valueType) { + case BYTES_VAR_DEREF: + valuesField = new DerefBytesDocValuesField(groupField, new BytesRef(value)); + break; + case BYTES_VAR_STRAIGHT: + valuesField = new StraightBytesDocValuesField(groupField, new BytesRef(value)); + break; + case BYTES_VAR_SORTED: + valuesField = new SortedBytesDocValuesField(groupField, new BytesRef(value)); + break; + default: + fail("unhandled type"); + } + doc.add(valuesField); } } diff --git a/lucene/grouping/src/test/org/apache/lucene/search/grouping/AllGroupsCollectorTest.java b/lucene/grouping/src/test/org/apache/lucene/search/grouping/AllGroupsCollectorTest.java index 27a8f68a311..644b51a43a8 100644 --- a/lucene/grouping/src/test/org/apache/lucene/search/grouping/AllGroupsCollectorTest.java +++ b/lucene/grouping/src/test/org/apache/lucene/search/grouping/AllGroupsCollectorTest.java @@ -123,7 +123,7 @@ public class AllGroupsCollectorTest extends LuceneTestCase { private void addGroupField(Document doc, String groupField, String value, boolean canUseIDV) { doc.add(new Field(groupField, value, TextField.TYPE_STORED)); if (canUseIDV) { - doc.add(new DocValuesField(groupField, new BytesRef(value), Type.BYTES_VAR_SORTED)); + doc.add(new SortedBytesDocValuesField(groupField, new BytesRef(value))); } } diff --git a/lucene/grouping/src/test/org/apache/lucene/search/grouping/DistinctValuesCollectorTest.java b/lucene/grouping/src/test/org/apache/lucene/search/grouping/DistinctValuesCollectorTest.java index 378ab944e93..e121ac1b716 100644 --- a/lucene/grouping/src/test/org/apache/lucene/search/grouping/DistinctValuesCollectorTest.java +++ b/lucene/grouping/src/test/org/apache/lucene/search/grouping/DistinctValuesCollectorTest.java @@ -17,6 +17,9 @@ package org.apache.lucene.search.grouping; * limitations under the License. 
*/ +import java.io.IOException; +import java.util.*; + import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.*; import org.apache.lucene.index.*; @@ -37,9 +40,6 @@ import org.apache.lucene.util._TestUtil; import org.apache.lucene.util.mutable.MutableValue; import org.apache.lucene.util.mutable.MutableValueStr; -import java.io.IOException; -import java.util.*; - public class DistinctValuesCollectorTest extends AbstractGroupingTestCase { private final static NullComparator nullComparator = new NullComparator(); @@ -321,17 +321,19 @@ public class DistinctValuesCollectorTest extends AbstractGroupingTestCase { return; } - DocValuesField valuesField = null; + Field valuesField = null; switch (type) { case VAR_INTS: - valuesField = new DocValuesField(field, Integer.parseInt(value), type); + valuesField = new PackedLongDocValuesField(field, Integer.parseInt(value)); break; case FLOAT_64: - valuesField = new DocValuesField(field, Double.parseDouble(value), type); + valuesField = new DoubleDocValuesField(field, Double.parseDouble(value)); break; case BYTES_VAR_STRAIGHT: + valuesField = new StraightBytesDocValuesField(field, new BytesRef(value)); + break; case BYTES_VAR_SORTED: - valuesField = new DocValuesField(field, new BytesRef(value), type); + valuesField = new SortedBytesDocValuesField(field, new BytesRef(value)); break; } doc.add(valuesField); diff --git a/lucene/grouping/src/test/org/apache/lucene/search/grouping/GroupFacetCollectorTest.java b/lucene/grouping/src/test/org/apache/lucene/search/grouping/GroupFacetCollectorTest.java index 0209c1f8fbc..790ec05c669 100644 --- a/lucene/grouping/src/test/org/apache/lucene/search/grouping/GroupFacetCollectorTest.java +++ b/lucene/grouping/src/test/org/apache/lucene/search/grouping/GroupFacetCollectorTest.java @@ -220,7 +220,7 @@ public class GroupFacetCollectorTest extends AbstractGroupingTestCase { private void addField(Document doc, String field, String value, boolean canUseIDV) { doc.add(new 
Field(field, value, StringField.TYPE_UNSTORED)); if (canUseIDV) { - doc.add(new DocValuesField(field, new BytesRef(value), DocValues.Type.BYTES_VAR_SORTED)); + doc.add(new SortedBytesDocValuesField(field, new BytesRef(value))); } } @@ -368,7 +368,7 @@ public class GroupFacetCollectorTest extends AbstractGroupingTestCase { Document docNoFacet = new Document(); Document docNoGroupNoFacet = new Document(); Field group = newField("group", "", StringField.TYPE_UNSTORED); - DocValuesField groupDc = new DocValuesField("group", new BytesRef(), DocValues.Type.BYTES_VAR_SORTED); + Field groupDc = new SortedBytesDocValuesField("group", new BytesRef()); if (useDv) { doc.add(groupDc); docNoFacet.add(groupDc); @@ -381,7 +381,7 @@ public class GroupFacetCollectorTest extends AbstractGroupingTestCase { facetFields[0] = newField("facet", "", StringField.TYPE_UNSTORED); doc.add(facetFields[0]); docNoGroup.add(facetFields[0]); - facetFields[1] = new DocValuesField("facet", new BytesRef(), DocValues.Type.BYTES_VAR_SORTED); + facetFields[1] = new SortedBytesDocValuesField("facet", new BytesRef()); doc.add(facetFields[1]); docNoGroup.add(facetFields[1]); } else { diff --git a/lucene/grouping/src/test/org/apache/lucene/search/grouping/GroupingSearchTest.java b/lucene/grouping/src/test/org/apache/lucene/search/grouping/GroupingSearchTest.java index 9058bf89893..b98aff6d4fd 100644 --- a/lucene/grouping/src/test/org/apache/lucene/search/grouping/GroupingSearchTest.java +++ b/lucene/grouping/src/test/org/apache/lucene/search/grouping/GroupingSearchTest.java @@ -168,7 +168,7 @@ public class GroupingSearchTest extends LuceneTestCase { private void addGroupField(Document doc, String groupField, String value, boolean canUseIDV) { doc.add(new Field(groupField, value, TextField.TYPE_STORED)); if (canUseIDV) { - doc.add(new DocValuesField(groupField, new BytesRef(value), DocValues.Type.BYTES_VAR_SORTED)); + doc.add(new SortedBytesDocValuesField(groupField, new BytesRef(value))); } } diff --git 
a/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java b/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java index a54602c2ea8..762b241f898 100644 --- a/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java +++ b/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java @@ -175,7 +175,7 @@ public class TestGrouping extends LuceneTestCase { private void addGroupField(Document doc, String groupField, String value, boolean canUseIDV) { doc.add(new Field(groupField, value, TextField.TYPE_STORED)); if (canUseIDV) { - doc.add(new DocValuesField(groupField, new BytesRef(value), Type.BYTES_VAR_SORTED)); + doc.add(new SortedBytesDocValuesField(groupField, new BytesRef(value))); } } @@ -706,7 +706,7 @@ public class TestGrouping extends LuceneTestCase { Document doc = new Document(); Document docNoGroup = new Document(); - DocValuesField idvGroupField = new DocValuesField("group", new BytesRef(), Type.BYTES_VAR_SORTED); + Field idvGroupField = new SortedBytesDocValuesField("group", new BytesRef()); if (canUseIDV) { doc.add(idvGroupField); } diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java b/lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java index be3bf89e807..5220d4db74d 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java @@ -25,9 +25,18 @@ import java.util.Random; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.codecs.Codec; -import org.apache.lucene.document.DocValuesField; +import org.apache.lucene.document.ByteDocValuesField; +import org.apache.lucene.document.DerefBytesDocValuesField; import org.apache.lucene.document.Document; +import org.apache.lucene.document.DoubleDocValuesField; import org.apache.lucene.document.Field; 
+import org.apache.lucene.document.FloatDocValuesField; +import org.apache.lucene.document.IntDocValuesField; +import org.apache.lucene.document.LongDocValuesField; +import org.apache.lucene.document.PackedLongDocValuesField; +import org.apache.lucene.document.ShortDocValuesField; +import org.apache.lucene.document.SortedBytesDocValuesField; +import org.apache.lucene.document.StraightBytesDocValuesField; import org.apache.lucene.index.IndexWriter; // javadoc import org.apache.lucene.search.Query; import org.apache.lucene.store.Directory; @@ -138,7 +147,7 @@ public class RandomIndexWriter implements Closeable { public void addDocument(final Iterable doc, Analyzer a) throws IOException { if (doDocValues && doc instanceof Document) { - randomPerDocFieldValues(r, (Document) doc); + randomPerDocFieldValues((Document) doc); } if (r.nextInt(5) == 3) { // TODO: maybe, we should simply buffer up added docs @@ -179,11 +188,23 @@ public class RandomIndexWriter implements Closeable { maybeCommit(); } + + private BytesRef getFixedRandomBytes() { + final String randomUnicodeString = _TestUtil.randomFixedByteLengthUnicodeString(r, fixedBytesLength); + BytesRef fixedRef = new BytesRef(randomUnicodeString); + if (fixedRef.length > fixedBytesLength) { + fixedRef = new BytesRef(fixedRef.bytes, 0, fixedBytesLength); + } else { + fixedRef.grow(fixedBytesLength); + fixedRef.length = fixedBytesLength; + } + return fixedRef; + } - private void randomPerDocFieldValues(Random random, Document doc) { + private void randomPerDocFieldValues(Document doc) { DocValues.Type[] values = DocValues.Type.values(); - DocValues.Type type = values[random.nextInt(values.length)]; + DocValues.Type type = values[r.nextInt(values.length)]; String name = "random_" + type.name() + "" + docValuesFieldPrefix; if ("Lucene3x".equals(codec.getName()) || doc.getField(name) != null) { return; @@ -191,44 +212,45 @@ public class RandomIndexWriter implements Closeable { final Field f; switch (type) { case 
BYTES_FIXED_DEREF: - case BYTES_FIXED_STRAIGHT: - case BYTES_FIXED_SORTED: - //make sure we use a valid unicode string with a fixed size byte length - final String randomUnicodeString = _TestUtil.randomFixedByteLengthUnicodeString(random, fixedBytesLength); - BytesRef fixedRef = new BytesRef(randomUnicodeString); - if (fixedRef.length > fixedBytesLength) { - fixedRef = new BytesRef(fixedRef.bytes, 0, fixedBytesLength); - } else { - fixedRef.grow(fixedBytesLength); - fixedRef.length = fixedBytesLength; - } - f = new DocValuesField(name, fixedRef, type); + f = new DerefBytesDocValuesField(name, getFixedRandomBytes(), true); break; case BYTES_VAR_DEREF: + f = new DerefBytesDocValuesField(name, new BytesRef(_TestUtil.randomUnicodeString(r, 20)), false); + break; + case BYTES_FIXED_STRAIGHT: + f = new StraightBytesDocValuesField(name, getFixedRandomBytes(), true); + break; case BYTES_VAR_STRAIGHT: + f = new StraightBytesDocValuesField(name, new BytesRef(_TestUtil.randomUnicodeString(r, 20)), false); + break; + case BYTES_FIXED_SORTED: + f = new SortedBytesDocValuesField(name, getFixedRandomBytes(), true); + break; case BYTES_VAR_SORTED: - f = new DocValuesField(name, new BytesRef(_TestUtil.randomUnicodeString(random, 20)), type); + f = new SortedBytesDocValuesField(name, new BytesRef(_TestUtil.randomUnicodeString(r, 20)), false); break; case FLOAT_32: - f = new DocValuesField(name, random.nextFloat(), type); + f = new FloatDocValuesField(name, r.nextFloat()); break; case FLOAT_64: - f = new DocValuesField(name, random.nextDouble(), type); + f = new DoubleDocValuesField(name, r.nextDouble()); break; case VAR_INTS: - f = new DocValuesField(name, random.nextLong(), type); + f = new PackedLongDocValuesField(name, r.nextLong()); break; case FIXED_INTS_16: - f = new DocValuesField(name, random.nextInt(Short.MAX_VALUE), type); + // TODO: we should test negatives too? 
+ f = new ShortDocValuesField(name, (short) r.nextInt(Short.MAX_VALUE)); break; case FIXED_INTS_32: - f = new DocValuesField(name, random.nextInt(), type); + f = new IntDocValuesField(name, r.nextInt()); break; case FIXED_INTS_64: - f = new DocValuesField(name, random.nextLong(), type); + f = new LongDocValuesField(name, r.nextLong()); break; - case FIXED_INTS_8: - f = new DocValuesField(name, random.nextInt(128), type); + case FIXED_INTS_8: + // TODO: we should test negatives too? + f = new ByteDocValuesField(name, (byte) r.nextInt(128)); break; default: throw new IllegalArgumentException("no such type: " + type); @@ -267,7 +289,7 @@ public class RandomIndexWriter implements Closeable { */ public void updateDocument(Term t, final Iterable doc) throws IOException { if (doDocValues) { - randomPerDocFieldValues(r, (Document) doc); + randomPerDocFieldValues((Document) doc); } if (r.nextInt(5) == 3) { w.updateDocuments(t, new Iterable>() { diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/LineFileDocs.java b/lucene/test-framework/src/java/org/apache/lucene/util/LineFileDocs.java index 2373e4bd4e5..3f511e06db8 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/LineFileDocs.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/LineFileDocs.java @@ -28,13 +28,12 @@ import java.util.Random; import java.util.concurrent.atomic.AtomicInteger; import java.util.zip.GZIPInputStream; -import org.apache.lucene.document.DocValuesField; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.SortedBytesDocValuesField; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; -import org.apache.lucene.index.DocValues; /** Minimal port of benchmark's LneDocSource + * DocMaker, so tests can enum docs from a line file created @@ -150,7 +149,7 @@ public class LineFileDocs implements Closeable { 
doc.add(date); if (useDocValues) { - titleDV = new DocValuesField("titleDV", new BytesRef(), DocValues.Type.BYTES_VAR_SORTED); + titleDV = new SortedBytesDocValuesField("titleDV", new BytesRef()); doc.add(titleDV); } else { titleDV = null; diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java b/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java index fb2fee81230..28bfc7b31d6 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java @@ -35,9 +35,18 @@ import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.lucene40.Lucene40Codec; import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat; -import org.apache.lucene.document.DocValuesField; +import org.apache.lucene.document.ByteDocValuesField; +import org.apache.lucene.document.DerefBytesDocValuesField; import org.apache.lucene.document.Document; +import org.apache.lucene.document.DoubleDocValuesField; import org.apache.lucene.document.Field; +import org.apache.lucene.document.FloatDocValuesField; +import org.apache.lucene.document.IntDocValuesField; +import org.apache.lucene.document.LongDocValuesField; +import org.apache.lucene.document.PackedLongDocValuesField; +import org.apache.lucene.document.ShortDocValuesField; +import org.apache.lucene.document.SortedBytesDocValuesField; +import org.apache.lucene.document.StraightBytesDocValuesField; import org.apache.lucene.index.CheckIndex; import org.apache.lucene.index.ConcurrentMergeScheduler; import org.apache.lucene.index.DocValues; @@ -750,30 +759,50 @@ public class _TestUtil { for(IndexableField f : doc1) { final Field field1 = (Field) f; final Field field2; - if (field1 instanceof DocValuesField) { - final DocValues.Type dvType = field1.fieldType().docValueType(); - switch (dvType) { + final DocValues.Type dvType = field1.fieldType().docValueType(); + if 
(dvType != null) { + switch(dvType) { case VAR_INTS: - case FIXED_INTS_8: - case FIXED_INTS_16: - case FIXED_INTS_32: - case FIXED_INTS_64: - field2 = new DocValuesField(field1.name(), field1.numericValue().intValue(), dvType); + field2 = new PackedLongDocValuesField(field1.name(), field1.numericValue().longValue()); break; - case BYTES_FIXED_DEREF: - case BYTES_FIXED_STRAIGHT: - case BYTES_VAR_DEREF: - case BYTES_VAR_STRAIGHT: - case BYTES_FIXED_SORTED: - case BYTES_VAR_SORTED: - field2 = new DocValuesField(field1.name(), BytesRef.deepCopyOf(field1.binaryValue()), dvType); + case FIXED_INTS_8: + field2 = new ByteDocValuesField(field1.name(), field1.numericValue().byteValue()); + break; + case FIXED_INTS_16: + field2 = new ShortDocValuesField(field1.name(), field1.numericValue().shortValue()); + break; + case FIXED_INTS_32: + field2 = new IntDocValuesField(field1.name(), field1.numericValue().intValue()); + break; + case FIXED_INTS_64: + field2 = new LongDocValuesField(field1.name(), field1.numericValue().longValue()); break; case FLOAT_32: + field2 = new FloatDocValuesField(field1.name(), field1.numericValue().floatValue()); + break; case FLOAT_64: - field2 = new DocValuesField(field1.name(), field1.numericValue().doubleValue(), dvType); + field2 = new DoubleDocValuesField(field1.name(), field1.numericValue().doubleValue()); + break; + case BYTES_FIXED_STRAIGHT: + field2 = new StraightBytesDocValuesField(field1.name(), field1.binaryValue(), true); + break; + case BYTES_VAR_STRAIGHT: + field2 = new StraightBytesDocValuesField(field1.name(), field1.binaryValue(), false); + break; + case BYTES_FIXED_DEREF: + field2 = new DerefBytesDocValuesField(field1.name(), field1.binaryValue(), true); + break; + case BYTES_VAR_DEREF: + field2 = new DerefBytesDocValuesField(field1.name(), field1.binaryValue(), false); + break; + case BYTES_FIXED_SORTED: + field2 = new SortedBytesDocValuesField(field1.name(), field1.binaryValue(), true); + break; + case BYTES_VAR_SORTED: + field2 = 
new SortedBytesDocValuesField(field1.name(), field1.binaryValue(), false); break; default: - throw new IllegalArgumentException("don't know how to clone DV field=" + field1); + throw new IllegalStateException("unknown Type: " + dvType); } } else { field2 = new Field(field1.name(), field1.stringValue(), field1.fieldType());