From 6a8b870c58a21de42585c5ea04a017a37dd97f30 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Tue, 19 Nov 2013 19:24:03 +0000 Subject: [PATCH] LUCENE-5339: add best-effort detection of invalid mixing of different association field types in single indexed field git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5339@1543535 13f79535-47bb-0310-9956-ffa450edef68 --- .../facet/simple/AssociationFacetField.java | 24 ++------ .../lucene/facet/simple/DocumentBuilder.java | 23 ++++++++ .../simple/FloatAssociationFacetField.java | 48 ++++++++++++++++ .../simple/IntAssociationFacetField.java | 57 +++++++++++++++++++ .../facet/simple/SimpleDrillDownQuery.java | 2 +- .../simple/SortedSetDocValuesFacetCounts.java | 2 +- .../simple/SortedSetDocValuesReaderState.java | 2 +- .../simple/TestSortedSetDocValuesFacets.java | 6 +- .../simple/TestTaxonomyFacetAssociations.java | 30 ++++++++-- 9 files changed, 164 insertions(+), 30 deletions(-) create mode 100644 lucene/facet/src/java/org/apache/lucene/facet/simple/FloatAssociationFacetField.java create mode 100644 lucene/facet/src/java/org/apache/lucene/facet/simple/IntAssociationFacetField.java diff --git a/lucene/facet/src/java/org/apache/lucene/facet/simple/AssociationFacetField.java b/lucene/facet/src/java/org/apache/lucene/facet/simple/AssociationFacetField.java index 1d27713da2c..376c09e6a08 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/simple/AssociationFacetField.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/simple/AssociationFacetField.java @@ -31,9 +31,9 @@ public class AssociationFacetField extends Field { TYPE.setIndexed(true); TYPE.freeze(); } - final String dim; - final String[] path; - final BytesRef assoc; + protected final String dim; + protected final String[] path; + protected final BytesRef assoc; public AssociationFacetField(BytesRef assoc, String dim, String... 
path) { super("dummy", TYPE); @@ -45,18 +45,6 @@ public class AssociationFacetField extends Field { this.path = path; } - /** Utility ctor: associates an int value (translates it - * to 4-byte BytesRef). */ - public AssociationFacetField(int assoc, String dim, String... path) { - this(intToBytesRef(assoc), dim, path); - } - - /** Utility ctor: associates a float value (translates it - * to 4-byte BytesRef). */ - public AssociationFacetField(float assoc, String dim, String... path) { - this(floatToBytesRef(assoc), dim, path); - } - private static BytesRef intToBytesRef(int v) { byte[] bytes = new byte[4]; // big-endian: @@ -67,12 +55,8 @@ public class AssociationFacetField extends Field { return new BytesRef(bytes); } - private static BytesRef floatToBytesRef(float v) { - return intToBytesRef(Float.floatToIntBits(v)); - } - @Override public String toString() { - return "FacetField(dim=" + dim + " path=" + Arrays.toString(path) + ")"; + return "AssociationFacetField(dim=" + dim + " path=" + Arrays.toString(path) + " bytes=" + assoc + ")"; } } diff --git a/lucene/facet/src/java/org/apache/lucene/facet/simple/DocumentBuilder.java b/lucene/facet/src/java/org/apache/lucene/facet/simple/DocumentBuilder.java index ce6e028e4e0..717d9c3de52 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/simple/DocumentBuilder.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/simple/DocumentBuilder.java @@ -23,6 +23,7 @@ import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; import org.apache.lucene.document.BinaryDocValuesField; import org.apache.lucene.document.Field; @@ -51,6 +52,10 @@ public class DocumentBuilder { private final TaxonomyWriter taxoWriter; private final FacetsConfig config; + // Used only for best-effort detection of app mixing + // int/float/bytes in a single indexed field: + private final Map assocDimTypes = new ConcurrentHashMap(); + public 
DocumentBuilder(TaxonomyWriter taxoWriter, FacetsConfig config) { this.taxoWriter = taxoWriter; this.config = config; @@ -103,6 +108,24 @@ public class DocumentBuilder { assocByField.put(indexFieldName, fields); } fields.add(facetField); + + // Best effort: detect mis-matched types in same + // indexed field: + String type; + if (facetField instanceof IntAssociationFacetField) { + type = "int"; + } else if (facetField instanceof FloatAssociationFacetField) { + type = "float"; + } else { + type = "bytes"; + } + // NOTE: not thread safe, but this is just best effort: + String curType = assocDimTypes.get(indexFieldName); + if (curType == null) { + assocDimTypes.put(indexFieldName, type); + } else if (!curType.equals(type)) { + throw new IllegalArgumentException("mixing incompatible types of AssocationFacetField (" + curType + " and " + type + ") in indexed field \"" + indexFieldName + "\"; use FacetsConfig to change the indexFieldName for each dimension"); + } } } diff --git a/lucene/facet/src/java/org/apache/lucene/facet/simple/FloatAssociationFacetField.java b/lucene/facet/src/java/org/apache/lucene/facet/simple/FloatAssociationFacetField.java new file mode 100644 index 00000000000..eb6c1c57561 --- /dev/null +++ b/lucene/facet/src/java/org/apache/lucene/facet/simple/FloatAssociationFacetField.java @@ -0,0 +1,48 @@ +package org.apache.lucene.facet.simple; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.Arrays; + +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.util.BytesRef; + +/** Associates an arbitrary float with the added facet + * path, encoding the float into a 4-byte BytesRef. */ +public class FloatAssociationFacetField extends AssociationFacetField { + + /** Utility ctor: associates a float value (translates it + * to 4-byte BytesRef). */ + public FloatAssociationFacetField(float assoc, String dim, String... path) { + super(floatToBytesRef(assoc), dim, path); + } + + public static BytesRef floatToBytesRef(float v) { + return IntAssociationFacetField.intToBytesRef(Float.floatToIntBits(v)); + } + + public static float bytesRefToFloat(BytesRef b) { + return Float.intBitsToFloat(IntAssociationFacetField.bytesRefToInt(b)); + } + + @Override + public String toString() { + return "FloatAssociationFacetField(dim=" + dim + " path=" + Arrays.toString(path) + " value=" + bytesRefToFloat(assoc) + ")"; + } +} diff --git a/lucene/facet/src/java/org/apache/lucene/facet/simple/IntAssociationFacetField.java b/lucene/facet/src/java/org/apache/lucene/facet/simple/IntAssociationFacetField.java new file mode 100644 index 00000000000..f946d5ffaa5 --- /dev/null +++ b/lucene/facet/src/java/org/apache/lucene/facet/simple/IntAssociationFacetField.java @@ -0,0 +1,57 @@ +package org.apache.lucene.facet.simple; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.Arrays; + +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.util.BytesRef; + +/** Associates an arbitrary int with the added facet + * path, encoding the int into a 4-byte BytesRef. */ +public class IntAssociationFacetField extends AssociationFacetField { + + /** Utility ctor: associates an int value (translates it + * to 4-byte BytesRef). */ + public IntAssociationFacetField(int assoc, String dim, String... 
path) { + super(intToBytesRef(assoc), dim, path); + } + + public static BytesRef intToBytesRef(int v) { + byte[] bytes = new byte[4]; + // big-endian: + bytes[0] = (byte) (v >> 24); + bytes[1] = (byte) (v >> 16); + bytes[2] = (byte) (v >> 8); + bytes[3] = (byte) v; + return new BytesRef(bytes); + } + + public static int bytesRefToInt(BytesRef b) { + return ((b.bytes[b.offset]&0xFF) << 24) | + ((b.bytes[b.offset+1]&0xFF) << 16) | + ((b.bytes[b.offset+2]&0xFF) << 8) | + (b.bytes[b.offset+3]&0xFF); + } + + @Override + public String toString() { + return "IntAssociationFacetField(dim=" + dim + " path=" + Arrays.toString(path) + " value=" + bytesRefToInt(assoc) + ")"; + } +} diff --git a/lucene/facet/src/java/org/apache/lucene/facet/simple/SimpleDrillDownQuery.java b/lucene/facet/src/java/org/apache/lucene/facet/simple/SimpleDrillDownQuery.java index 645f2b23696..7bc2d17bc4f 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/simple/SimpleDrillDownQuery.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/simple/SimpleDrillDownQuery.java @@ -53,7 +53,7 @@ import org.apache.lucene.search.TermQuery; public final class SimpleDrillDownQuery extends Query { private static Term term(String field, String dim, String[] path) { - return new Term(field, FacetDocument.pathToString(dim, path)); + return new Term(field, DocumentBuilder.pathToString(dim, path)); } private final FacetsConfig config; diff --git a/lucene/facet/src/java/org/apache/lucene/facet/simple/SortedSetDocValuesFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/simple/SortedSetDocValuesFacetCounts.java index e011c78a8a6..4958ccf227f 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/simple/SortedSetDocValuesFacetCounts.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/simple/SortedSetDocValuesFacetCounts.java @@ -230,7 +230,7 @@ public class SortedSetDocValuesFacetCounts extends Facets { throw new IllegalArgumentException("path must be length=1"); } - int ord = (int) 
dv.lookupTerm(new BytesRef(FacetDocument.pathToString(dim, path))); + int ord = (int) dv.lookupTerm(new BytesRef(DocumentBuilder.pathToString(dim, path))); if (ord < 0) { return -1; } diff --git a/lucene/facet/src/java/org/apache/lucene/facet/simple/SortedSetDocValuesReaderState.java b/lucene/facet/src/java/org/apache/lucene/facet/simple/SortedSetDocValuesReaderState.java index 03e1ebb50f3..aa752d2ada1 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/simple/SortedSetDocValuesReaderState.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/simple/SortedSetDocValuesReaderState.java @@ -110,7 +110,7 @@ public final class SortedSetDocValuesReaderState { // support arbitrary hierarchy: for(int ord=0;ord