LUCENE-5339: add best-effort detection of invalid mixing of different association field types in single indexed field

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5339@1543535 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2013-11-19 19:24:03 +00:00
parent 1accec983e
commit 6a8b870c58
9 changed files with 164 additions and 30 deletions

View File

@ -31,9 +31,9 @@ public class AssociationFacetField extends Field {
TYPE.setIndexed(true);
TYPE.freeze();
}
final String dim;
final String[] path;
final BytesRef assoc;
protected final String dim;
protected final String[] path;
protected final BytesRef assoc;
public AssociationFacetField(BytesRef assoc, String dim, String... path) {
super("dummy", TYPE);
@ -45,18 +45,6 @@ public class AssociationFacetField extends Field {
this.path = path;
}
/** Utility ctor: associates an int value (translates it
* to 4-byte BytesRef). */
public AssociationFacetField(int assoc, String dim, String... path) {
this(intToBytesRef(assoc), dim, path);
}
/** Utility ctor: associates a float value (translates it
* to 4-byte BytesRef). */
public AssociationFacetField(float assoc, String dim, String... path) {
this(floatToBytesRef(assoc), dim, path);
}
private static BytesRef intToBytesRef(int v) {
byte[] bytes = new byte[4];
// big-endian:
@ -67,12 +55,8 @@ public class AssociationFacetField extends Field {
return new BytesRef(bytes);
}
private static BytesRef floatToBytesRef(float v) {
return intToBytesRef(Float.floatToIntBits(v));
}
@Override
public String toString() {
return "FacetField(dim=" + dim + " path=" + Arrays.toString(path) + ")";
return "AssociationFacetField(dim=" + dim + " path=" + Arrays.toString(path) + " bytes=" + assoc + ")";
}
}

View File

@ -23,6 +23,7 @@ import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Field;
@ -51,6 +52,10 @@ public class DocumentBuilder {
private final TaxonomyWriter taxoWriter;
private final FacetsConfig config;
// Used only for best-effort detection of app mixing
// int/float/bytes in a single indexed field:
private final Map<String,String> assocDimTypes = new ConcurrentHashMap<String,String>();
public DocumentBuilder(TaxonomyWriter taxoWriter, FacetsConfig config) {
this.taxoWriter = taxoWriter;
this.config = config;
@ -103,6 +108,24 @@ public class DocumentBuilder {
assocByField.put(indexFieldName, fields);
}
fields.add(facetField);
// Best effort: detect mismatched association types in the same
// indexed field.  Each association field is classified by its
// concrete class; anything that is neither the int nor the float
// subclass is treated as raw bytes:
String type;
if (facetField instanceof IntAssociationFacetField) {
  type = "int";
} else if (facetField instanceof FloatAssociationFacetField) {
  type = "float";
} else {
  type = "bytes";
}
// NOTE: not thread safe (the get and the put below are two separate
// steps), but this is just best effort:
String curType = assocDimTypes.get(indexFieldName);
if (curType == null) {
  // First association seen for this indexed field: remember its type.
  assocDimTypes.put(indexFieldName, type);
} else if (!curType.equals(type)) {
  throw new IllegalArgumentException("mixing incompatible types of AssociationFacetField (" + curType + " and " + type + ") in indexed field \"" + indexFieldName + "\"; use FacetsConfig to change the indexFieldName for each dimension");
}
}
}

View File

@ -0,0 +1,48 @@
package org.apache.lucene.facet.simple;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.Arrays;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.util.BytesRef;
/** Associates an arbitrary float with the added facet
 *  path, encoding the float into a 4-byte BytesRef. */
public class FloatAssociationFacetField extends AssociationFacetField {

  /** Utility ctor: associates a float value (translates it
   *  to 4-byte BytesRef). */
  public FloatAssociationFacetField(float assoc, String dim, String... path) {
    super(floatToBytesRef(assoc), dim, path);
  }

  /** Encodes a float as a 4-byte BytesRef: the value is converted to
   *  its int bit pattern with {@link Float#floatToIntBits} and then
   *  handed to {@link IntAssociationFacetField#intToBytesRef}. */
  public static BytesRef floatToBytesRef(float v) {
    return IntAssociationFacetField.intToBytesRef(Float.floatToIntBits(v));
  }

  /** Decodes a float from a BytesRef: the int is read with
   *  {@link IntAssociationFacetField#bytesRefToInt} and reinterpreted
   *  as float bits with {@link Float#intBitsToFloat}. */
  public static float bytesRefToFloat(BytesRef b) {
    return Float.intBitsToFloat(IntAssociationFacetField.bytesRefToInt(b));
  }

  /** Returns a debug string holding the dimension, the path and the
   *  decoded float value. */
  @Override
  public String toString() {
    return "FloatAssociationFacetField(dim=" + dim + " path=" + Arrays.toString(path) + " value=" + bytesRefToFloat(assoc) + ")";
  }
}

View File

@ -0,0 +1,57 @@
package org.apache.lucene.facet.simple;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.Arrays;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.util.BytesRef;
/** Facet field that attaches an int to the added facet path; the
 *  int is stored as a 4-byte, big-endian BytesRef. */
public class IntAssociationFacetField extends AssociationFacetField {

  /** Creates the field, converting {@code assoc} to its 4-byte
   *  BytesRef form via {@link #intToBytesRef}. */
  public IntAssociationFacetField(int assoc, String dim, String... path) {
    super(intToBytesRef(assoc), dim, path);
  }

  /** Encodes an int into a new 4-byte BytesRef, highest-order
   *  byte first. */
  public static BytesRef intToBytesRef(int v) {
    // big-endian:
    final byte[] encoded = {
      (byte) (v >> 24),
      (byte) (v >> 16),
      (byte) (v >> 8),
      (byte) v
    };
    return new BytesRef(encoded);
  }

  /** Decodes the int stored in the four bytes starting at
   *  {@code b.offset}, highest-order byte first.  {@code b.length}
   *  is not consulted. */
  public static int bytesRefToInt(BytesRef b) {
    final byte[] raw = b.bytes;
    final int start = b.offset;
    int value = 0;
    for (int i = 0; i < 4; i++) {
      // shift what we have so far up one byte and append the next one
      value = (value << 8) | (raw[start + i] & 0xFF);
    }
    return value;
  }

  /** Returns a debug string holding the dimension, the path and the
   *  decoded int value. */
  @Override
  public String toString() {
    final StringBuilder sb = new StringBuilder("IntAssociationFacetField(dim=");
    sb.append(dim);
    sb.append(" path=").append(Arrays.toString(path));
    sb.append(" value=").append(bytesRefToInt(assoc));
    sb.append(")");
    return sb.toString();
  }
}

View File

@ -53,7 +53,7 @@ import org.apache.lucene.search.TermQuery;
public final class SimpleDrillDownQuery extends Query {
private static Term term(String field, String dim, String[] path) {
return new Term(field, FacetDocument.pathToString(dim, path));
return new Term(field, DocumentBuilder.pathToString(dim, path));
}
private final FacetsConfig config;

View File

@ -230,7 +230,7 @@ public class SortedSetDocValuesFacetCounts extends Facets {
throw new IllegalArgumentException("path must be length=1");
}
int ord = (int) dv.lookupTerm(new BytesRef(FacetDocument.pathToString(dim, path)));
int ord = (int) dv.lookupTerm(new BytesRef(DocumentBuilder.pathToString(dim, path)));
if (ord < 0) {
return -1;
}

View File

@ -110,7 +110,7 @@ public final class SortedSetDocValuesReaderState {
// support arbitrary hierarchy:
for(int ord=0;ord<valueCount;ord++) {
dv.lookupOrd(ord, spare);
String[] components = FacetDocument.stringToPath(spare.utf8ToString());
String[] components = DocumentBuilder.stringToPath(spare.utf8ToString());
if (components.length != 2) {
throw new IllegalArgumentException("this class can only handle 2 level hierarchy (dim/value); got: " + Arrays.toString(components) + " " + spare.utf8ToString());
}

View File

@ -47,21 +47,21 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
FacetsConfig config = new FacetsConfig();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
FacetDocument facetDoc = new FacetDocument(null, config);
DocumentBuilder builder = new DocumentBuilder(null, config);
Document doc = new Document();
doc.add(new SortedSetDocValuesFacetField("a", "foo"));
doc.add(new SortedSetDocValuesFacetField("a", "bar"));
doc.add(new SortedSetDocValuesFacetField("a", "zoo"));
doc.add(new SortedSetDocValuesFacetField("b", "baz"));
writer.addDocument(facetDoc.build(doc));
writer.addDocument(builder.build(doc));
if (random().nextBoolean()) {
writer.commit();
}
doc = new Document();
doc.add(new SortedSetDocValuesFacetField("a", "foo"));
writer.addDocument(facetDoc.build(doc));
writer.addDocument(builder.build(doc));
// NRT open
IndexSearcher searcher = newSearcher(writer.getReader());

View File

@ -40,6 +40,7 @@ import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
import org.junit.AfterClass;
import org.junit.BeforeClass;
@ -78,11 +79,11 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase {
// every 11th document is added empty, this used to cause the association
// aggregators to go into an infinite loop
if (i % 11 != 0) {
doc.add(new AssociationFacetField(2, "int", "a"));
doc.add(new AssociationFacetField(0.5f, "float", "a"));
doc.add(new IntAssociationFacetField(2, "int", "a"));
doc.add(new FloatAssociationFacetField(0.5f, "float", "a"));
if (i % 2 == 0) { // 50
doc.add(new AssociationFacetField(3, "int", "b"));
doc.add(new AssociationFacetField(0.2f, "float", "b"));
doc.add(new IntAssociationFacetField(3, "int", "b"));
doc.add(new FloatAssociationFacetField(0.2f, "float", "b"));
}
}
writer.addDocument(builder.build(doc));
@ -167,4 +168,25 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase {
// expected
}
}
/** Checks that building a document which carries both an int and a
 *  float association facet field is rejected with an
 *  IllegalArgumentException.  A default FacetsConfig is used, so no
 *  dimension gets a custom index field name. */
public void testMixedTypesInSameIndexField() throws Exception {
  Directory dir = newDirectory();
  Directory taxoDir = newDirectory();
  TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
  FacetsConfig config = new FacetsConfig();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
  try {
    DocumentBuilder builder = new DocumentBuilder(taxoWriter, config);

    Document doc = new Document();
    doc.add(new IntAssociationFacetField(14, "a", "x"));
    doc.add(new FloatAssociationFacetField(55.0f, "b", "y"));
    try {
      writer.addDocument(builder.build(doc));
      fail("did not hit expected exception");
    } catch (IllegalArgumentException expected) {
      // expected
    }
  } finally {
    // Release everything even when the assertion above fails or an
    // unexpected exception escapes, so a failure here is not followed
    // by leaked writers/directories.
    IOUtils.close(writer, taxoWriter, dir, taxoDir);
  }
}
}