mirror of https://github.com/apache/lucene.git
LUCENE-5339: add best-effort detection of invalid mixing of different association field types in single indexed field
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5339@1543535 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
1accec983e
commit
6a8b870c58
|
@ -31,9 +31,9 @@ public class AssociationFacetField extends Field {
|
|||
TYPE.setIndexed(true);
|
||||
TYPE.freeze();
|
||||
}
|
||||
final String dim;
|
||||
final String[] path;
|
||||
final BytesRef assoc;
|
||||
protected final String dim;
|
||||
protected final String[] path;
|
||||
protected final BytesRef assoc;
|
||||
|
||||
public AssociationFacetField(BytesRef assoc, String dim, String... path) {
|
||||
super("dummy", TYPE);
|
||||
|
@ -45,18 +45,6 @@ public class AssociationFacetField extends Field {
|
|||
this.path = path;
|
||||
}
|
||||
|
||||
/** Utility ctor: associates an int value (translates it
|
||||
* to 4-byte BytesRef). */
|
||||
public AssociationFacetField(int assoc, String dim, String... path) {
|
||||
this(intToBytesRef(assoc), dim, path);
|
||||
}
|
||||
|
||||
/** Utility ctor: associates a float value (translates it
|
||||
* to 4-byte BytesRef). */
|
||||
public AssociationFacetField(float assoc, String dim, String... path) {
|
||||
this(floatToBytesRef(assoc), dim, path);
|
||||
}
|
||||
|
||||
private static BytesRef intToBytesRef(int v) {
|
||||
byte[] bytes = new byte[4];
|
||||
// big-endian:
|
||||
|
@ -67,12 +55,8 @@ public class AssociationFacetField extends Field {
|
|||
return new BytesRef(bytes);
|
||||
}
|
||||
|
||||
private static BytesRef floatToBytesRef(float v) {
|
||||
return intToBytesRef(Float.floatToIntBits(v));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "FacetField(dim=" + dim + " path=" + Arrays.toString(path) + ")";
|
||||
return "AssociationFacetField(dim=" + dim + " path=" + Arrays.toString(path) + " bytes=" + assoc + ")";
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,6 +23,7 @@ import java.util.Arrays;
|
|||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
|
||||
import org.apache.lucene.document.BinaryDocValuesField;
|
||||
import org.apache.lucene.document.Field;
|
||||
|
@ -51,6 +52,10 @@ public class DocumentBuilder {
|
|||
private final TaxonomyWriter taxoWriter;
|
||||
private final FacetsConfig config;
|
||||
|
||||
// Used only for best-effort detection of app mixing
|
||||
// int/float/bytes in a single indexed field:
|
||||
private final Map<String,String> assocDimTypes = new ConcurrentHashMap<String,String>();
|
||||
|
||||
public DocumentBuilder(TaxonomyWriter taxoWriter, FacetsConfig config) {
|
||||
this.taxoWriter = taxoWriter;
|
||||
this.config = config;
|
||||
|
@ -103,6 +108,24 @@ public class DocumentBuilder {
|
|||
assocByField.put(indexFieldName, fields);
|
||||
}
|
||||
fields.add(facetField);
|
||||
|
||||
// Best effort: detect mis-matched types in same
|
||||
// indexed field:
|
||||
String type;
|
||||
if (facetField instanceof IntAssociationFacetField) {
|
||||
type = "int";
|
||||
} else if (facetField instanceof FloatAssociationFacetField) {
|
||||
type = "float";
|
||||
} else {
|
||||
type = "bytes";
|
||||
}
|
||||
// NOTE: not thread safe, but this is just best effort:
|
||||
String curType = assocDimTypes.get(indexFieldName);
|
||||
if (curType == null) {
|
||||
assocDimTypes.put(indexFieldName, type);
|
||||
} else if (!curType.equals(type)) {
|
||||
throw new IllegalArgumentException("mixing incompatible types of AssocationFacetField (" + curType + " and " + type + ") in indexed field \"" + indexFieldName + "\"; use FacetsConfig to change the indexFieldName for each dimension");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,48 @@
|
|||
package org.apache.lucene.facet.simple;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/** Associates an arbitrary float with the added facet
|
||||
* path, encoding the float into a 4-byte BytesRef. */
|
||||
public class FloatAssociationFacetField extends AssociationFacetField {
|
||||
|
||||
/** Utility ctor: associates an int value (translates it
|
||||
* to 4-byte BytesRef). */
|
||||
public FloatAssociationFacetField(float assoc, String dim, String... path) {
|
||||
super(floatToBytesRef(assoc), dim, path);
|
||||
}
|
||||
|
||||
public static BytesRef floatToBytesRef(float v) {
|
||||
return IntAssociationFacetField.intToBytesRef(Float.floatToIntBits(v));
|
||||
}
|
||||
|
||||
public static float bytesRefToFloat(BytesRef b) {
|
||||
return Float.intBitsToFloat(IntAssociationFacetField.bytesRefToInt(b));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "FloatAssociationFacetField(dim=" + dim + " path=" + Arrays.toString(path) + " value=" + bytesRefToFloat(assoc) + ")";
|
||||
}
|
||||
}
|
|
@ -0,0 +1,57 @@
|
|||
package org.apache.lucene.facet.simple;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/** Associates an arbitrary int with the added facet
|
||||
* path, encoding the int into a 4-byte BytesRef. */
|
||||
public class IntAssociationFacetField extends AssociationFacetField {
|
||||
|
||||
/** Utility ctor: associates an int value (translates it
|
||||
* to 4-byte BytesRef). */
|
||||
public IntAssociationFacetField(int assoc, String dim, String... path) {
|
||||
super(intToBytesRef(assoc), dim, path);
|
||||
}
|
||||
|
||||
public static BytesRef intToBytesRef(int v) {
|
||||
byte[] bytes = new byte[4];
|
||||
// big-endian:
|
||||
bytes[0] = (byte) (v >> 24);
|
||||
bytes[1] = (byte) (v >> 16);
|
||||
bytes[2] = (byte) (v >> 8);
|
||||
bytes[3] = (byte) v;
|
||||
return new BytesRef(bytes);
|
||||
}
|
||||
|
||||
public static int bytesRefToInt(BytesRef b) {
|
||||
return ((b.bytes[b.offset]&0xFF) << 24) |
|
||||
((b.bytes[b.offset+1]&0xFF) << 16) |
|
||||
((b.bytes[b.offset+2]&0xFF) << 8) |
|
||||
(b.bytes[b.offset+3]&0xFF);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "IntAssociationFacetField(dim=" + dim + " path=" + Arrays.toString(path) + " value=" + bytesRefToInt(assoc) + ")";
|
||||
}
|
||||
}
|
|
@ -53,7 +53,7 @@ import org.apache.lucene.search.TermQuery;
|
|||
public final class SimpleDrillDownQuery extends Query {
|
||||
|
||||
private static Term term(String field, String dim, String[] path) {
|
||||
return new Term(field, FacetDocument.pathToString(dim, path));
|
||||
return new Term(field, DocumentBuilder.pathToString(dim, path));
|
||||
}
|
||||
|
||||
private final FacetsConfig config;
|
||||
|
|
|
@ -230,7 +230,7 @@ public class SortedSetDocValuesFacetCounts extends Facets {
|
|||
throw new IllegalArgumentException("path must be length=1");
|
||||
}
|
||||
|
||||
int ord = (int) dv.lookupTerm(new BytesRef(FacetDocument.pathToString(dim, path)));
|
||||
int ord = (int) dv.lookupTerm(new BytesRef(DocumentBuilder.pathToString(dim, path)));
|
||||
if (ord < 0) {
|
||||
return -1;
|
||||
}
|
||||
|
|
|
@ -110,7 +110,7 @@ public final class SortedSetDocValuesReaderState {
|
|||
// support arbitrary hierarchy:
|
||||
for(int ord=0;ord<valueCount;ord++) {
|
||||
dv.lookupOrd(ord, spare);
|
||||
String[] components = FacetDocument.stringToPath(spare.utf8ToString());
|
||||
String[] components = DocumentBuilder.stringToPath(spare.utf8ToString());
|
||||
if (components.length != 2) {
|
||||
throw new IllegalArgumentException("this class can only handle 2 level hierarchy (dim/value); got: " + Arrays.toString(components) + " " + spare.utf8ToString());
|
||||
}
|
||||
|
|
|
@ -47,21 +47,21 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
|
|||
|
||||
FacetsConfig config = new FacetsConfig();
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
|
||||
FacetDocument facetDoc = new FacetDocument(null, config);
|
||||
DocumentBuilder builder = new DocumentBuilder(null, config);
|
||||
|
||||
Document doc = new Document();
|
||||
doc.add(new SortedSetDocValuesFacetField("a", "foo"));
|
||||
doc.add(new SortedSetDocValuesFacetField("a", "bar"));
|
||||
doc.add(new SortedSetDocValuesFacetField("a", "zoo"));
|
||||
doc.add(new SortedSetDocValuesFacetField("b", "baz"));
|
||||
writer.addDocument(facetDoc.build(doc));
|
||||
writer.addDocument(builder.build(doc));
|
||||
if (random().nextBoolean()) {
|
||||
writer.commit();
|
||||
}
|
||||
|
||||
doc = new Document();
|
||||
doc.add(new SortedSetDocValuesFacetField("a", "foo"));
|
||||
writer.addDocument(facetDoc.build(doc));
|
||||
writer.addDocument(builder.build(doc));
|
||||
|
||||
// NRT open
|
||||
IndexSearcher searcher = newSearcher(writer.getReader());
|
||||
|
|
|
@ -40,6 +40,7 @@ import org.apache.lucene.search.IndexSearcher;
|
|||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
|
@ -78,11 +79,11 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase {
|
|||
// every 11th document is added empty, this used to cause the association
|
||||
// aggregators to go into an infinite loop
|
||||
if (i % 11 != 0) {
|
||||
doc.add(new AssociationFacetField(2, "int", "a"));
|
||||
doc.add(new AssociationFacetField(0.5f, "float", "a"));
|
||||
doc.add(new IntAssociationFacetField(2, "int", "a"));
|
||||
doc.add(new FloatAssociationFacetField(0.5f, "float", "a"));
|
||||
if (i % 2 == 0) { // 50
|
||||
doc.add(new AssociationFacetField(3, "int", "b"));
|
||||
doc.add(new AssociationFacetField(0.2f, "float", "b"));
|
||||
doc.add(new IntAssociationFacetField(3, "int", "b"));
|
||||
doc.add(new FloatAssociationFacetField(0.2f, "float", "b"));
|
||||
}
|
||||
}
|
||||
writer.addDocument(builder.build(doc));
|
||||
|
@ -167,4 +168,25 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase {
|
|||
// expected
|
||||
}
|
||||
}
|
||||
|
||||
public void testMixedTypesInSameIndexField() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
Directory taxoDir = newDirectory();
|
||||
|
||||
TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
|
||||
FacetsConfig config = new FacetsConfig();
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
|
||||
|
||||
DocumentBuilder builder = new DocumentBuilder(taxoWriter, config);
|
||||
Document doc = new Document();
|
||||
doc.add(new IntAssociationFacetField(14, "a", "x"));
|
||||
doc.add(new FloatAssociationFacetField(55.0f, "b", "y"));
|
||||
try {
|
||||
writer.addDocument(builder.build(doc));
|
||||
fail("did not hit expected exception");
|
||||
} catch (IllegalArgumentException exc) {
|
||||
// expected
|
||||
}
|
||||
IOUtils.close(writer, taxoWriter, dir, taxoDir);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue