detect if cat path uses the delimiter; fix test

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1451202 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2013-02-28 14:01:57 +00:00
parent 29bc4ed434
commit 7c3f4ce119
3 changed files with 54 additions and 6 deletions

View File

@ -121,6 +121,14 @@ public class CategoryPath implements Comparable<CategoryPath> {
return length - other.length;
}
private void noDelimiter(char[] buf, int offset, int len, char delimiter) {
for(int idx=0;idx<len;idx++) {
if (buf[offset+idx] == delimiter) {
throw new IllegalArgumentException("delimiter character U+" + Integer.toHexString(delimiter) + " appears in path");
}
}
}
/**
* Copies the path components to the given {@code char[]}, starting at index
* {@code start}. {@code delimiter} is copied between the path components.
@ -141,10 +149,12 @@ public class CategoryPath implements Comparable<CategoryPath> {
for (int i = 0; i < upto; i++) {
int len = components[i].length();
components[i].getChars(0, len, buf, idx);
noDelimiter(buf, idx, len, delimiter);
idx += len;
buf[idx++] = delimiter;
}
components[upto].getChars(0, components[upto].length(), buf, idx);
noDelimiter(buf, idx, components[upto].length(), delimiter);
return idx + components[upto].length() - start;
}

View File

@ -47,6 +47,7 @@ import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
public class TestDemoFacets extends FacetTestCase {
@ -219,4 +220,27 @@ public class TestDemoFacets extends FacetTestCase {
dir.close();
taxoDir.close();
}
public void testLabelWithDelimiter() throws Exception {
Directory dir = newDirectory();
Directory taxoDir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
FacetFields facetFields = new FacetFields(taxoWriter);
Document doc = new Document();
doc.add(newTextField("field", "text", Field.Store.NO));
BytesRef br = new BytesRef(new byte[] {(byte) 0xee, (byte) 0x92, (byte) 0xaa, (byte) 0xef, (byte) 0x9d, (byte) 0x89});
facetFields.addFields(doc, Collections.singletonList(new CategoryPath("dim/" + br.utf8ToString(), '/')));
try {
writer.addDocument(doc);
} catch (IllegalArgumentException iae) {
// expected
}
writer.close();
taxoWriter.close();
dir.close();
taxoDir.close();
}
}

View File

@ -33,6 +33,7 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.facet.FacetTestCase;
import org.apache.lucene.facet.index.FacetFields;
import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.params.FacetSearchParams;
import org.apache.lucene.facet.search.DrillSideways.DrillSidewaysResult;
import org.apache.lucene.facet.taxonomy.CategoryPath;
@ -58,6 +59,7 @@ import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util._TestUtil;
@ -374,7 +376,15 @@ public class TestDrillSideways extends FacetTestCase {
for(int dim=0;dim<numDims;dim++) {
Set<String> values = new HashSet<String>();
while (values.size() < valueCount) {
String s = _TestUtil.randomRealisticUnicodeString(random());
String s;
while (true) {
s = _TestUtil.randomRealisticUnicodeString(random());
// We cannot include this character else the label
// is silently truncated:
if (s.indexOf(FacetIndexingParams.DEFAULT_FACET_DELIM_CHAR) == -1) {
break;
}
}
//String s = _TestUtil.randomSimpleString(random());
if (s.length() > 0) {
values.add(s);
@ -443,7 +453,7 @@ public class TestDrillSideways extends FacetTestCase {
paths.add(new CategoryPath("dim" + dim, dimValues[dim][dimValue]));
doc.add(new StringField("dim" + dim, dimValues[dim][dimValue], Field.Store.YES));
if (VERBOSE) {
System.out.println(" dim" + dim + "=" + dimValues[dim][dimValue]);
System.out.println(" dim" + dim + "=" + new BytesRef(dimValues[dim][dimValue]));
}
}
int dimValue2 = rawDoc.dims2[dim];
@ -451,7 +461,7 @@ public class TestDrillSideways extends FacetTestCase {
paths.add(new CategoryPath("dim" + dim, dimValues[dim][dimValue2]));
doc.add(new StringField("dim" + dim, dimValues[dim][dimValue2], Field.Store.YES));
if (VERBOSE) {
System.out.println(" dim" + dim + "=" + dimValues[dim][dimValue2]);
System.out.println(" dim" + dim + "=" + new BytesRef(dimValues[dim][dimValue2]));
}
}
}
@ -541,7 +551,11 @@ public class TestDrillSideways extends FacetTestCase {
}
}
if (VERBOSE) {
System.out.println(" dim" + dim + "=" + Arrays.toString(drillDowns[dim]));
BytesRef[] values = new BytesRef[drillDowns[dim].length];
for(int i=0;i<values.length;i++) {
values[i] = new BytesRef(drillDowns[dim][i]);
}
System.out.println(" dim" + dim + "=" + Arrays.toString(values));
}
count++;
}
@ -792,7 +806,7 @@ public class TestDrillSideways extends FacetTestCase {
for(FacetResultNode childNode : fr.getFacetResultNode().subResults) {
actualValues.put(childNode.label.components[1], (int) childNode.value);
if (VERBOSE) {
System.out.println(" " + childNode.label.components[1] + ": " + (int) childNode.value);
System.out.println(" " + new BytesRef(childNode.label.components[1]) + ": " + (int) childNode.value);
}
}
@ -805,7 +819,7 @@ public class TestDrillSideways extends FacetTestCase {
String value = dimValues[dim][i];
if (expected.counts[dim][i] != 0) {
if (VERBOSE) {
System.out.println(" " + value + ": " + expected.counts[dim][i]);
System.out.println(" " + new BytesRef(value) + ": " + expected.counts[dim][i]);
}
assertTrue(actualValues.containsKey(value));
assertEquals(expected.counts[dim][i], actualValues.get(value).intValue());