Add SORTED_SET doc-values coverage to the lucene/core index tests.

 * TestDocValuesIndexing: new testTooLargeTermSortedSetBytes, which expects an
   IllegalArgumentException when a 100000-byte value is added to a document
   as a SortedSetDocValuesField.
 * TestIndexWriter, TestIndexWriterExceptions: add "sortedsetdv" fields to the
   existing test documents; the fields are only added to the document when
   defaultCodecSupportsSortedSet() returns true.
 * TestMultiDocValues: new testSortedSet and testSortedSetWithDups, which
   compare MultiDocValues.getSortedSetValues() on the pre-merge reader with
   the sorted-set doc values of the single segment left by forceMerge(1):
   first the value count and every term, then the per-document ord lists.

NOTE(review): the line structure of this patch was rebuilt from a copy whose
newlines had been collapsed. Hunk line counts match the @@ headers, but the
leading whitespace (and the position of the blank context lines in the
TestIndexWriterExceptions hunk) is reconstructed, so apply with a
whitespace-insensitive option if context does not match. The expectedList
declarations carry the <Long> type argument that the later
expectedList.get(upto).longValue() calls require.

diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java b/lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java
index f3e68bf60ed..d79a081d39b 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestDocValuesIndexing.java
@@ -28,6 +28,7 @@ import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.document.SortedSetDocValuesField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.search.FieldCache;
 import org.apache.lucene.store.Directory;
@@ -372,6 +373,30 @@ public class TestDocValuesIndexing extends LuceneTestCase {
     iwriter.close();
     directory.close();
   }
+
+  public void testTooLargeTermSortedSetBytes() throws IOException {
+    assumeTrue("codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
+    Analyzer analyzer = new MockAnalyzer(random());
+
+    Directory directory = newDirectory();
+    // we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
+    iwc.setMergePolicy(newLogMergePolicy());
+    IndexWriter iwriter = new IndexWriter(directory, iwc);
+    Document doc = new Document();
+    byte bytes[] = new byte[100000];
+    BytesRef b = new BytesRef(bytes);
+    random().nextBytes(bytes);
+    doc.add(new SortedSetDocValuesField("dv", b));
+    try {
+      iwriter.addDocument(doc);
+      fail("did not get expected exception");
+    } catch (IllegalArgumentException expected) {
+      // expected
+    }
+    iwriter.close();
+    directory.close();
+  }
 
   // Two documents across segments
   public void testMixedTypesDifferentSegments() throws Exception {
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
index 3a73ba28238..8fcfcee13e1 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
@@ -40,6 +40,7 @@ import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.document.SortedSetDocValuesField;
 import org.apache.lucene.document.StoredField;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
@@ -1023,6 +1024,10 @@ public class TestIndexWriter extends LuceneTestCase {
     doc.add(new BinaryDocValuesField("binarydv", new BytesRef("500")));
     doc.add(new NumericDocValuesField("numericdv", 500));
     doc.add(new SortedDocValuesField("sorteddv", new BytesRef("500")));
+    if (defaultCodecSupportsSortedSet()) {
+      doc.add(new SortedSetDocValuesField("sortedsetdv", new BytesRef("one")));
+      doc.add(new SortedSetDocValuesField("sortedsetdv", new BytesRef("two")));
+    }
     w.addDocument(doc);
     doc = new Document();
     doc.add(newStringField(random, "id", "501", Field.Store.NO));
@@ -1030,6 +1035,10 @@ public class TestIndexWriter extends LuceneTestCase {
     doc.add(new BinaryDocValuesField("binarydv", new BytesRef("501")));
     doc.add(new NumericDocValuesField("numericdv", 501));
     doc.add(new SortedDocValuesField("sorteddv", new BytesRef("501")));
+    if (defaultCodecSupportsSortedSet()) {
+      doc.add(new SortedSetDocValuesField("sortedsetdv", new BytesRef("two")));
+      doc.add(new SortedSetDocValuesField("sortedsetdv", new BytesRef("three")));
+    }
     w.addDocument(doc);
     w.deleteDocuments(new Term("id", "500"));
     w.close();
@@ -1057,16 +1066,21 @@ public class TestIndexWriter extends LuceneTestCase {
     Field binaryDVField = new BinaryDocValuesField("binarydv", new BytesRef());
     Field numericDVField = new NumericDocValuesField("numericdv", 0);
     Field sortedDVField = new SortedDocValuesField("sorteddv", new BytesRef());
+    Field sortedSetDVField = new SortedSetDocValuesField("sortedsetdv", new BytesRef());
     doc.add(idField);
     doc.add(newField(random, "field", "some text contents", storedTextType));
     doc.add(binaryDVField);
     doc.add(numericDVField);
     doc.add(sortedDVField);
+    if (defaultCodecSupportsSortedSet()) {
+      doc.add(sortedSetDVField);
+    }
     for(int i=0;i<100;i++) {
       idField.setStringValue(Integer.toString(i));
       binaryDVField.setBytesValue(new BytesRef(idField.stringValue()));
       numericDVField.setLongValue(i);
       sortedDVField.setBytesValue(new BytesRef(idField.stringValue()));
+      sortedSetDVField.setBytesValue(new BytesRef(idField.stringValue()));
       int action = random.nextInt(100);
       if (action == 17) {
         w.addIndexes(adder);
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java
index 6bf5feb80b2..16f3589f379 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java
@@ -33,6 +33,7 @@ import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.document.SortedSetDocValuesField;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
@@ -143,6 +144,10 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
       doc.add(new NumericDocValuesField("numericdv", 5));
       doc.add(new BinaryDocValuesField("binarydv", new BytesRef("hello")));
       doc.add(new SortedDocValuesField("sorteddv", new BytesRef("world")));
+      if (defaultCodecSupportsSortedSet()) {
+        doc.add(new SortedSetDocValuesField("sortedsetdv", new BytesRef("hellllo")));
+        doc.add(new SortedSetDocValuesField("sortedsetdv", new BytesRef("again")));
+      }
 
       doc.add(newField(r, "content7", "aaa bbb ccc ddd", DocCopyIterator.custom4));
 
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestMultiDocValues.java b/lucene/core/src/test/org/apache/lucene/index/TestMultiDocValues.java
index de9ca00693c..78c8974c555 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestMultiDocValues.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestMultiDocValues.java
@@ -17,11 +17,14 @@ package org.apache.lucene.index;
  * limitations under the License.
  */
 
+import java.util.ArrayList;
+
 import org.apache.lucene.document.BinaryDocValuesField;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.document.SortedSetDocValuesField;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
@@ -189,4 +192,133 @@ public class TestMultiDocValues extends LuceneTestCase {
     ir2.close();
     dir.close();
   }
+
+  public void testSortedSet() throws Exception {
+    assumeTrue("codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
+    Directory dir = newDirectory();
+
+    IndexWriterConfig iwc = newIndexWriterConfig(random(), TEST_VERSION_CURRENT, null);
+    iwc.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+
+    int numDocs = atLeast(500);
+    for (int i = 0; i < numDocs; i++) {
+      Document doc = new Document();
+      int numValues = random().nextInt(5);
+      for (int j = 0; j < numValues; j++) {
+        doc.add(new SortedSetDocValuesField("bytes", new BytesRef(_TestUtil.randomUnicodeString(random()))));
+      }
+      iw.addDocument(doc);
+      if (random().nextInt(17) == 0) {
+        iw.commit();
+      }
+    }
+    DirectoryReader ir = iw.getReader();
+    iw.forceMerge(1);
+    DirectoryReader ir2 = iw.getReader();
+    AtomicReader merged = getOnlySegmentReader(ir2);
+    iw.close();
+
+    SortedSetDocValues multi = MultiDocValues.getSortedSetValues(ir, "bytes");
+    SortedSetDocValues single = merged.getSortedSetDocValues("bytes");
+    if (multi == null) {
+      assertNull(single);
+    } else {
+      assertEquals(single.getValueCount(), multi.getValueCount());
+      BytesRef actual = new BytesRef();
+      BytesRef expected = new BytesRef();
+      // check values
+      for (long i = 0; i < single.getValueCount(); i++) {
+        single.lookupOrd(i, expected);
+        multi.lookupOrd(i, actual);
+        assertEquals(expected, actual);
+      }
+      // check ord list
+      for (int i = 0; i < numDocs; i++) {
+        single.setDocument(i);
+        ArrayList<Long> expectedList = new ArrayList<Long>();
+        long ord;
+        while ((ord = single.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
+          expectedList.add(ord);
+        }
+
+        multi.setDocument(i);
+        int upto = 0;
+        while ((ord = multi.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
+          assertEquals(expectedList.get(upto).longValue(), ord);
+          upto++;
+        }
+        assertEquals(expectedList.size(), upto);
+      }
+    }
+
+    ir.close();
+    ir2.close();
+    dir.close();
+  }
+
+  // tries to make more dups than testSortedSet
+  public void testSortedSetWithDups() throws Exception {
+    assumeTrue("codec does not support SORTED_SET", defaultCodecSupportsSortedSet());
+    Directory dir = newDirectory();
+
+    IndexWriterConfig iwc = newIndexWriterConfig(random(), TEST_VERSION_CURRENT, null);
+    iwc.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+
+    int numDocs = atLeast(500);
+    for (int i = 0; i < numDocs; i++) {
+      Document doc = new Document();
+      int numValues = random().nextInt(5);
+      for (int j = 0; j < numValues; j++) {
+        doc.add(new SortedSetDocValuesField("bytes", new BytesRef(_TestUtil.randomSimpleString(random(), 2))));
+      }
+      iw.addDocument(doc);
+      if (random().nextInt(17) == 0) {
+        iw.commit();
+      }
+    }
+    DirectoryReader ir = iw.getReader();
+    iw.forceMerge(1);
+    DirectoryReader ir2 = iw.getReader();
+    AtomicReader merged = getOnlySegmentReader(ir2);
+    iw.close();
+
+    SortedSetDocValues multi = MultiDocValues.getSortedSetValues(ir, "bytes");
+    SortedSetDocValues single = merged.getSortedSetDocValues("bytes");
+    if (multi == null) {
+      assertNull(single);
+    } else {
+      assertEquals(single.getValueCount(), multi.getValueCount());
+      BytesRef actual = new BytesRef();
+      BytesRef expected = new BytesRef();
+      // check values
+      for (long i = 0; i < single.getValueCount(); i++) {
+        single.lookupOrd(i, expected);
+        multi.lookupOrd(i, actual);
+        assertEquals(expected, actual);
+      }
+      // check ord list
+      for (int i = 0; i < numDocs; i++) {
+        single.setDocument(i);
+        ArrayList<Long> expectedList = new ArrayList<Long>();
+        long ord;
+        while ((ord = single.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
+          expectedList.add(ord);
+        }
+
+        multi.setDocument(i);
+        int upto = 0;
+        while ((ord = multi.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
+          assertEquals(expectedList.get(upto).longValue(), ord);
+          upto++;
+        }
+        assertEquals(expectedList.size(), upto);
+      }
+    }
+
+    ir.close();
+    ir2.close();
+    dir.close();
+  }
 }