mirror of https://github.com/apache/lucene.git
LUCENE-6207: Fixed consumption of several terms enums on the same sorted (set) doc values instance.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1655693 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
1f131a6b20
commit
b696595cc6
|
@ -546,6 +546,12 @@ Other
|
|||
|
||||
* LUCENE-5915: Remove Pulsing postings format. (Robert Muir)
|
||||
|
||||
======================= Lucene 4.10.4 ======================
|
||||
|
||||
* LUCENE-6207: Fixed consumption of several terms enums on the same
|
||||
sorted (set) doc values instance at the same time.
|
||||
(Tom Shally, Robert Muir, Adrien Grand)
|
||||
|
||||
======================= Lucene 4.10.3 ======================
|
||||
|
||||
Bug fixes
|
||||
|
@ -553,6 +559,7 @@ Bug fixes
|
|||
* LUCENE-3229: Overlapping ordered SpanNearQuery spans should not match.
|
||||
(Ludovic Boutros, Paul Elschot, Greg Dearing, ehatcher)
|
||||
|
||||
|
||||
======================= Lucene 4.10.2 ======================
|
||||
|
||||
Bug fixes
|
||||
|
|
|
@ -29,7 +29,6 @@ import org.apache.lucene.util.BytesRefBuilder;
|
|||
class SortedDocValuesTermsEnum extends TermsEnum {
|
||||
private final SortedDocValues values;
|
||||
private int currentOrd = -1;
|
||||
private BytesRef term;
|
||||
private final BytesRefBuilder scratch;
|
||||
|
||||
/** Creates a new TermsEnum over the provided values */
|
||||
|
@ -44,7 +43,6 @@ class SortedDocValuesTermsEnum extends TermsEnum {
|
|||
if (ord >= 0) {
|
||||
currentOrd = ord;
|
||||
scratch.copyBytes(text);
|
||||
term = scratch.get();
|
||||
return SeekStatus.FOUND;
|
||||
} else {
|
||||
currentOrd = -ord-1;
|
||||
|
@ -52,7 +50,7 @@ class SortedDocValuesTermsEnum extends TermsEnum {
|
|||
return SeekStatus.END;
|
||||
} else {
|
||||
// TODO: hmm can we avoid this "extra" lookup?:
|
||||
term = values.lookupOrd(currentOrd);
|
||||
scratch.copyBytes(values.lookupOrd(currentOrd));
|
||||
return SeekStatus.NOT_FOUND;
|
||||
}
|
||||
}
|
||||
|
@ -64,7 +62,6 @@ class SortedDocValuesTermsEnum extends TermsEnum {
|
|||
if (ord >= 0) {
|
||||
currentOrd = ord;
|
||||
scratch.copyBytes(text);
|
||||
term = scratch.get();
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
|
@ -75,7 +72,7 @@ class SortedDocValuesTermsEnum extends TermsEnum {
|
|||
public void seekExact(long ord) throws IOException {
|
||||
assert ord >= 0 && ord < values.getValueCount();
|
||||
currentOrd = (int) ord;
|
||||
term = values.lookupOrd(currentOrd);
|
||||
scratch.copyBytes(values.lookupOrd(currentOrd));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -84,13 +81,13 @@ class SortedDocValuesTermsEnum extends TermsEnum {
|
|||
if (currentOrd >= values.getValueCount()) {
|
||||
return null;
|
||||
}
|
||||
term = values.lookupOrd(currentOrd);
|
||||
return term;
|
||||
scratch.copyBytes(values.lookupOrd(currentOrd));
|
||||
return scratch.get();
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef term() throws IOException {
|
||||
return term;
|
||||
return scratch.get();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -29,7 +29,6 @@ import org.apache.lucene.util.BytesRefBuilder;
|
|||
class SortedSetDocValuesTermsEnum extends TermsEnum {
|
||||
private final SortedSetDocValues values;
|
||||
private long currentOrd = -1;
|
||||
private BytesRef term;
|
||||
private final BytesRefBuilder scratch;
|
||||
|
||||
/** Creates a new TermsEnum over the provided values */
|
||||
|
@ -44,7 +43,6 @@ class SortedSetDocValuesTermsEnum extends TermsEnum {
|
|||
if (ord >= 0) {
|
||||
currentOrd = ord;
|
||||
scratch.copyBytes(text);
|
||||
term = scratch.get();
|
||||
return SeekStatus.FOUND;
|
||||
} else {
|
||||
currentOrd = -ord-1;
|
||||
|
@ -52,7 +50,7 @@ class SortedSetDocValuesTermsEnum extends TermsEnum {
|
|||
return SeekStatus.END;
|
||||
} else {
|
||||
// TODO: hmm can we avoid this "extra" lookup?:
|
||||
term = values.lookupOrd(currentOrd);
|
||||
scratch.copyBytes(values.lookupOrd(currentOrd));
|
||||
return SeekStatus.NOT_FOUND;
|
||||
}
|
||||
}
|
||||
|
@ -64,7 +62,6 @@ class SortedSetDocValuesTermsEnum extends TermsEnum {
|
|||
if (ord >= 0) {
|
||||
currentOrd = ord;
|
||||
scratch.copyBytes(text);
|
||||
term = scratch.get();
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
|
@ -75,7 +72,7 @@ class SortedSetDocValuesTermsEnum extends TermsEnum {
|
|||
public void seekExact(long ord) throws IOException {
|
||||
assert ord >= 0 && ord < values.getValueCount();
|
||||
currentOrd = (int) ord;
|
||||
term = values.lookupOrd(currentOrd);
|
||||
scratch.copyBytes(values.lookupOrd(currentOrd));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -84,13 +81,13 @@ class SortedSetDocValuesTermsEnum extends TermsEnum {
|
|||
if (currentOrd >= values.getValueCount()) {
|
||||
return null;
|
||||
}
|
||||
term = values.lookupOrd(currentOrd);
|
||||
return term;
|
||||
scratch.copyBytes(values.lookupOrd(currentOrd));
|
||||
return scratch.get();
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef term() throws IOException {
|
||||
return term;
|
||||
return scratch.get();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -37,6 +37,7 @@ import org.apache.lucene.index.DirectoryReader;
|
|||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.SerialMergeScheduler;
|
||||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
|
@ -184,10 +185,13 @@ public class TestLucene50DocValuesFormat extends BaseCompressingDocValuesFormatT
|
|||
LeafReader r = context.reader();
|
||||
Terms terms = r.terms("indexed");
|
||||
if (terms != null) {
|
||||
assertEquals(terms.size(), r.getSortedSetDocValues("dv").getValueCount());
|
||||
SortedSetDocValues ssdv = r.getSortedSetDocValues("dv");
|
||||
assertEquals(terms.size(), ssdv.getValueCount());
|
||||
TermsEnum expected = terms.iterator(null);
|
||||
TermsEnum actual = r.getSortedSetDocValues("dv").termsEnum();
|
||||
assertEquals(terms.size(), expected, actual);
|
||||
|
||||
doTestSortedSetEnumAdvanceIndependently(ssdv);
|
||||
}
|
||||
}
|
||||
ir.close();
|
||||
|
|
|
@ -26,6 +26,7 @@ import java.util.ArrayList;
|
|||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Set;
|
||||
|
@ -57,6 +58,7 @@ import org.apache.lucene.search.TopDocs;
|
|||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.BytesRefHash;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
@ -2945,6 +2947,142 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
|
|||
directory.close();
|
||||
}
|
||||
|
||||
public void testSortedEnumAdvanceIndependently() throws IOException {
|
||||
Directory directory = newDirectory();
|
||||
Analyzer analyzer = new MockAnalyzer(random());
|
||||
IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
|
||||
iwconfig.setMergePolicy(newLogMergePolicy());
|
||||
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
|
||||
|
||||
Document doc = new Document();
|
||||
SortedDocValuesField field = new SortedDocValuesField("field", new BytesRef("2"));
|
||||
doc.add(field);
|
||||
iwriter.addDocument(doc);
|
||||
field.setBytesValue(new BytesRef("1"));
|
||||
iwriter.addDocument(doc);
|
||||
field.setBytesValue(new BytesRef("3"));
|
||||
iwriter.addDocument(doc);
|
||||
|
||||
iwriter.commit();
|
||||
iwriter.forceMerge(1);
|
||||
|
||||
DirectoryReader ireader = iwriter.getReader();
|
||||
iwriter.close();
|
||||
|
||||
SortedDocValues dv = getOnlySegmentReader(ireader).getSortedDocValues("field");
|
||||
doTestSortedSetEnumAdvanceIndependently(DocValues.singleton(dv));
|
||||
|
||||
ireader.close();
|
||||
directory.close();
|
||||
}
|
||||
|
||||
public void testSortedSetEnumAdvanceIndependently() throws IOException {
|
||||
Directory directory = newDirectory();
|
||||
Analyzer analyzer = new MockAnalyzer(random());
|
||||
IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
|
||||
iwconfig.setMergePolicy(newLogMergePolicy());
|
||||
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
|
||||
|
||||
Document doc = new Document();
|
||||
SortedSetDocValuesField field1 = new SortedSetDocValuesField("field", new BytesRef("2"));
|
||||
SortedSetDocValuesField field2 = new SortedSetDocValuesField("field", new BytesRef("3"));
|
||||
doc.add(field1);
|
||||
doc.add(field2);
|
||||
iwriter.addDocument(doc);
|
||||
field1.setBytesValue(new BytesRef("1"));
|
||||
iwriter.addDocument(doc);
|
||||
field2.setBytesValue(new BytesRef("2"));
|
||||
iwriter.addDocument(doc);
|
||||
|
||||
iwriter.commit();
|
||||
iwriter.forceMerge(1);
|
||||
|
||||
DirectoryReader ireader = iwriter.getReader();
|
||||
iwriter.close();
|
||||
|
||||
SortedSetDocValues dv = getOnlySegmentReader(ireader).getSortedSetDocValues("field");
|
||||
doTestSortedSetEnumAdvanceIndependently(dv);
|
||||
|
||||
ireader.close();
|
||||
directory.close();
|
||||
}
|
||||
|
||||
protected void doTestSortedSetEnumAdvanceIndependently(SortedSetDocValues dv) throws IOException {
|
||||
if (dv.getValueCount() < 2) {
|
||||
return;
|
||||
}
|
||||
List<BytesRef> terms = new ArrayList<>();
|
||||
TermsEnum te = dv.termsEnum();
|
||||
terms.add(BytesRef.deepCopyOf(te.next()));
|
||||
terms.add(BytesRef.deepCopyOf(te.next()));
|
||||
|
||||
// Make sure that calls to next() does not modify the term of the other enum
|
||||
TermsEnum enum1 = dv.termsEnum();
|
||||
TermsEnum enum2 = dv.termsEnum();
|
||||
BytesRefBuilder term1 = new BytesRefBuilder();
|
||||
BytesRefBuilder term2 = new BytesRefBuilder();
|
||||
|
||||
term1.copyBytes(enum1.next());
|
||||
term2.copyBytes(enum2.next());
|
||||
term1.copyBytes(enum1.next());
|
||||
|
||||
assertEquals(term1.get(), enum1.term());
|
||||
assertEquals(term2.get(), enum2.term());
|
||||
|
||||
// Same for seekCeil
|
||||
enum1 = dv.termsEnum();
|
||||
enum2 = dv.termsEnum();
|
||||
term1 = new BytesRefBuilder();
|
||||
term2 = new BytesRefBuilder();
|
||||
|
||||
term2.copyBytes(enum2.next());
|
||||
BytesRefBuilder seekTerm = new BytesRefBuilder();
|
||||
seekTerm.append(terms.get(0));
|
||||
seekTerm.append((byte) 0);
|
||||
enum1.seekCeil(seekTerm.get());
|
||||
term1.copyBytes(enum1.term());
|
||||
|
||||
assertEquals(term1.get(), enum1.term());
|
||||
assertEquals(term2.get(), enum2.term());
|
||||
|
||||
// Same for seekCeil on an exact value
|
||||
enum1 = dv.termsEnum();
|
||||
enum2 = dv.termsEnum();
|
||||
term1 = new BytesRefBuilder();
|
||||
term2 = new BytesRefBuilder();
|
||||
|
||||
term2.copyBytes(enum2.next());
|
||||
enum1.seekCeil(terms.get(1));
|
||||
term1.copyBytes(enum1.term());
|
||||
|
||||
assertEquals(term1.get(), enum1.term());
|
||||
assertEquals(term2.get(), enum2.term());
|
||||
|
||||
// Same for seekExact
|
||||
enum1 = dv.termsEnum();
|
||||
enum2 = dv.termsEnum();
|
||||
term1 = new BytesRefBuilder();
|
||||
term2 = new BytesRefBuilder();
|
||||
|
||||
term2.copyBytes(enum2.next());
|
||||
final boolean found = enum1.seekExact(terms.get(1));
|
||||
assertTrue(found);
|
||||
term1.copyBytes(enum1.term());
|
||||
|
||||
// Same for seek by ord
|
||||
enum1 = dv.termsEnum();
|
||||
enum2 = dv.termsEnum();
|
||||
term1 = new BytesRefBuilder();
|
||||
term2 = new BytesRefBuilder();
|
||||
|
||||
term2.copyBytes(enum2.next());
|
||||
enum1.seekExact(1);
|
||||
term1.copyBytes(enum1.term());
|
||||
|
||||
assertEquals(term1.get(), enum1.term());
|
||||
assertEquals(term2.get(), enum2.term());
|
||||
}
|
||||
|
||||
protected boolean codecAcceptsHugeBinaryValues(String field) {
|
||||
return true;
|
||||
}
|
||||
|
@ -2964,4 +3102,5 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
|
|||
protected boolean codecSupportsSortedNumeric() {
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue