LUCENE-6207: Fixed consumption of several terms enums on the same sorted (set) doc values instance.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1655693 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Adrien Grand 2015-01-29 14:56:08 +00:00
parent 1f131a6b20
commit b696595cc6
5 changed files with 161 additions and 17 deletions

View File

@ -546,6 +546,12 @@ Other
* LUCENE-5915: Remove Pulsing postings format. (Robert Muir)
======================= Lucene 4.10.4 ======================
* LUCENE-6207: Fixed consumption of several terms enums on the same
sorted (set) doc values instance at the same time.
(Tom Shally, Robert Muir, Adrien Grand)
======================= Lucene 4.10.3 ======================
Bug fixes
@ -553,6 +559,7 @@ Bug fixes
* LUCENE-3229: Overlapping ordered SpanNearQuery spans should not match.
(Ludovic Boutros, Paul Elschot, Greg Dearing, ehatcher)
======================= Lucene 4.10.2 ======================
Bug fixes

View File

@ -29,7 +29,6 @@ import org.apache.lucene.util.BytesRefBuilder;
class SortedDocValuesTermsEnum extends TermsEnum {
private final SortedDocValues values;
private int currentOrd = -1;
private BytesRef term;
private final BytesRefBuilder scratch;
/** Creates a new TermsEnum over the provided values */
@ -44,7 +43,6 @@ class SortedDocValuesTermsEnum extends TermsEnum {
if (ord >= 0) {
currentOrd = ord;
scratch.copyBytes(text);
term = scratch.get();
return SeekStatus.FOUND;
} else {
currentOrd = -ord-1;
@ -52,7 +50,7 @@ class SortedDocValuesTermsEnum extends TermsEnum {
return SeekStatus.END;
} else {
// TODO: hmm can we avoid this "extra" lookup?:
term = values.lookupOrd(currentOrd);
scratch.copyBytes(values.lookupOrd(currentOrd));
return SeekStatus.NOT_FOUND;
}
}
@ -64,7 +62,6 @@ class SortedDocValuesTermsEnum extends TermsEnum {
if (ord >= 0) {
currentOrd = ord;
scratch.copyBytes(text);
term = scratch.get();
return true;
} else {
return false;
@ -75,7 +72,7 @@ class SortedDocValuesTermsEnum extends TermsEnum {
public void seekExact(long ord) throws IOException {
assert ord >= 0 && ord < values.getValueCount();
currentOrd = (int) ord;
term = values.lookupOrd(currentOrd);
scratch.copyBytes(values.lookupOrd(currentOrd));
}
@Override
@ -84,13 +81,13 @@ class SortedDocValuesTermsEnum extends TermsEnum {
if (currentOrd >= values.getValueCount()) {
return null;
}
term = values.lookupOrd(currentOrd);
return term;
scratch.copyBytes(values.lookupOrd(currentOrd));
return scratch.get();
}
@Override
public BytesRef term() throws IOException {
return term;
return scratch.get();
}
@Override

View File

@ -29,7 +29,6 @@ import org.apache.lucene.util.BytesRefBuilder;
class SortedSetDocValuesTermsEnum extends TermsEnum {
private final SortedSetDocValues values;
private long currentOrd = -1;
private BytesRef term;
private final BytesRefBuilder scratch;
/** Creates a new TermsEnum over the provided values */
@ -44,7 +43,6 @@ class SortedSetDocValuesTermsEnum extends TermsEnum {
if (ord >= 0) {
currentOrd = ord;
scratch.copyBytes(text);
term = scratch.get();
return SeekStatus.FOUND;
} else {
currentOrd = -ord-1;
@ -52,7 +50,7 @@ class SortedSetDocValuesTermsEnum extends TermsEnum {
return SeekStatus.END;
} else {
// TODO: hmm can we avoid this "extra" lookup?:
term = values.lookupOrd(currentOrd);
scratch.copyBytes(values.lookupOrd(currentOrd));
return SeekStatus.NOT_FOUND;
}
}
@ -64,7 +62,6 @@ class SortedSetDocValuesTermsEnum extends TermsEnum {
if (ord >= 0) {
currentOrd = ord;
scratch.copyBytes(text);
term = scratch.get();
return true;
} else {
return false;
@ -75,7 +72,7 @@ class SortedSetDocValuesTermsEnum extends TermsEnum {
public void seekExact(long ord) throws IOException {
assert ord >= 0 && ord < values.getValueCount();
currentOrd = (int) ord;
term = values.lookupOrd(currentOrd);
scratch.copyBytes(values.lookupOrd(currentOrd));
}
@Override
@ -84,13 +81,13 @@ class SortedSetDocValuesTermsEnum extends TermsEnum {
if (currentOrd >= values.getValueCount()) {
return null;
}
term = values.lookupOrd(currentOrd);
return term;
scratch.copyBytes(values.lookupOrd(currentOrd));
return scratch.get();
}
@Override
public BytesRef term() throws IOException {
return term;
return scratch.get();
}
@Override

View File

@ -37,6 +37,7 @@ import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SerialMergeScheduler;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
@ -184,10 +185,13 @@ public class TestLucene50DocValuesFormat extends BaseCompressingDocValuesFormatT
LeafReader r = context.reader();
Terms terms = r.terms("indexed");
if (terms != null) {
assertEquals(terms.size(), r.getSortedSetDocValues("dv").getValueCount());
SortedSetDocValues ssdv = r.getSortedSetDocValues("dv");
assertEquals(terms.size(), ssdv.getValueCount());
TermsEnum expected = terms.iterator(null);
TermsEnum actual = r.getSortedSetDocValues("dv").termsEnum();
assertEquals(terms.size(), expected, actual);
doTestSortedSetEnumAdvanceIndependently(ssdv);
}
}
ir.close();

View File

@ -26,6 +26,7 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
@ -57,6 +58,7 @@ import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.TestUtil;
@ -2945,6 +2947,142 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
directory.close();
}
public void testSortedEnumAdvanceIndependently() throws IOException {
Directory directory = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
iwconfig.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
Document doc = new Document();
SortedDocValuesField field = new SortedDocValuesField("field", new BytesRef("2"));
doc.add(field);
iwriter.addDocument(doc);
field.setBytesValue(new BytesRef("1"));
iwriter.addDocument(doc);
field.setBytesValue(new BytesRef("3"));
iwriter.addDocument(doc);
iwriter.commit();
iwriter.forceMerge(1);
DirectoryReader ireader = iwriter.getReader();
iwriter.close();
SortedDocValues dv = getOnlySegmentReader(ireader).getSortedDocValues("field");
doTestSortedSetEnumAdvanceIndependently(DocValues.singleton(dv));
ireader.close();
directory.close();
}
public void testSortedSetEnumAdvanceIndependently() throws IOException {
Directory directory = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
iwconfig.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
Document doc = new Document();
SortedSetDocValuesField field1 = new SortedSetDocValuesField("field", new BytesRef("2"));
SortedSetDocValuesField field2 = new SortedSetDocValuesField("field", new BytesRef("3"));
doc.add(field1);
doc.add(field2);
iwriter.addDocument(doc);
field1.setBytesValue(new BytesRef("1"));
iwriter.addDocument(doc);
field2.setBytesValue(new BytesRef("2"));
iwriter.addDocument(doc);
iwriter.commit();
iwriter.forceMerge(1);
DirectoryReader ireader = iwriter.getReader();
iwriter.close();
SortedSetDocValues dv = getOnlySegmentReader(ireader).getSortedSetDocValues("field");
doTestSortedSetEnumAdvanceIndependently(dv);
ireader.close();
directory.close();
}
protected void doTestSortedSetEnumAdvanceIndependently(SortedSetDocValues dv) throws IOException {
if (dv.getValueCount() < 2) {
return;
}
List<BytesRef> terms = new ArrayList<>();
TermsEnum te = dv.termsEnum();
terms.add(BytesRef.deepCopyOf(te.next()));
terms.add(BytesRef.deepCopyOf(te.next()));
// Make sure that calls to next() does not modify the term of the other enum
TermsEnum enum1 = dv.termsEnum();
TermsEnum enum2 = dv.termsEnum();
BytesRefBuilder term1 = new BytesRefBuilder();
BytesRefBuilder term2 = new BytesRefBuilder();
term1.copyBytes(enum1.next());
term2.copyBytes(enum2.next());
term1.copyBytes(enum1.next());
assertEquals(term1.get(), enum1.term());
assertEquals(term2.get(), enum2.term());
// Same for seekCeil
enum1 = dv.termsEnum();
enum2 = dv.termsEnum();
term1 = new BytesRefBuilder();
term2 = new BytesRefBuilder();
term2.copyBytes(enum2.next());
BytesRefBuilder seekTerm = new BytesRefBuilder();
seekTerm.append(terms.get(0));
seekTerm.append((byte) 0);
enum1.seekCeil(seekTerm.get());
term1.copyBytes(enum1.term());
assertEquals(term1.get(), enum1.term());
assertEquals(term2.get(), enum2.term());
// Same for seekCeil on an exact value
enum1 = dv.termsEnum();
enum2 = dv.termsEnum();
term1 = new BytesRefBuilder();
term2 = new BytesRefBuilder();
term2.copyBytes(enum2.next());
enum1.seekCeil(terms.get(1));
term1.copyBytes(enum1.term());
assertEquals(term1.get(), enum1.term());
assertEquals(term2.get(), enum2.term());
// Same for seekExact
enum1 = dv.termsEnum();
enum2 = dv.termsEnum();
term1 = new BytesRefBuilder();
term2 = new BytesRefBuilder();
term2.copyBytes(enum2.next());
final boolean found = enum1.seekExact(terms.get(1));
assertTrue(found);
term1.copyBytes(enum1.term());
// Same for seek by ord
enum1 = dv.termsEnum();
enum2 = dv.termsEnum();
term1 = new BytesRefBuilder();
term2 = new BytesRefBuilder();
term2.copyBytes(enum2.next());
enum1.seekExact(1);
term1.copyBytes(enum1.term());
assertEquals(term1.get(), enum1.term());
assertEquals(term2.get(), enum2.term());
}
protected boolean codecAcceptsHugeBinaryValues(String field) {
return true;
}
@ -2964,4 +3102,5 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
protected boolean codecSupportsSortedNumeric() {
return true;
}
}