mirror of https://github.com/apache/lucene.git

add hack to begin incremental cutover

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4765@1444835 13f79535-47bb-0310-9956-ffa450edef68

parent b4893ceae2
commit 75b1609b8b
@@ -847,4 +847,67 @@ public class DocTermOrds {
     termsEnum.seekExact(ord);
     return termsEnum.term();
   }
+
+  /** Returns a SortedSetDocValues view of this instance */
+  public SortedSetDocValues iterator(TermsEnum termsEnum) throws IOException {
+    return new Iterator(termsEnum);
+  }
+
+  // nocommit: make private (just public to enable hack to cutover gradually)
+  public class Iterator extends SortedSetDocValues {
+    final TermsEnum te;
+    final TermOrdsIterator in = new TermOrdsIterator(); // nocommit: don't wrap this other iterator
+    final int buffer[] = new int[5];
+    int bufferUpto;
+    int bufferLength;
+
+    Iterator(TermsEnum te) {
+      this.te = te;
+    }
+
+    @Override
+    public long nextOrd() {
+      while (bufferUpto == bufferLength) {
+        if (bufferLength < buffer.length) {
+          return NO_MORE_ORDS;
+        } else {
+          bufferLength = in.read(buffer);
+          bufferUpto = 0;
+        }
+      }
+      int next = buffer[bufferUpto];
+      bufferUpto++;
+      return next;
+    }
+
+    @Override
+    public void setDocument(int docID) {
+      in.reset(docID);
+      bufferUpto = 0;
+      bufferLength = in.read(buffer);
+    }
+
+    @Override
+    public void lookupOrd(long ord, BytesRef result) {
+      BytesRef ref = null;
+      try {
+        ref = DocTermOrds.this.lookupTerm(te, (int) ord);
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+      result.bytes = ref.bytes;
+      result.offset = ref.offset;
+      result.length = ref.length;
+    }
+
+    @Override
+    public long getValueCount() {
+      return numTerms();
+    }
+
+    // nocommit: just a hack for gradual cutover
+    public DocTermOrds getParent() {
+      return DocTermOrds.this;
+    }
+  }
 }
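Note: the new view above follows the standard SortedSetDocValues iteration protocol: position on a document with setDocument, drain ords with nextOrd until NO_MORE_ORDS, then resolve each ord with lookupOrd. A minimal caller sketch under those assumptions (dumpOrds is a hypothetical helper; it is not part of this commit):

    import java.io.IOException;
    import org.apache.lucene.index.AtomicReader;
    import org.apache.lucene.index.DocTermOrds;
    import org.apache.lucene.index.SortedSetDocValues;
    import org.apache.lucene.util.BytesRef;

    // Sketch: drain every document's term ords through the new view.
    static void dumpOrds(DocTermOrds dto, AtomicReader reader) throws IOException {
      SortedSetDocValues dv = dto.iterator(dto.getOrdTermsEnum(reader));
      BytesRef scratch = new BytesRef();
      for (int doc = 0; doc < reader.maxDoc(); doc++) {
        dv.setDocument(doc);             // position the view on this document
        long ord;
        while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
          dv.lookupOrd(ord, scratch);    // resolve the ord back to term bytes
          System.out.println(doc + " -> " + scratch.utf8ToString());
        }
      }
    }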
@@ -29,6 +29,7 @@ import org.apache.lucene.index.AtomicReader;
 import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.index.DocTermOrds;
 import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.util.Bits;
@@ -602,7 +603,7 @@ public interface FieldCache {
    * @return a {@link DocTermOrds} instance
    * @throws IOException If any error occurs.
    */
-  public DocTermOrds getDocTermOrds(AtomicReader reader, String field) throws IOException;
+  public SortedSetDocValues getDocTermOrds(AtomicReader reader, String field) throws IOException;
 
   /**
    * EXPERT: A unique Identifier/Description for each item in the FieldCache.
@@ -34,6 +34,7 @@ import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.SegmentReader;
 import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.util.ArrayUtil;
@@ -1303,8 +1304,17 @@ class FieldCacheImpl implements FieldCache {
     }
   }
 
-  public DocTermOrds getDocTermOrds(AtomicReader reader, String field) throws IOException {
-    return (DocTermOrds) caches.get(DocTermOrds.class).get(reader, new CacheKey(field, null), false);
+  public SortedSetDocValues getDocTermOrds(AtomicReader reader, String field) throws IOException {
+    SortedSetDocValues dv = reader.getSortedSetDocValues(field);
+    if (dv != null) {
+      return dv;
+    }
+
+    // nocommit: actually if they have a SortedDV (either indexed as DV or cached), we should return an impl
+    // over that: its like a specialized single-value case of this thing...
+
+    DocTermOrds dto = (DocTermOrds) caches.get(DocTermOrds.class).get(reader, new CacheKey(field, null), false);
+    return dto.iterator(dto.getOrdTermsEnum(reader));
   }
 
   static final class DocTermOrdsCache extends Cache {
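The nocommit above points at a follow-up: a field with SortedDocValues is just the single-valued special case of this API, so the cache could answer with an adapter over it instead of uninverting. A minimal sketch of such an adapter, assuming only the SortedDocValues/SortedSetDocValues methods visible elsewhere in this diff (asSortedSet is hypothetical and not part of the commit):

    import org.apache.lucene.index.SortedDocValues;
    import org.apache.lucene.index.SortedSetDocValues;
    import org.apache.lucene.util.BytesRef;

    // Hypothetical single-value adapter, along the lines the nocommit suggests.
    static SortedSetDocValues asSortedSet(final SortedDocValues in) {
      return new SortedSetDocValues() {
        private long currentOrd;   // ord of the current doc, -1 conventionally meaning no value
        private boolean consumed;  // set once the single ord has been returned

        @Override
        public void setDocument(int docID) {
          currentOrd = in.getOrd(docID);
          consumed = false;
        }

        @Override
        public long nextOrd() {
          if (consumed || currentOrd == -1) {
            return NO_MORE_ORDS;
          }
          consumed = true;         // at most one value per document
          return currentOrd;
        }

        @Override
        public void lookupOrd(long ord, BytesRef result) {
          in.lookupOrd((int) ord, result);
        }

        @Override
        public long getValueCount() {
          return in.getValueCount();
        }
      };
    }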
@@ -253,44 +253,29 @@ public class TestFieldCache extends LuceneTestCase {
     terms = cache.getTerms(reader, "bogusfield");
 
     // getDocTermOrds
-    DocTermOrds termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField");
-    TermsEnum termsEnum = termOrds.getOrdTermsEnum(reader);
-    assertSame("Second request to cache return same DocTermOrds", termOrds, cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField"));
-    DocTermOrds.TermOrdsIterator reuse = null;
+    SortedSetDocValues termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField");
+    // nocommit: test this with reflection or something, that its really from the same DTO
+    // assertSame("Second request to cache return same DocTermOrds", termOrds, cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField"));
     for (int i = 0; i < NUM_DOCS; i++) {
-      reuse = termOrds.lookup(i, reuse);
-      final int[] buffer = new int[5];
+      termOrds.setDocument(i);
       // This will remove identical terms. A DocTermOrds doesn't return duplicate ords for a docId
       List<BytesRef> values = new ArrayList<BytesRef>(new LinkedHashSet<BytesRef>(Arrays.asList(multiValued[i])));
-      for (;;) {
-        int chunk = reuse.read(buffer);
-        if (chunk == 0) {
-          for (int ord = 0; ord < values.size(); ord++) {
-            BytesRef term = values.get(ord);
-            assertNull(String.format(Locale.ROOT, "Document[%d] misses field must be null. Has value %s for ord %d", i, term, ord), term);
-          }
-          break;
-        }
-
-        for(int idx=0; idx < chunk; idx++) {
-          int key = buffer[idx];
-          termsEnum.seekExact((long) key);
-          String actual = termsEnum.term().utf8ToString();
-          String expected = values.get(idx).utf8ToString();
-          if (!expected.equals(actual)) {
-            reuse = termOrds.lookup(i, reuse);
-            reuse.read(buffer);
-          }
-          assertTrue(String.format(Locale.ROOT, "Expected value %s for doc %d and ord %d, but was %s", expected, i, idx, actual), expected.equals(actual));
-        }
-
-        if (chunk <= buffer.length) {
+      for (BytesRef v : values) {
+        if (v == null) {
+          // why does this test use null values... instead of an empty list: confusing
           break;
         }
+        long ord = termOrds.nextOrd();
+        assert ord != SortedSetDocValues.NO_MORE_ORDS;
+        BytesRef scratch = new BytesRef();
+        termOrds.lookupOrd(ord, scratch);
+        assertEquals(v, scratch);
       }
+      assertEquals(SortedSetDocValues.NO_MORE_ORDS, termOrds.nextOrd());
     }
 
     // test bad field
+    // nocommit: what exactly does this test?
    termOrds = cache.getDocTermOrds(reader, "bogusfield");
 
    FieldCache.DEFAULT.purge(reader);
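The commented-out assertSame above still wants a home: the property to verify is that repeated cache lookups for the same field are views over one shared DocTermOrds. Until the reflection-based check the nocommit imagines exists, a hypothetical way to express it is through the temporary getParent() hook (test-only sketch, valid only while the uninverting cache path is taken, i.e. no real SortedSetDocValues field):

    // Hypothetical identity check via the temporary getParent() escape hatch.
    SortedSetDocValues first = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField");
    SortedSetDocValues second = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField");
    assertSame("Second request to cache return same DocTermOrds",
               ((DocTermOrds.Iterator) first).getParent(),
               ((DocTermOrds.Iterator) second).getParent());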
@@ -283,7 +283,9 @@ public abstract class TermGroupFacetCollector extends AbstractGroupFacetCollector
 
       reuse = null;
       groupFieldTermsIndex = FieldCache.DEFAULT.getTermsIndex(context.reader(), groupField);
-      facetFieldDocTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), facetField);
+      // nocommit: cut over
+      DocTermOrds.Iterator iterator = (DocTermOrds.Iterator) FieldCache.DEFAULT.getDocTermOrds(context.reader(), facetField);
+      facetFieldDocTermOrds = iterator.getParent();
       facetOrdTermsEnum = facetFieldDocTermOrds.getOrdTermsEnum(context.reader());
       // [facetFieldDocTermOrds.numTerms() + 1] for all possible facet values and docs not containing facet field
       segmentFacetCounts = new int[facetFieldDocTermOrds.numTerms() + 1];
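This cast-and-unwrap pattern repeats in the collectors below; it exists only so call sites can keep using DocTermOrds-specific APIs (getOrdTermsEnum, numTerms, TermOrdsIterator) until they are ported. Once a collector consumes the SortedSetDocValues directly, the hack collapses to something like this sketch (assuming the counting loop is also moved to setDocument/nextOrd; not part of this commit):

    // Hypothetical post-cutover setup: no cast, no getParent().
    SortedSetDocValues facetFieldValues = FieldCache.DEFAULT.getDocTermOrds(context.reader(), facetField);
    // [getValueCount() + 1] for all possible facet values and docs not containing the facet field
    segmentFacetCounts = new int[(int) facetFieldValues.getValueCount() + 1];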
@@ -100,7 +100,9 @@ abstract class TermsCollector extends Collector {
 
   @Override
   public void setNextReader(AtomicReaderContext context) throws IOException {
-    docTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), field);
+    // nocommit: cut over
+    DocTermOrds.Iterator iterator = (DocTermOrds.Iterator) FieldCache.DEFAULT.getDocTermOrds(context.reader(), field);
+    docTermOrds = iterator.getParent();
     docTermsEnum = docTermOrds.getOrdTermsEnum(context.reader());
     reuse = null; // LUCENE-3377 needs to be fixed first then this statement can be removed...
   }
@@ -234,7 +234,9 @@ abstract class TermsWithScoreCollector extends Collector {
 
   @Override
   public void setNextReader(AtomicReaderContext context) throws IOException {
-    fromDocTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), field);
+    // nocommit: cut over
+    DocTermOrds.Iterator iterator = (DocTermOrds.Iterator) FieldCache.DEFAULT.getDocTermOrds(context.reader(), field);
+    fromDocTermOrds = iterator.getParent();
     docTermsEnum = fromDocTermOrds.getOrdTermsEnum(context.reader());
     reuse = null; // LUCENE-3377 needs to be fixed first then this statement can be removed...
   }
@@ -505,7 +505,9 @@ public class TestJoinUtil extends LuceneTestCase {
 
       @Override
       public void setNextReader(AtomicReaderContext context) throws IOException {
-        docTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), fromField);
+        // nocommit: cut over
+        DocTermOrds.Iterator iterator = (DocTermOrds.Iterator) FieldCache.DEFAULT.getDocTermOrds(context.reader(), fromField);
+        docTermOrds = iterator.getParent();
         docTermsEnum = docTermOrds.getOrdTermsEnum(context.reader());
         reuse = null;
       }
@@ -629,7 +631,8 @@ public class TestJoinUtil extends LuceneTestCase {
       @Override
       public void setNextReader(AtomicReaderContext context) throws IOException {
         docBase = context.docBase;
-        docTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), toField);
+        DocTermOrds.Iterator iterator = (DocTermOrds.Iterator) FieldCache.DEFAULT.getDocTermOrds(context.reader(), toField);
+        docTermOrds = iterator.getParent();
         docTermsEnum = docTermOrds.getOrdTermsEnum(context.reader());
         reuse = null;
       }