add hack to begin incremental cutover

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4765@1444835 13f79535-47bb-0310-9956-ffa450edef68
Robert Muir 2013-02-11 15:43:24 +00:00
parent b4893ceae2
commit 75b1609b8b
8 changed files with 105 additions and 37 deletions

DocTermOrds.java

@@ -847,4 +847,67 @@ public class DocTermOrds {
     termsEnum.seekExact(ord);
     return termsEnum.term();
   }
+
+  /** Returns a SortedSetDocValues view of this instance */
+  public SortedSetDocValues iterator(TermsEnum termsEnum) throws IOException {
+    return new Iterator(termsEnum);
+  }
+
+  // nocommit: make private (just public to enable hack to cutover gradually)
+  public class Iterator extends SortedSetDocValues {
+    final TermsEnum te;
+    final TermOrdsIterator in = new TermOrdsIterator(); // nocommit: don't wrap this other iterator
+    final int buffer[] = new int[5];
+    int bufferUpto;
+    int bufferLength;
+
+    Iterator(TermsEnum te) {
+      this.te = te;
+    }
+
+    @Override
+    public long nextOrd() {
+      while (bufferUpto == bufferLength) {
+        if (bufferLength < buffer.length) {
+          return NO_MORE_ORDS;
+        } else {
+          bufferLength = in.read(buffer);
+          bufferUpto = 0;
+        }
+      }
+      int next = buffer[bufferUpto];
+      bufferUpto++;
+      return next;
+    }
+
+    @Override
+    public void setDocument(int docID) {
+      in.reset(docID);
+      bufferUpto = 0;
+      bufferLength = in.read(buffer);
+    }
+
+    @Override
+    public void lookupOrd(long ord, BytesRef result) {
+      BytesRef ref = null;
+      try {
+        ref = DocTermOrds.this.lookupTerm(te, (int) ord);
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+      result.bytes = ref.bytes;
+      result.offset = ref.offset;
+      result.length = ref.length;
+    }
+
+    @Override
+    public long getValueCount() {
+      return numTerms();
+    }
+
+    // nocommit: just a hack for gradual cutover
+    public DocTermOrds getParent() {
+      return DocTermOrds.this;
+    }
+  }
 }
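As a usage sketch (not part of this commit; the reader and field names are placeholders), a consumer of the new view follows the standard SortedSetDocValues contract: setDocument() once per document, nextOrd() until NO_MORE_ORDS, and lookupOrd() to resolve each ord to its term bytes:

import java.io.IOException;

import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.util.BytesRef;

class SortedSetViewSketch {
  // Prints every (doc, term) pair for a multi-valued field. After this patch,
  // FieldCache.DEFAULT.getDocTermOrds returns the SortedSetDocValues view
  // directly, whether it is codec-native or uninverted via DocTermOrds.
  static void dumpTerms(AtomicReader reader, String field) throws IOException {
    SortedSetDocValues dv = FieldCache.DEFAULT.getDocTermOrds(reader, field);
    BytesRef scratch = new BytesRef();
    for (int doc = 0; doc < reader.maxDoc(); doc++) {
      dv.setDocument(doc);                 // position the view on this document
      long ord;
      while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
        dv.lookupOrd(ord, scratch);        // fill scratch with the term for this ord
        System.out.println(doc + " -> " + scratch.utf8ToString());
      }
    }
  }
}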

FieldCache.java

@@ -29,6 +29,7 @@ import org.apache.lucene.index.AtomicReader;
 import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.index.DocTermOrds;
 import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.util.Bits;
@@ -602,7 +603,7 @@ public interface FieldCache {
    * @return a {@link DocTermOrds} instance
    * @throws IOException If any error occurs.
    */
-  public DocTermOrds getDocTermOrds(AtomicReader reader, String field) throws IOException;
+  public SortedSetDocValues getDocTermOrds(AtomicReader reader, String field) throws IOException;
 
   /**
    * EXPERT: A unique Identifier/Description for each item in the FieldCache.

FieldCacheImpl.java

@@ -34,6 +34,7 @@ import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.SegmentReader;
 import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.util.ArrayUtil;
@@ -1303,8 +1304,17 @@ class FieldCacheImpl implements FieldCache {
     }
   }
 
-  public DocTermOrds getDocTermOrds(AtomicReader reader, String field) throws IOException {
-    return (DocTermOrds) caches.get(DocTermOrds.class).get(reader, new CacheKey(field, null), false);
+  public SortedSetDocValues getDocTermOrds(AtomicReader reader, String field) throws IOException {
+    SortedSetDocValues dv = reader.getSortedSetDocValues(field);
+    if (dv != null) {
+      return dv;
+    }
+    // nocommit: actually if they have a SortedDV (either indexed as DV or cached), we should return an impl
+    // over that: its like a specialized single-value case of this thing...
+    DocTermOrds dto = (DocTermOrds) caches.get(DocTermOrds.class).get(reader, new CacheKey(field, null), false);
+    return dto.iterator(dto.getOrdTermsEnum(reader));
   }
 
   static final class DocTermOrdsCache extends Cache {
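The second nocommit above describes a missing optimization: a field with plain single-valued SortedDocValues could be exposed through the same SortedSetDocValues API instead of being uninverted. A hypothetical adapter (a sketch of that idea, not part of this commit) might look like this:

import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.BytesRef;

// Hypothetical adapter: view a single-valued SortedDocValues through the
// SortedSetDocValues API, emitting at most one ord per document. NO_MORE_ORDS
// (-1) doubles as the "missing" marker because SortedDocValues.getOrd also
// returns -1 for documents without a value.
final class SingletonSortedSetView extends SortedSetDocValues {
  private final SortedDocValues in;
  private long currentOrd;

  SingletonSortedSetView(SortedDocValues in) {
    this.in = in;
  }

  @Override
  public void setDocument(int docID) {
    currentOrd = in.getOrd(docID);
  }

  @Override
  public long nextOrd() {
    long ord = currentOrd;
    currentOrd = NO_MORE_ORDS; // a second call ends iteration for this doc
    return ord;
  }

  @Override
  public void lookupOrd(long ord, BytesRef result) {
    in.lookupOrd((int) ord, result);
  }

  @Override
  public long getValueCount() {
    return in.getValueCount();
  }
}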

TestFieldCache.java

@@ -253,44 +253,29 @@ public class TestFieldCache extends LuceneTestCase {
     terms = cache.getTerms(reader, "bogusfield");
 
     // getDocTermOrds
-    DocTermOrds termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField");
-    TermsEnum termsEnum = termOrds.getOrdTermsEnum(reader);
-    assertSame("Second request to cache return same DocTermOrds", termOrds, cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField"));
-    DocTermOrds.TermOrdsIterator reuse = null;
+    SortedSetDocValues termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField");
+    // nocommit: test this with reflection or something, that its really from the same DTO
+    // assertSame("Second request to cache return same DocTermOrds", termOrds, cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField"));
     for (int i = 0; i < NUM_DOCS; i++) {
-      reuse = termOrds.lookup(i, reuse);
-      final int[] buffer = new int[5];
+      termOrds.setDocument(i);
       // This will remove identical terms. A DocTermOrds doesn't return duplicate ords for a docId
       List<BytesRef> values = new ArrayList<BytesRef>(new LinkedHashSet<BytesRef>(Arrays.asList(multiValued[i])));
-      for (;;) {
-        int chunk = reuse.read(buffer);
-        if (chunk == 0) {
-          for (int ord = 0; ord < values.size(); ord++) {
-            BytesRef term = values.get(ord);
-            assertNull(String.format(Locale.ROOT, "Document[%d] misses field must be null. Has value %s for ord %d", i, term, ord), term);
-          }
-          break;
-        }
-        for(int idx=0; idx < chunk; idx++) {
-          int key = buffer[idx];
-          termsEnum.seekExact((long) key);
-          String actual = termsEnum.term().utf8ToString();
-          String expected = values.get(idx).utf8ToString();
-          if (!expected.equals(actual)) {
-            reuse = termOrds.lookup(i, reuse);
-            reuse.read(buffer);
-          }
-          assertTrue(String.format(Locale.ROOT, "Expected value %s for doc %d and ord %d, but was %s", expected, i, idx, actual), expected.equals(actual));
-        }
-        if (chunk <= buffer.length) {
-          break;
-        }
-      }
+      for (BytesRef v : values) {
+        if (v == null) {
+          // why does this test use null values... instead of an empty list: confusing
+          break;
+        }
+        long ord = termOrds.nextOrd();
+        assert ord != SortedSetDocValues.NO_MORE_ORDS;
+        BytesRef scratch = new BytesRef();
+        termOrds.lookupOrd(ord, scratch);
+        assertEquals(v, scratch);
+      }
+      assertEquals(SortedSetDocValues.NO_MORE_ORDS, termOrds.nextOrd());
     }
 
     // test bad field
+    // nocommit: what exactly does this test?
     termOrds = cache.getDocTermOrds(reader, "bogusfield");
 
     FieldCache.DEFAULT.purge(reader);

TermGroupFacetCollector.java

@@ -283,7 +283,9 @@ public abstract class TermGroupFacetCollector extends AbstractGroupFacetCollector
       reuse = null;
       groupFieldTermsIndex = FieldCache.DEFAULT.getTermsIndex(context.reader(), groupField);
-      facetFieldDocTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), facetField);
+      // nocommit: cut over
+      DocTermOrds.Iterator iterator = (DocTermOrds.Iterator) FieldCache.DEFAULT.getDocTermOrds(context.reader(), facetField);
+      facetFieldDocTermOrds = iterator.getParent();
       facetOrdTermsEnum = facetFieldDocTermOrds.getOrdTermsEnum(context.reader());
       // [facetFieldDocTermOrds.numTerms() + 1] for all possible facet values and docs not containing facet field
       segmentFacetCounts = new int[facetFieldDocTermOrds.numTerms() + 1];

TermsCollector.java

@@ -100,7 +100,9 @@ abstract class TermsCollector extends Collector {
   @Override
   public void setNextReader(AtomicReaderContext context) throws IOException {
-    docTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), field);
+    // nocommit: cut over
+    DocTermOrds.Iterator iterator = (DocTermOrds.Iterator) FieldCache.DEFAULT.getDocTermOrds(context.reader(), field);
+    docTermOrds = iterator.getParent();
     docTermsEnum = docTermOrds.getOrdTermsEnum(context.reader());
     reuse = null; // LUCENE-3377 needs to be fixed first then this statement can be removed...
   }
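For contrast, here is a hedged sketch of where the "// nocommit: cut over" markers point (not in this commit): once a collector like this one stores the SortedSetDocValues view directly, the DocTermOrds.Iterator downcast and the getParent() escape hatch can be deleted.

// Hypothetical post-cutover shape of setNextReader: docTermOrds would be
// declared as a SortedSetDocValues field, and ord-to-term lookups would go
// through lookupOrd() on the view instead of a separate TermsEnum.
private SortedSetDocValues docTermOrds;

@Override
public void setNextReader(AtomicReaderContext context) throws IOException {
  docTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), field);
}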

TermsWithScoreCollector.java

@@ -234,7 +234,9 @@ abstract class TermsWithScoreCollector extends Collector {
   @Override
   public void setNextReader(AtomicReaderContext context) throws IOException {
-    fromDocTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), field);
+    // nocommit: cut over
+    DocTermOrds.Iterator iterator = (DocTermOrds.Iterator) FieldCache.DEFAULT.getDocTermOrds(context.reader(), field);
+    fromDocTermOrds = iterator.getParent();
     docTermsEnum = fromDocTermOrds.getOrdTermsEnum(context.reader());
     reuse = null; // LUCENE-3377 needs to be fixed first then this statement can be removed...
   }

TestJoinUtil.java

@@ -505,7 +505,9 @@ public class TestJoinUtil extends LuceneTestCase {
       @Override
       public void setNextReader(AtomicReaderContext context) throws IOException {
-        docTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), fromField);
+        // nocommit: cut over
+        DocTermOrds.Iterator iterator = (DocTermOrds.Iterator) FieldCache.DEFAULT.getDocTermOrds(context.reader(), fromField);
+        docTermOrds = iterator.getParent();
        docTermsEnum = docTermOrds.getOrdTermsEnum(context.reader());
        reuse = null;
       }
@@ -629,7 +631,9 @@ public class TestJoinUtil extends LuceneTestCase {
       @Override
       public void setNextReader(AtomicReaderContext context) throws IOException {
         docBase = context.docBase;
-        docTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), toField);
+        // nocommit: cut over
+        DocTermOrds.Iterator iterator = (DocTermOrds.Iterator) FieldCache.DEFAULT.getDocTermOrds(context.reader(), toField);
+        docTermOrds = iterator.getParent();
         docTermsEnum = docTermOrds.getOrdTermsEnum(context.reader());
         reuse = null;
       }