clear some nocommits and clean up sortedbytesmerge a bit

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1441005 13f79535-47bb-0310-9956-ffa450edef68
Robert Muir 2013-01-31 15:07:51 +00:00
parent 8a2a727873
commit 5b38004a55
5 changed files with 24 additions and 51 deletions

DocValuesConsumer.java

@@ -309,8 +309,8 @@ public abstract class DocValuesConsumer implements Closeable {
         }
       }
-      // nocommit we can unload the bits to disk to reduce
-      // transient ram spike...
+      // TODO: we can unload the bits/packed ints to disk to reduce
+      // transient ram spike... most of these just require iterators
     }

     // Second pass: merge only the live terms
@@ -337,7 +337,11 @@ public abstract class DocValuesConsumer implements Closeable {
           lastOrds[readerId] = sourceOrd;
           top.ordDeltas.add(delta);
-          lastTerm = BytesRef.deepCopyOf(top.scratch);
+          if (lastTerm == null) {
+            lastTerm = BytesRef.deepCopyOf(top.scratch);
+          } else {
+            lastTerm.copyBytes(top.scratch);
+          }
           ord++;
         }
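
This hunk stops allocating a fresh BytesRef for every merged term: only the first term is deep-copied, and each later term is copied into the existing buffer, which grows only when needed. A minimal standalone sketch of that reuse pattern (the term list below is a stand-in for the merge queue, not the actual merge code):

import java.util.Arrays;
import java.util.List;
import org.apache.lucene.util.BytesRef;

public class BytesRefReuseDemo {
  public static void main(String[] args) {
    List<BytesRef> terms = Arrays.asList(
        new BytesRef("apple"), new BytesRef("banana"), new BytesRef("cherry"));
    BytesRef lastTerm = null;
    for (BytesRef term : terms) {
      if (lastTerm == null) {
        lastTerm = BytesRef.deepCopyOf(term); // first term: allocate a private copy
      } else {
        lastTerm.copyBytes(term);             // later terms: overwrite in place, growing if needed
      }
      System.out.println(lastTerm.utf8ToString());
    }
  }
}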
@@ -360,28 +364,6 @@ public abstract class DocValuesConsumer implements Closeable {
         state.liveTerms = null;
       }
     }
-
-    /*
-    public void finish(SortedDocValuesConsumer consumer) throws IOException {
-      // Third pass: write merged result
-      for(BytesRef term : mergedTerms) {
-        consumer.addValue(term);
-      }
-
-      for(SegmentState segState : segStates) {
-        Bits liveDocs = segState.reader.getLiveDocs();
-        int maxDoc = segState.reader.maxDoc();
-        for(int docID=0;docID<maxDoc;docID++) {
-          if (liveDocs == null || liveDocs.get(docID)) {
-            int segOrd = segState.values.getOrd(docID);
-            int mergedOrd = segState.segOrdToMergedOrd[segOrd];
-            consumer.addDoc(mergedOrd);
-          }
-        }
-      }
-    }
-    */
   }

   /**
@@ -472,7 +454,7 @@ public abstract class DocValuesConsumer implements Closeable {
         }
         assert nextIsSet;
         nextIsSet = false;
-        // nocommit make a mutable number
+        // TODO make a mutable number
         return nextValue;
       }

IndexWriter.java

@@ -763,8 +763,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
   private FieldNumbers getFieldNumberMap() throws IOException {
     final FieldNumbers map = new FieldNumbers();

-    // nocommit for a 4.0 index that has inconsistent DV
-    // types ... this will throw exc on init of IW?
     for(SegmentInfoPerCommit info : segmentInfos) {
       for(FieldInfo fi : getFieldInfos(info.info)) {
         map.addOrGet(fi.name, fi.number, fi.getDocValuesType());

PagedBytes.java

@@ -30,7 +30,8 @@ import org.apache.lucene.store.IndexInput;
  *
  * @lucene.internal
  **/
-// nocommit: make this simply a big ass array and nothing more.
+// TODO: refactor this, byteblockpool, fst.bytestore, and any
+// other "shift/mask big arrays". there are too many of these classes!
 public final class PagedBytes {
   private final List<byte[]> blocks = new ArrayList<byte[]>();
   private final List<Integer> blockEnd = new ArrayList<Integer>();
@@ -106,7 +107,7 @@ public final class PagedBytes {
    *
    * @lucene.internal
    **/
-  // nocommit: move this shit and any other vint bogusness to fieldcacheimpl!
+  // TODO: this really needs to be refactored into fieldcacheimpl
   public void fill(BytesRef b, long start) {
     final int index = (int) (start >> blockBits);
     final int offset = (int) (start & blockMask);
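
The fill method shows the "shift/mask big array" scheme the new TODO refers to: a long address is split into a block index (the high bits) and an offset inside that block (the low bits). A minimal standalone sketch of the idea, using a hypothetical PagedByteArray class rather than Lucene's implementation:

import java.util.ArrayList;
import java.util.List;

// A long address is decoded as: block = address >> blockBits,
// offset = address & blockMask, where blockMask = (1 << blockBits) - 1.
public class PagedByteArray {
  private final int blockBits;   // log2 of the block size
  private final int blockSize;   // 1 << blockBits
  private final int blockMask;   // blockSize - 1
  private final List<byte[]> blocks = new ArrayList<byte[]>();

  public PagedByteArray(int blockBits) {
    this.blockBits = blockBits;
    this.blockSize = 1 << blockBits;
    this.blockMask = blockSize - 1;
  }

  public byte get(long address) {
    final int index = (int) (address >> blockBits);  // which block
    final int offset = (int) (address & blockMask);  // where inside it
    return blocks.get(index)[offset];
  }

  public void set(long address, byte b) {
    final int index = (int) (address >> blockBits);
    while (blocks.size() <= index) {
      blocks.add(new byte[blockSize]);               // grow one block at a time
    }
    blocks.get(index)[(int) (address & blockMask)] = b;
  }
}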
@@ -217,7 +218,7 @@ public final class PagedBytes {
   /** Copy bytes in, writing the length as a 1 or 2 byte
    * vInt prefix. */
-  // nocommit: move this shit and any other vint bogusness to fieldcacheimpl!
+  // TODO: this really needs to be refactored into fieldcacheimpl!
   public long copyUsingLengthPrefix(BytesRef bytes) {
     if (bytes.length >= 32768) {
       throw new IllegalArgumentException("max length is 32767 (got " + bytes.length + ")");
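
copyUsingLengthPrefix stores each value behind a 1- or 2-byte length header, which is why lengths are capped at 32767: a length under 128 fits in one byte with the high bit clear, and anything larger sets the high bit and spills the low 8 bits into a second byte. A hedged sketch of that encoding and a matching decoder (my reading of the prefix format, not the exact Lucene code):

public class LengthPrefixDemo {
  // Write a length < 32768 as a 1- or 2-byte prefix; returns the new write position.
  static int writeLength(byte[] dest, int upto, int length) {
    if (length >= 32768) {
      throw new IllegalArgumentException("max length is 32767 (got " + length + ")");
    }
    if (length < 128) {
      dest[upto++] = (byte) length;                  // single byte, high bit clear
    } else {
      dest[upto++] = (byte) (0x80 | (length >> 8));  // high bit set + upper 7 bits
      dest[upto++] = (byte) (length & 0xff);         // low 8 bits
    }
    return upto;
  }

  static int readLength(byte[] src, int offset) {
    int b = src[offset] & 0xff;
    if ((b & 0x80) == 0) {
      return b;                                      // one-byte length
    }
    return ((b & 0x7f) << 8) | (src[offset + 1] & 0xff);
  }

  public static void main(String[] args) {
    byte[] buf = new byte[4];
    writeLength(buf, 0, 300);
    System.out.println(readLength(buf, 0));          // prints 300
  }
}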

TestPerFieldDocValuesFormat.java

@@ -65,8 +65,8 @@ public class TestPerFieldDocValuesFormat extends BaseDocValuesFormatTestCase {
   }

   // just a simple trivial test
-  // nocommit: if we are going to pass down suffixes to segmentread/writestate,
-  // then they should be respected by *all* codec apis!
+  // TODO: we should come up with a test that somehow checks that segment suffix
+  // is respected by all codec apis (not just docvalues and postings)
   public void testTwoFieldsTwoFormats() throws IOException {
     Analyzer analyzer = new MockAnalyzer(random());

Test2BSortedDocValues.java

@@ -17,13 +17,12 @@ package org.apache.lucene.index;
  * limitations under the License.
  */

+import java.util.Random;
+
 import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.document.BinaryDocValuesField;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.SortedDocValuesField;
 import org.apache.lucene.store.BaseDirectoryWrapper;
-import org.apache.lucene.store.ByteArrayDataInput;
-import org.apache.lucene.store.ByteArrayDataOutput;
 import org.apache.lucene.store.MockDirectoryWrapper;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
@@ -34,7 +33,7 @@ import org.junit.Ignore;
 import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;

 @TimeoutSuite(millis = 80 * TimeUnits.HOUR)
-@Ignore("takes ?? minutes")
+@Ignore("very slow")
 public class Test2BSortedDocValues extends LuceneTestCase {

   // indexes Integer.MAX_VALUE docs with a fixed binary field
@@ -94,10 +93,7 @@ public class Test2BSortedDocValues extends LuceneTestCase {
   }

   // indexes Integer.MAX_VALUE docs with a fixed binary field
-  // nocommit: this must be some kind of worst case for BytesRefHash / its hash fn...
-  // or there is some other perf bug...VERY slow!
-  // if you cut this test to use random.nextBytes its much faster, but still quite slow...
-  // and its not unrealistic for users to index something thats already in sorted order?
+  // TODO: must use random.nextBytes (like Test2BTerms) to avoid BytesRefHash probing issues
   public void test2BOrds() throws Exception {
     BaseDirectoryWrapper dir = newFSDirectory(_TestUtil.getTempDir("2BOrds"));
     if (dir instanceof MockDirectoryWrapper) {
@@ -118,11 +114,11 @@ public class Test2BSortedDocValues extends LuceneTestCase {
     SortedDocValuesField dvField = new SortedDocValuesField("dv", data);
     doc.add(dvField);

+    long seed = random().nextLong();
+    Random random = new Random(seed);
     for (int i = 0; i < Integer.MAX_VALUE; i++) {
-      bytes[0] = (byte)(i >> 24);
-      bytes[1] = (byte)(i >> 16);
-      bytes[2] = (byte)(i >> 8);
-      bytes[3] = (byte) i;
+      random.nextBytes(bytes);
       w.addDocument(doc);
       if (i % 100000 == 0) {
         System.out.println("indexed: " + i);
@@ -137,19 +133,15 @@ public class Test2BSortedDocValues extends LuceneTestCase {
     System.out.flush();

     DirectoryReader r = DirectoryReader.open(dir);
-    int expectedValue = 0;
+    random.setSeed(seed);
     for (AtomicReaderContext context : r.leaves()) {
       AtomicReader reader = context.reader();
       BytesRef scratch = new BytesRef();
       BinaryDocValues dv = reader.getSortedDocValues("dv");
       for (int i = 0; i < reader.maxDoc(); i++) {
-        bytes[0] = (byte)(expectedValue >> 24);
-        bytes[1] = (byte)(expectedValue >> 16);
-        bytes[2] = (byte)(expectedValue >> 8);
-        bytes[3] = (byte) expectedValue;
+        random.nextBytes(bytes);
         dv.get(i, scratch);
         assertEquals(data, scratch);
-        expectedValue++;
       }
     }
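
The test now draws a seed once, indexes with new Random(seed), and calls setSeed(seed) before verification so the exact same byte sequence is replayed, which avoids keeping two billion expected values around. A standalone sketch of that replay trick (plain Java, independent of the test harness):

import java.util.Arrays;
import java.util.Random;

public class SeededReplayDemo {
  public static void main(String[] args) {
    long seed = new Random().nextLong();  // the test draws this via random().nextLong()
    Random random = new Random(seed);
    byte[] bytes = new byte[4];

    // "Write" phase: record what the seeded generator produced.
    byte[][] written = new byte[3][];
    for (int i = 0; i < written.length; i++) {
      random.nextBytes(bytes);
      written[i] = bytes.clone();
    }

    // "Verify" phase: reset the seed and replay the identical sequence.
    random.setSeed(seed);
    for (int i = 0; i < written.length; i++) {
      random.nextBytes(bytes);
      if (!Arrays.equals(written[i], bytes)) {
        throw new AssertionError("replay mismatch at " + i);
      }
    }
    System.out.println("replayed " + written.length + " values identically");
  }
}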