mirror of https://github.com/apache/lucene.git
clear some nocommits and clean up sortedbytesmerge a bit
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1441005 13f79535-47bb-0310-9956-ffa450edef68
parent 8a2a727873
commit 5b38004a55
@@ -309,8 +309,8 @@ public abstract class DocValuesConsumer implements Closeable {
         }
       }

-      // nocommit we can unload the bits to disk to reduce
-      // transient ram spike...
+      // TODO: we can unload the bits/packed ints to disk to reduce
+      // transient ram spike... most of these just require iterators
     }

     // Second pass: merge only the live terms
@@ -337,7 +337,11 @@ public abstract class DocValuesConsumer implements Closeable {
           lastOrds[readerId] = sourceOrd;
           top.ordDeltas.add(delta);

-          lastTerm = BytesRef.deepCopyOf(top.scratch);
+          if (lastTerm == null) {
+            lastTerm = BytesRef.deepCopyOf(top.scratch);
+          } else {
+            lastTerm.copyBytes(top.scratch);
+          }
           ord++;
         }

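The hunk above stops allocating a fresh copy of the previous term on every merged term: the BytesRef is allocated once, then its buffer is reused via copyBytes. A minimal standalone sketch of that reuse pattern (not part of this commit; the class name and sample strings are only for illustration):

    import org.apache.lucene.util.BytesRef;

    public class BytesRefReuseSketch {
      public static void main(String[] args) {
        BytesRef lastTerm = null;
        for (String s : new String[] {"apple", "banana", "cherry"}) {
          BytesRef scratch = new BytesRef(s);          // stands in for top.scratch
          if (lastTerm == null) {
            lastTerm = BytesRef.deepCopyOf(scratch);   // first term: allocate a private copy
          } else {
            lastTerm.copyBytes(scratch);               // later terms: copy into the existing buffer
          }
          System.out.println("previous term is now: " + lastTerm.utf8ToString());
        }
      }
    }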
@@ -360,28 +364,6 @@ public abstract class DocValuesConsumer implements Closeable {
         state.liveTerms = null;
       }
     }

-    /*
-    public void finish(SortedDocValuesConsumer consumer) throws IOException {
-
-      // Third pass: write merged result
-      for(BytesRef term : mergedTerms) {
-        consumer.addValue(term);
-      }
-
-      for(SegmentState segState : segStates) {
-        Bits liveDocs = segState.reader.getLiveDocs();
-        int maxDoc = segState.reader.maxDoc();
-        for(int docID=0;docID<maxDoc;docID++) {
-          if (liveDocs == null || liveDocs.get(docID)) {
-            int segOrd = segState.values.getOrd(docID);
-            int mergedOrd = segState.segOrdToMergedOrd[segOrd];
-            consumer.addDoc(mergedOrd);
-          }
-        }
-      }
-    }
-    */
   }

   /**
@@ -472,7 +454,7 @@ public abstract class DocValuesConsumer implements Closeable {
       }
       assert nextIsSet;
       nextIsSet = false;
-      // nocommit make a mutable number
+      // TODO make a mutable number
       return nextValue;
     }

@@ -763,8 +763,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
   private FieldNumbers getFieldNumberMap() throws IOException {
     final FieldNumbers map = new FieldNumbers();

-    // nocommit for a 4.0 index that has inconsistent DV
-    // types ... this will throw exc on init of IW?
     for(SegmentInfoPerCommit info : segmentInfos) {
       for(FieldInfo fi : getFieldInfos(info.info)) {
         map.addOrGet(fi.name, fi.number, fi.getDocValuesType());
@@ -30,7 +30,8 @@ import org.apache.lucene.store.IndexInput;
  *
  * @lucene.internal
  **/
-// nocommit: make this simply a big ass array and nothing more.
+// TODO: refactor this, byteblockpool, fst.bytestore, and any
+// other "shift/mask big arrays". there are too many of these classes!
 public final class PagedBytes {
   private final List<byte[]> blocks = new ArrayList<byte[]>();
   private final List<Integer> blockEnd = new ArrayList<Integer>();
@@ -106,7 +107,7 @@ public final class PagedBytes {
    *
    * @lucene.internal
    **/
-  // nocommit: move this shit and any other vint bogusness to fieldcacheimpl!
+  // TODO: this really needs to be refactored into fieldcacheimpl
   public void fill(BytesRef b, long start) {
     final int index = (int) (start >> blockBits);
     final int offset = (int) (start & blockMask);
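fill() above maps one flat long address onto the paged byte[] layout with a shift (which block) and a mask (offset inside the block). A minimal standalone sketch of that addressing, assuming a hypothetical block size of 32 KB (blockBits is a constructor parameter in PagedBytes; 15 here is just an example):

    public class PagedAddressSketch {
      public static void main(String[] args) {
        final int blockBits = 15;                       // assumed example: 32 KB blocks
        final long blockMask = (1L << blockBits) - 1;

        final long start = 100000L;                     // flat address into the paged storage
        final int index  = (int) (start >> blockBits);  // which block
        final int offset = (int) (start & blockMask);   // position inside that block

        System.out.println("block=" + index + " offset=" + offset);
        // the mapping is reversible, so a (block, offset) pair identifies one byte
        assert ((long) index << blockBits) + offset == start;
      }
    }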
@@ -217,7 +218,7 @@ public final class PagedBytes {

   /** Copy bytes in, writing the length as a 1 or 2 byte
    *  vInt prefix. */
-  // nocommit: move this shit and any other vint bogusness to fieldcacheimpl!
+  // TODO: this really needs to be refactored into fieldcacheimpl!
   public long copyUsingLengthPrefix(BytesRef bytes) {
     if (bytes.length >= 32768) {
       throw new IllegalArgumentException("max length is 32767 (got " + bytes.length + ")");
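copyUsingLengthPrefix() stores each value behind a 1- or 2-byte length prefix, which is why anything of 32768 bytes or more is rejected. A standalone sketch of one plausible such encoding (an illustration of the idea, not the actual PagedBytes code: it assumes lengths below 128 fit in a single byte and longer lengths use two bytes with the high bit set on the first):

    public class LengthPrefixSketch {
      static int writePrefix(byte[] dest, int pos, int length) {
        if (length >= 32768) {
          throw new IllegalArgumentException("max length is 32767 (got " + length + ")");
        }
        if (length < 128) {
          dest[pos++] = (byte) length;                   // 1-byte form
        } else {
          dest[pos++] = (byte) (0x80 | (length >> 8));   // 2-byte form, high bit marks it
          dest[pos++] = (byte) (length & 0xff);
        }
        return pos;                                      // position just past the prefix
      }

      static int readPrefix(byte[] src, int pos) {
        int b = src[pos] & 0xff;
        return (b & 0x80) == 0 ? b : ((b & 0x7f) << 8) | (src[pos + 1] & 0xff);
      }

      public static void main(String[] args) {
        byte[] buf = new byte[2];
        writePrefix(buf, 0, 100);
        System.out.println(readPrefix(buf, 0));   // 100
        writePrefix(buf, 0, 20000);
        System.out.println(readPrefix(buf, 0));   // 20000
      }
    }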
@@ -65,8 +65,8 @@ public class TestPerFieldDocValuesFormat extends BaseDocValuesFormatTestCase {
   }

   // just a simple trivial test
-  // nocommit: if we are going to pass down suffixes to segmentread/writestate,
-  // then they should be respected by *all* codec apis!
+  // TODO: we should come up with a test that somehow checks that segment suffix
+  // is respected by all codec apis (not just docvalues and postings)
   public void testTwoFieldsTwoFormats() throws IOException {
     Analyzer analyzer = new MockAnalyzer(random());

@@ -17,13 +17,12 @@ package org.apache.lucene.index;
  * limitations under the License.
  */

+import java.util.Random;

 import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.document.BinaryDocValuesField;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.SortedDocValuesField;
 import org.apache.lucene.store.BaseDirectoryWrapper;
-import org.apache.lucene.store.ByteArrayDataInput;
-import org.apache.lucene.store.ByteArrayDataOutput;
 import org.apache.lucene.store.MockDirectoryWrapper;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
@@ -34,7 +33,7 @@ import org.junit.Ignore;
 import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;

 @TimeoutSuite(millis = 80 * TimeUnits.HOUR)
-@Ignore("takes ?? minutes")
+@Ignore("very slow")
 public class Test2BSortedDocValues extends LuceneTestCase {

   // indexes Integer.MAX_VALUE docs with a fixed binary field
@@ -94,10 +93,7 @@ public class Test2BSortedDocValues extends LuceneTestCase {
   }

   // indexes Integer.MAX_VALUE docs with a fixed binary field
-  // nocommit: this must be some kind of worst case for BytesRefHash / its hash fn...
-  // or there is some other perf bug...VERY slow!
-  // if you cut this test to use random.nextBytes its much faster, but still quite slow...
-  // and its not unrealistic for users to index something thats already in sorted order?
+  // TODO: must use random.nextBytes (like Test2BTerms) to avoid BytesRefHash probing issues
   public void test2BOrds() throws Exception {
     BaseDirectoryWrapper dir = newFSDirectory(_TestUtil.getTempDir("2BOrds"));
     if (dir instanceof MockDirectoryWrapper) {
@@ -118,11 +114,11 @@ public class Test2BSortedDocValues extends LuceneTestCase {
     SortedDocValuesField dvField = new SortedDocValuesField("dv", data);
     doc.add(dvField);

+    long seed = random().nextLong();
+    Random random = new Random(seed);
+
     for (int i = 0; i < Integer.MAX_VALUE; i++) {
-      bytes[0] = (byte)(i >> 24);
-      bytes[1] = (byte)(i >> 16);
-      bytes[2] = (byte)(i >> 8);
-      bytes[3] = (byte) i;
+      random.nextBytes(bytes);
       w.addDocument(doc);
       if (i % 100000 == 0) {
         System.out.println("indexed: " + i);
@@ -137,19 +133,15 @@ public class Test2BSortedDocValues extends LuceneTestCase {
     System.out.flush();

     DirectoryReader r = DirectoryReader.open(dir);
-    int expectedValue = 0;
+    random.setSeed(seed);
     for (AtomicReaderContext context : r.leaves()) {
       AtomicReader reader = context.reader();
       BytesRef scratch = new BytesRef();
       BinaryDocValues dv = reader.getSortedDocValues("dv");
       for (int i = 0; i < reader.maxDoc(); i++) {
-        bytes[0] = (byte)(expectedValue >> 24);
-        bytes[1] = (byte)(expectedValue >> 16);
-        bytes[2] = (byte)(expectedValue >> 8);
-        bytes[3] = (byte) expectedValue;
+        random.nextBytes(bytes);
         dv.get(i, scratch);
         assertEquals(data, scratch);
-        expectedValue++;
       }
     }

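The last two hunks switch test2BOrds from sequential counter bytes to seeded random bytes: the seed is saved before indexing and restored before verification, so the reader replays exactly the byte sequence the writer produced. A minimal standalone sketch of that seed-replay pattern (class name and sizes are only for illustration):

    import java.util.Arrays;
    import java.util.Random;

    public class SeedReplaySketch {
      public static void main(String[] args) {
        long seed = new Random().nextLong();
        Random random = new Random(seed);

        // "index" phase: produce a few random values, as the test does per document
        byte[][] written = new byte[5][4];
        for (byte[] b : written) {
          random.nextBytes(b);
        }

        // "verify" phase: reset the seed and replay the identical sequence
        random.setSeed(seed);
        byte[] expected = new byte[4];
        for (byte[] b : written) {
          random.nextBytes(expected);
          if (!Arrays.equals(b, expected)) {
            throw new AssertionError("replayed value does not match");
          }
        }
        System.out.println("replayed sequence matches what was written");
      }
    }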