commit c98779dd12

    SOLR-2452: merge with trunk up to r1134392

    git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/solr2452@1134403 13f79535-47bb-0310-9956-ffa450edef68
@@ -82,7 +82,7 @@
     <module name="lucene" />
     <option name="TEST_OBJECT" value="package" />
     <option name="WORKING_DIRECTORY" value="file://$PROJECT_DIR$/lucene/build" />
-    <option name="VM_PARAMETERS" value="-ea -Dlucene.version=4.0-SNAPSHOT -DtempDir=temp -Dtests.linedocsfile=europarl.lines.txt.gz" />
+    <option name="VM_PARAMETERS" value="-ea -Dlucene.version=4.0-SNAPSHOT -DtempDir=temp" />
     <option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
   </configuration>
   <configuration default="false" name="memory contrib" type="JUnit" factoryName="JUnit">
@@ -64,6 +64,10 @@
       <version>${project.version}</version>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>com.google.guava</groupId>
+      <artifactId>guava</artifactId>
+    </dependency>
     <dependency>
       <groupId>javax.servlet</groupId>
       <artifactId>servlet-api</artifactId>
@@ -152,10 +152,6 @@
       <artifactId>servlet-api</artifactId>
       <scope>provided</scope>
     </dependency>
-    <dependency>
-      <groupId>com.google.guava</groupId>
-      <artifactId>guava</artifactId>
-    </dependency>
    <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>
@@ -403,6 +403,31 @@ New features
   that it inverts the index to compute the ords, but differs in that
   it's able to handle multi-valued fields and does not hold the term
   bytes in RAM. (Mike McCandless)

+* LUCENE-3108, LUCENE-2935, LUCENE-2168, LUCENE-1231: Changes from
+  IndexDocValues (ColumnStrideFields):
+
+  - IndexWriter now supports typesafe dense per-document values stored in
+    a column like storage. IndexDocValues are stored on a per-document
+    basis where each documents field can hold exactly one value of a given
+    type. IndexDocValues are provided via Fieldable and can be used in
+    conjunction with stored and indexed values.
+
+  - IndexDocValues provides an entirely RAM resident document id to value
+    mapping per field as well as a DocIdSetIterator based disk-resident
+    sequential access API relying on filesystem-caches.
+
+  - Both APIs are exposed via IndexReader and the Codec / Flex API allowing
+    expert users to integrate customized IndexDocValues reader and writer
+    implementations by extending existing Codecs.
+
+  - IndexDocValues provides implementations for primitive datatypes like int,
+    long, float, double and arrays of byte. Byte based implementations further
+    provide storage variants like straight or dereferenced stored bytes, fixed
+    and variable length bytes as well as index time sorted based on user-provided
+    comparators.
+
+  (Mike McCandless, Simon Willnauer)
+
 Optimizations

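The IndexDocValues entry above is backed by the new IndexDocValuesField class added later in this commit. A minimal indexing sketch (not part of the commit), following that class's javadoc examples; the field name "size" and the already-opened IndexWriter are assumptions:

    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.IndexDocValuesField;
    import org.apache.lucene.index.IndexWriter;

    class DocValuesIndexingSketch {
      // Re-use one field and one Document across documents, as the new
      // IndexDocValuesField javadoc recommends.
      static void addDocs(IndexWriter writer, long[] values) throws Exception {
        IndexDocValuesField field = new IndexDocValuesField("size"); // hypothetical field name
        Document doc = new Document();
        doc.add(field);
        for (long v : values) {
          field.setInt(v);         // defaults the field's ValueType to INTS
          writer.addDocument(doc); // one typed value per document for this field
        }
      }
    }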
@@ -423,6 +448,9 @@ Optimizations

 * LUCENE-2897: Apply deleted terms while flushing a segment. We still
   buffer deleted terms to later apply to past segments. (Mike McCandless)

+* LUCENE-1736: DateTools.java general improvements.
+  (David Smiley via Steve Rowe)
+
 Bug fixes

@@ -434,7 +462,7 @@ Bug fixes
   deletions, provided they share some segments. (yonik)

 * LUCENE-2645: Fix false assertion error when same token was added one
-  after another with 0 posIncr. (Kurosaka Teruhiko via Mike
+  after another with 0 posIncr. (David Smiley, Kurosaka Teruhiko via Mike
   McCandless)

 ======================= Lucene 3.x (not yet released) ================
@@ -465,6 +493,11 @@ Bug fixes
 * LUCENE-3102: CachingCollector.replay was failing to call setScorer
   per-segment (Martijn van Groningen via Mike McCandless)

+* LUCENE-3183: Fix rare corner case where seeking to empty term
+  (field="", term="") with terms index interval 1 could hit
+  ArrayIndexOutOfBoundsException (selckin, Robert Muir, Mike
+  McCandless)
+
 New Features

 * LUCENE-3140: Added experimental FST implementation to Lucene.
@@ -79,6 +79,12 @@ API Changes
   First/SecondPassGroupingCollector. (Martijn van Groningen, Mike
   McCandless)

+Bug Fixes
+
+* LUCENE-3185: Fix bug in NRTCachingDirectory.deleteFile that would
+  always throw exception and sometimes fail to actually delete the
+  file. (Mike McCandless)
+
 Build

 * LUCENE-3149: Upgrade contrib/icu's ICU jar file to ICU 4.8.
@@ -165,7 +165,7 @@ public class TokenSources {
       this.tokens = tokens;
       termAtt = addAttribute(CharTermAttribute.class);
       offsetAtt = addAttribute(OffsetAttribute.class);
-      posincAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
+      posincAtt = addAttribute(PositionIncrementAttribute.class);
     }

     @Override
@@ -32,7 +32,7 @@ import java.util.Comparator;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.FieldSelector;
 import org.apache.lucene.index.*;
-import org.apache.lucene.index.IndexReader.ReaderContext;
+import org.apache.lucene.index.codecs.PerDocValues;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BitVector;
 import org.apache.lucene.util.BytesRef;
@@ -487,4 +487,9 @@ public class InstantiatedIndexReader extends IndexReader {
       }
     }
   }
+
+  @Override
+  public PerDocValues perDocValues() throws IOException {
+    return null;
+  }
 }
@@ -52,6 +52,7 @@ import org.apache.lucene.index.TermPositionVector;
 import org.apache.lucene.index.TermVectorMapper;
 import org.apache.lucene.index.FieldInvertState;
 import org.apache.lucene.index.IndexReader.ReaderContext;
+import org.apache.lucene.index.codecs.PerDocValues;
 import org.apache.lucene.search.Collector;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
@@ -1278,6 +1279,11 @@ public class MemoryIndex {

       return Collections.unmodifiableSet(fields.keySet());
     }
+
+    @Override
+    public PerDocValues perDocValues() throws IOException {
+      return null;
+    }


@@ -20,13 +20,19 @@ package org.apache.lucene.index.codecs.appending;
 import java.io.IOException;
 import java.util.Set;

+import org.apache.lucene.index.PerDocWriteState;
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.DocValuesConsumer;
+import org.apache.lucene.index.codecs.DefaultDocValuesProducer;
 import org.apache.lucene.index.codecs.FieldsConsumer;
 import org.apache.lucene.index.codecs.FieldsProducer;
 import org.apache.lucene.index.codecs.FixedGapTermsIndexReader;
+import org.apache.lucene.index.codecs.PerDocConsumer;
+import org.apache.lucene.index.codecs.DefaultDocValuesConsumer;
+import org.apache.lucene.index.codecs.PerDocValues;
 import org.apache.lucene.index.codecs.standard.StandardCodec;
 import org.apache.lucene.index.codecs.PostingsReaderBase;
 import org.apache.lucene.index.codecs.standard.StandardPostingsReader;
@@ -127,15 +133,27 @@ public class AppendingCodec extends Codec {
   }

   @Override
-  public void files(Directory dir, SegmentInfo segmentInfo, String codecId, Set<String> files)
+  public void files(Directory dir, SegmentInfo segmentInfo, int codecId, Set<String> files)
     throws IOException {
     StandardPostingsReader.files(dir, segmentInfo, codecId, files);
     BlockTermsReader.files(dir, segmentInfo, codecId, files);
     FixedGapTermsIndexReader.files(dir, segmentInfo, codecId, files);
+    DefaultDocValuesConsumer.files(dir, segmentInfo, codecId, files);
   }

   @Override
   public void getExtensions(Set<String> extensions) {
     StandardCodec.getStandardExtensions(extensions);
+    DefaultDocValuesConsumer.getDocValuesExtensions(extensions);
+  }
+
+  @Override
+  public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException {
+    return new DefaultDocValuesConsumer(state, BytesRef.getUTF8SortedAsUnicodeComparator());
+  }
+
+  @Override
+  public PerDocValues docsProducer(SegmentReadState state) throws IOException {
+    return new DefaultDocValuesProducer(state.segmentInfo, state.dir, state.fieldInfos, state.codecId);
   }
 }
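docsProducer above is what hands the per-document values to readers as PerDocValues. A minimal read-side sketch (not part of the commit), modeled on the loop CheckIndex.testDocValues adds later in this commit; it assumes a per-segment reader (the DirectoryReader change below throws UnsupportedOperationException at the top level) and an INTS-typed field:

    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.codecs.PerDocValues;
    import org.apache.lucene.index.values.IndexDocValues;
    import org.apache.lucene.index.values.ValuesEnum;

    class DocValuesReadSketch {
      static void dumpInts(IndexReader segmentReader, String field) throws Exception {
        PerDocValues perDoc = segmentReader.perDocValues();
        IndexDocValues docValues = perDoc.docValues(field); // null if the field stores no values
        if (docValues == null) {
          return;
        }
        ValuesEnum values = docValues.getEnum();
        while (values.nextDoc() != ValuesEnum.NO_MORE_DOCS) {
          System.out.println(values.getInt()); // use bytes()/getFloat() for other ValueTypes
        }
      }
    }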
@@ -33,7 +33,7 @@ public class AppendingTermsDictReader extends BlockTermsReader {
   public AppendingTermsDictReader(TermsIndexReaderBase indexReader,
       Directory dir, FieldInfos fieldInfos, String segment,
       PostingsReaderBase postingsReader, int readBufferSize,
-      int termsCacheSize, String codecId) throws IOException {
+      int termsCacheSize, int codecId) throws IOException {
     super(indexReader, dir, fieldInfos, segment, postingsReader, readBufferSize,
         termsCacheSize, codecId);
   }
@@ -30,7 +30,7 @@ import org.apache.lucene.util.CodecUtil;
 public class AppendingTermsIndexReader extends FixedGapTermsIndexReader {

   public AppendingTermsIndexReader(Directory dir, FieldInfos fieldInfos,
-      String segment, int indexDivisor, Comparator<BytesRef> termComp, String codecId)
+      String segment, int indexDivisor, Comparator<BytesRef> termComp, int codecId)
     throws IOException {
     super(dir, fieldInfos, segment, indexDivisor, termComp, codecId);
   }
@@ -97,7 +97,7 @@ public class HighFreqTerms {
   private static void usage() {
     System.out
         .println("\n\n"
-            + "java org.apache.lucene.misc.HighFreqTerms <index dir> [-t][number_terms] [field]\n\t -t: include totalTermFreq\n\n");
+            + "java org.apache.lucene.misc.HighFreqTerms <index dir> [-t] [number_terms] [field]\n\t -t: include totalTermFreq\n\n");
   }
   /**
    *
@@ -89,10 +89,10 @@ public class NRTCachingDirectory extends Directory {
   private final long maxCachedBytes;

   private static final boolean VERBOSE = false;

   /**
    *  We will cache a newly created output if 1) it's a
-   *  flush or a merge and the estimated size of the merged segmnt is <=
+   *  flush or a merge and the estimated size of the merged segment is <=
    *  maxMergeSizeMB, and 2) the total cached bytes is <=
    *  maxCachedMB */
   public NRTCachingDirectory(Directory delegate, double maxMergeSizeMB, double maxCachedMB) {
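The constructor javadoc above gives the two caching conditions. A minimal usage sketch (not part of the commit; the package, index path, and size limits are assumptions), matching the constructor shape used in the new testDeleteFile test below:

    import java.io.File;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.store.NRTCachingDirectory;

    class NRTCachingSketch {
      static Directory open() throws Exception {
        // Cache flushed/merged segments up to 5 MB each, holding at most
        // 60 MB in RAM; anything larger goes straight to the wrapped FSDirectory.
        Directory fsDir = FSDirectory.open(new File("/path/to/index")); // hypothetical path
        return new NRTCachingDirectory(fsDir, 5.0, 60.0);
      }
    }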
@@ -101,6 +101,36 @@ public class NRTCachingDirectory extends Directory {
     maxCachedBytes = (long) (maxCachedMB*1024*1024);
   }

+  @Override
+  public LockFactory getLockFactory() {
+    return delegate.getLockFactory();
+  }
+
+  @Override
+  public void setLockFactory(LockFactory lf) throws IOException {
+    delegate.setLockFactory(lf);
+  }
+
+  @Override
+  public String getLockID() {
+    return delegate.getLockID();
+  }
+
+  @Override
+  public Lock makeLock(String name) {
+    return delegate.makeLock(name);
+  }
+
+  @Override
+  public void clearLock(String name) throws IOException {
+    delegate.clearLock(name);
+  }
+
+  @Override
+  public String toString() {
+    return "NRTCachingDirectory(" + delegate + "; maxCacheMB=" + (maxCachedBytes/1024/1024.) + " maxMergeSizeMB=" + (maxMergeSizeBytes/1024/1024.) + ")";
+  }
+
   @Override
   public synchronized String[] listAll() throws IOException {
     final Set<String> files = new HashSet<String>();
@@ -108,7 +138,9 @@ public class NRTCachingDirectory extends Directory {
       files.add(f);
     }
     for(String f : delegate.listAll()) {
-      assert !files.contains(f);
+      // Cannot do this -- if lucene calls createOutput but
+      // file already exists then this falsely trips:
+      //assert !files.contains(f): "file \"" + f + "\" is in both dirs";
       files.add(f);
     }
     return files.toArray(new String[files.size()]);
@@ -136,12 +168,15 @@ public class NRTCachingDirectory extends Directory {

   @Override
   public synchronized void deleteFile(String name) throws IOException {
-    // Delete from both, in case we are currently uncaching:
     if (VERBOSE) {
       System.out.println("nrtdir.deleteFile name=" + name);
     }
-    cache.deleteFile(name);
-    delegate.deleteFile(name);
+    if (cache.fileExists(name)) {
+      assert !delegate.fileExists(name);
+      cache.deleteFile(name);
+    } else {
+      delegate.deleteFile(name);
+    }
   }

   @Override
@@ -207,17 +242,7 @@ public class NRTCachingDirectory extends Directory {
     }
   }

-  @Override
-  public Lock makeLock(String name) {
-    return delegate.makeLock(name);
-  }
-
-  @Override
-  public void clearLock(String name) throws IOException {
-    delegate.clearLock(name);
-  }
-
-  /** Close thius directory, which flushes any cached files
+  /** Close this directory, which flushes any cached files
    *  to the delegate and then closes the delegate. */
   @Override
   public void close() throws IOException {
@@ -277,4 +302,3 @@ public class NRTCachingDirectory extends Directory {
     }
   }
 }
-
@@ -111,4 +111,12 @@ public class TestNRTCachingDirectory extends LuceneTestCase {
     conf.setMergeScheduler(cachedFSDir.getMergeScheduler());
     IndexWriter writer = new IndexWriter(cachedFSDir, conf);
   }
+
+  public void testDeleteFile() throws Exception {
+    Directory dir = new NRTCachingDirectory(newDirectory(), 2.0, 25.0);
+    dir.createOutput("foo.txt").close();
+    dir.deleteFile("foo.txt");
+    assertEquals(0, dir.listAll().length);
+    dir.close();
+  }
 }
@@ -40,7 +40,6 @@ import org.apache.lucene.util.Version;
  * become <code>h?user</code> when using this parser and thus no match would be found (i.e.
  * using this parser will be no improvement over QueryParser in such cases).
  *
- * @version $Revision$, $Date$
  */
 public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryParser {

@@ -29,7 +29,6 @@ import org.apache.lucene.queryParser.ParseException;
 import org.apache.lucene.util.LuceneTestCase;

 /**
- * @version $Revision$, $Date$
  */
 public class TestAnalyzingQueryParser extends LuceneTestCase {

@@ -1859,7 +1859,7 @@ document.write("Last Published: " + document.lastModified);
 (if compression is enabled, the algorithm used is ZLIB),
 only available for indexes until Lucene version 2.9.x</li>

-<li>4th to 6th bits (mask: 0x7<<3) define the type of a
+<li>4th to 6th bit (mask: 0x7<<3) define the type of a
 numeric field: <ul>

 <li>all bits in mask are cleared if no numeric field at all</li>
@@ -1868,7 +1868,7 @@ document.write("Last Published: " + document.lastModified);

 <li>2<<3: Value is Long</li>

-<li>3<<3: Value is Int as Float (as of Integer.intBitsToFloat)</li>
+<li>3<<3: Value is Int as Float (as of Float.intBitsToFloat)</li>

 <li>4<<3: Value is Long as Double (as of Double.longBitsToDouble)</li>

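The two file-format fixes above concern the numeric-type bits in a stored field's flags byte. A small decoding sketch (not part of the commit) of the layout the documentation describes:

    class NumericTypeBitsSketch {
      // Bits 4-6 (mask 0x7<<3) of the stored-field flags byte carry the
      // numeric type: 0 = not numeric, 1 = Int, 2 = Long,
      // 3 = Int as Float (Float.intBitsToFloat), 4 = Long as Double.
      static int numericType(int flagsByte) {
        return (flagsByte >> 3) & 0x7;
      }
    }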
Binary file not shown.
@@ -18,7 +18,9 @@ package org.apache.lucene.document;
 import org.apache.lucene.search.PhraseQuery; // for javadocs
 import org.apache.lucene.search.spans.SpanQuery; // for javadocs
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.index.FieldInvertState;
+import org.apache.lucene.index.FieldInvertState; // for javadocs
+import org.apache.lucene.index.values.PerDocFieldValues;
+import org.apache.lucene.index.values.ValueType;
 import org.apache.lucene.util.StringHelper; // for javadocs


@@ -47,6 +49,8 @@ public abstract class AbstractField implements Fieldable {
   // length/offset for all primitive types
   protected int binaryLength;
   protected int binaryOffset;
+  protected PerDocFieldValues docValues;
+

   protected AbstractField()
   {
@@ -289,4 +293,20 @@ public abstract class AbstractField implements Fieldable {
     result.append('>');
     return result.toString();
   }
+
+  public PerDocFieldValues getDocValues() {
+    return docValues;
+  }
+
+  public void setDocValues(PerDocFieldValues docValues) {
+    this.docValues = docValues;
+  }
+
+  public boolean hasDocValues() {
+    return docValues != null && docValues.type() != null;
+  }
+
+  public ValueType docValuesType() {
+    return docValues == null? null : docValues.type();
+  }
 }
@@ -17,14 +17,15 @@ package org.apache.lucene.document;
  * limitations under the License.
  */

+import org.apache.lucene.search.NumericRangeQuery; // for javadocs
+import org.apache.lucene.util.NumericUtils; // for javadocs
+
 import java.text.ParseException;
 import java.text.SimpleDateFormat;
 import java.util.Calendar;
 import java.util.Date;
-import java.util.TimeZone;
 import java.util.Locale;
-import org.apache.lucene.search.NumericRangeQuery; // for javadocs
-import org.apache.lucene.util.NumericUtils; // for javadocs
+import java.util.TimeZone;

 /**
  * Provides support for converting dates to strings and vice-versa.
@@ -47,38 +48,27 @@ import org.apache.lucene.util.NumericUtils; // for javadocs
  */
 public class DateTools {

-  private static final class DateFormats {
-    final static TimeZone GMT = TimeZone.getTimeZone("GMT");
-
-    final SimpleDateFormat YEAR_FORMAT = new SimpleDateFormat("yyyy", Locale.US);
-    final SimpleDateFormat MONTH_FORMAT = new SimpleDateFormat("yyyyMM", Locale.US);
-    final SimpleDateFormat DAY_FORMAT = new SimpleDateFormat("yyyyMMdd", Locale.US);
-    final SimpleDateFormat HOUR_FORMAT = new SimpleDateFormat("yyyyMMddHH", Locale.US);
-    final SimpleDateFormat MINUTE_FORMAT = new SimpleDateFormat("yyyyMMddHHmm", Locale.US);
-    final SimpleDateFormat SECOND_FORMAT = new SimpleDateFormat("yyyyMMddHHmmss", Locale.US);
-    final SimpleDateFormat MILLISECOND_FORMAT = new SimpleDateFormat("yyyyMMddHHmmssSSS", Locale.US);
-    {
-      // times need to be normalized so the value doesn't depend on the
-      // location the index is created/used:
-      YEAR_FORMAT.setTimeZone(GMT);
-      MONTH_FORMAT.setTimeZone(GMT);
-      DAY_FORMAT.setTimeZone(GMT);
-      HOUR_FORMAT.setTimeZone(GMT);
-      MINUTE_FORMAT.setTimeZone(GMT);
-      SECOND_FORMAT.setTimeZone(GMT);
-      MILLISECOND_FORMAT.setTimeZone(GMT);
-    }
-
-    final Calendar calInstance = Calendar.getInstance(GMT, Locale.US);
-  }
-
-  private static final ThreadLocal<DateFormats> FORMATS = new ThreadLocal<DateFormats>() {
+  final static TimeZone GMT = TimeZone.getTimeZone("GMT");
+
+  private static final ThreadLocal<Calendar> TL_CAL = new ThreadLocal<Calendar>() {
     @Override
-    protected DateFormats initialValue() {
-      return new DateFormats();
+    protected Calendar initialValue() {
+      return Calendar.getInstance(GMT, Locale.US);
     }
   };

+  //indexed by format length
+  private static final ThreadLocal<SimpleDateFormat[]> TL_FORMATS = new ThreadLocal<SimpleDateFormat[]>() {
+    @Override
+    protected SimpleDateFormat[] initialValue() {
+      SimpleDateFormat[] arr = new SimpleDateFormat[Resolution.MILLISECOND.formatLen+1];
+      for (Resolution resolution : Resolution.values()) {
+        arr[resolution.formatLen] = (SimpleDateFormat)resolution.format.clone();
+      }
+      return arr;
+    }
+  };
+
   // cannot create, the class has static methods only
   private DateTools() {}

@@ -105,22 +95,8 @@ public class DateTools {
    *  depending on <code>resolution</code>; using GMT as timezone
    */
   public static String timeToString(long time, Resolution resolution) {
-    final DateFormats formats = FORMATS.get();
-
-    formats.calInstance.setTimeInMillis(round(time, resolution));
-    final Date date = formats.calInstance.getTime();
-
-    switch (resolution) {
-      case YEAR: return formats.YEAR_FORMAT.format(date);
-      case MONTH:return formats.MONTH_FORMAT.format(date);
-      case DAY: return formats.DAY_FORMAT.format(date);
-      case HOUR: return formats.HOUR_FORMAT.format(date);
-      case MINUTE: return formats.MINUTE_FORMAT.format(date);
-      case SECOND: return formats.SECOND_FORMAT.format(date);
-      case MILLISECOND: return formats.MILLISECOND_FORMAT.format(date);
-    }
-
-    throw new IllegalArgumentException("unknown resolution " + resolution);
+    final Date date = new Date(round(time, resolution));
+    return TL_FORMATS.get()[resolution.formatLen].format(date);
   }

   /**
@@ -148,24 +124,11 @@ public class DateTools {
    *  expected format
    */
   public static Date stringToDate(String dateString) throws ParseException {
-    final DateFormats formats = FORMATS.get();
-
-    if (dateString.length() == 4) {
-      return formats.YEAR_FORMAT.parse(dateString);
-    } else if (dateString.length() == 6) {
-      return formats.MONTH_FORMAT.parse(dateString);
-    } else if (dateString.length() == 8) {
-      return formats.DAY_FORMAT.parse(dateString);
-    } else if (dateString.length() == 10) {
-      return formats.HOUR_FORMAT.parse(dateString);
-    } else if (dateString.length() == 12) {
-      return formats.MINUTE_FORMAT.parse(dateString);
-    } else if (dateString.length() == 14) {
-      return formats.SECOND_FORMAT.parse(dateString);
-    } else if (dateString.length() == 17) {
-      return formats.MILLISECOND_FORMAT.parse(dateString);
+    try {
+      return TL_FORMATS.get()[dateString.length()].parse(dateString);
+    } catch (Exception e) {
+      throw new ParseException("Input is not a valid date string: " + dateString, 0);
     }
-    throw new ParseException("Input is not valid date string: " + dateString, 0);
   }

   /**
@@ -191,44 +154,25 @@ public class DateTools {
    * @return the date with all values more precise than <code>resolution</code>
    *  set to 0 or 1, expressed as milliseconds since January 1, 1970, 00:00:00 GMT
    */
+  @SuppressWarnings("fallthrough")
   public static long round(long time, Resolution resolution) {
-    final Calendar calInstance = FORMATS.get().calInstance;
+    final Calendar calInstance = TL_CAL.get();
     calInstance.setTimeInMillis(time);

     switch (resolution) {
+      //NOTE: switch statement fall-through is deliberate
       case YEAR:
         calInstance.set(Calendar.MONTH, 0);
-        calInstance.set(Calendar.DAY_OF_MONTH, 1);
-        calInstance.set(Calendar.HOUR_OF_DAY, 0);
-        calInstance.set(Calendar.MINUTE, 0);
-        calInstance.set(Calendar.SECOND, 0);
-        calInstance.set(Calendar.MILLISECOND, 0);
-        break;
       case MONTH:
         calInstance.set(Calendar.DAY_OF_MONTH, 1);
-        calInstance.set(Calendar.HOUR_OF_DAY, 0);
-        calInstance.set(Calendar.MINUTE, 0);
-        calInstance.set(Calendar.SECOND, 0);
-        calInstance.set(Calendar.MILLISECOND, 0);
-        break;
       case DAY:
         calInstance.set(Calendar.HOUR_OF_DAY, 0);
-        calInstance.set(Calendar.MINUTE, 0);
-        calInstance.set(Calendar.SECOND, 0);
-        calInstance.set(Calendar.MILLISECOND, 0);
-        break;
       case HOUR:
         calInstance.set(Calendar.MINUTE, 0);
-        calInstance.set(Calendar.SECOND, 0);
-        calInstance.set(Calendar.MILLISECOND, 0);
-        break;
       case MINUTE:
         calInstance.set(Calendar.SECOND, 0);
-        calInstance.set(Calendar.MILLISECOND, 0);
-        break;
       case SECOND:
         calInstance.set(Calendar.MILLISECOND, 0);
-        break;
       case MILLISECOND:
         // don't cut off anything
         break;
@@ -241,7 +185,18 @@ public class DateTools {
   /** Specifies the time granularity. */
   public static enum Resolution {

-    YEAR, MONTH, DAY, HOUR, MINUTE, SECOND, MILLISECOND;
+    YEAR(4), MONTH(6), DAY(8), HOUR(10), MINUTE(12), SECOND(14), MILLISECOND(17);
+
+    final int formatLen;
+    final SimpleDateFormat format;//should be cloned before use, since it's not threadsafe
+
+    Resolution(int formatLen) {
+      this.formatLen = formatLen;
+      // formatLen 10's place:                     11111111
+      // formatLen  1's place:            12345678901234567
+      this.format = new SimpleDateFormat("yyyyMMddHHmmssSSS".substring(0,formatLen),Locale.US);
+      this.format.setTimeZone(GMT);
+    }

     /** this method returns the name of the resolution
      * in lowercase (for backwards compatibility) */
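With Resolution now carrying its own pattern, a round trip goes through timeToString and stringToDate, which dispatches on string length. A minimal sketch (not part of the commit):

    import java.util.Date;
    import org.apache.lucene.document.DateTools;

    class DateToolsSketch {
      public static void main(String[] args) throws Exception {
        // DAY resolution formats as "yyyyMMdd" in GMT; parsing picks the
        // format by the string's length (8 -> DAY).
        String s = DateTools.timeToString(System.currentTimeMillis(),
                                          DateTools.Resolution.DAY);
        Date d = DateTools.stringToDate(s);
        System.out.println(s + " -> " + d);
      }
    }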
@@ -18,6 +18,9 @@ package org.apache.lucene.document;

 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.index.FieldInvertState; // for javadocs
+import org.apache.lucene.index.values.IndexDocValues;
+import org.apache.lucene.index.values.PerDocFieldValues;
+import org.apache.lucene.index.values.ValueType;
 import org.apache.lucene.search.PhraseQuery; // for javadocs
 import org.apache.lucene.search.spans.SpanQuery; // for javadocs

@@ -206,4 +209,29 @@ public interface Fieldable {
    * fail with an exception.
    */
   void setOmitTermFreqAndPositions(boolean omitTermFreqAndPositions);
+
+  /**
+   * Returns the {@link PerDocFieldValues}
+   */
+  public PerDocFieldValues getDocValues();
+
+  /**
+   * Sets the {@link PerDocFieldValues} for this field. If
+   * {@link PerDocFieldValues} is set this field will store per-document values
+   *
+   * @see IndexDocValues
+   */
+  public void setDocValues(PerDocFieldValues docValues);
+
+  /**
+   * Returns <code>true</code> iff {@link PerDocFieldValues} are set on this
+   * field.
+   */
+  public boolean hasDocValues();
+
+  /**
+   * Returns the {@link ValueType} of the set {@link PerDocFieldValues} or
+   * <code>null</code> if not set.
+   */
+  public ValueType docValuesType();
 }
@@ -0,0 +1,286 @@
+package org.apache.lucene.document;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.Reader;
+import java.util.Comparator;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.document.Field.Index;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.Field.TermVector;
+import org.apache.lucene.index.values.PerDocFieldValues;
+import org.apache.lucene.index.values.ValueType;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * <p>
+ * This class provides a {@link AbstractField} that enables storing of typed
+ * per-document values for scoring, sorting or value retrieval. Here's an
+ * example usage, adding an int value:
+ *
+ * <pre>
+ * document.add(new IndexDocValuesField(name).setInt(value));
+ * </pre>
+ *
+ * For optimal performance, re-use the <code>DocValuesField</code> and
+ * {@link Document} instance for more than one document:
+ *
+ * <pre>
+ *  IndexDocValuesField field = new IndexDocValuesField(name);
+ *  Document document = new Document();
+ *  document.add(field);
+ *
+ *  for(all documents) {
+ *    ...
+ *    field.setInt(value)
+ *    writer.addDocument(document);
+ *    ...
+ *  }
+ * </pre>
+ *
+ * <p>
+ * If doc values are stored in addition to an indexed ({@link Index}) or stored
+ * ({@link Store}) value it's recommended to use the {@link IndexDocValuesField}'s
+ * {@link #set(AbstractField)} API:
+ *
+ * <pre>
+ *  IndexDocValuesField field = new IndexDocValuesField(name);
+ *  Field indexedField = new Field(name, stringValue, Stored.NO, Indexed.ANALYZED);
+ *  Document document = new Document();
+ *  document.add(indexedField);
+ *  field.set(indexedField);
+ *  for(all documents) {
+ *    ...
+ *    field.setInt(value)
+ *    writer.addDocument(document);
+ *    ...
+ *  }
+ * </pre>
+ *
+ * */
+public class IndexDocValuesField extends AbstractField implements PerDocFieldValues {
+
+  protected BytesRef bytes;
+  protected double doubleValue;
+  protected long longValue;
+  protected ValueType type;
+  protected Comparator<BytesRef> bytesComparator;
+
+  /**
+   * Creates a new {@link IndexDocValuesField} with the given name.
+   */
+  public IndexDocValuesField(String name) {
+    super(name, Store.NO, Index.NO, TermVector.NO);
+    setDocValues(this);
+  }
+
+  /**
+   * Creates a {@link IndexDocValuesField} prototype
+   */
+  IndexDocValuesField() {
+    this("");
+  }
+
+  /**
+   * Sets the given <code>long</code> value and sets the field's {@link ValueType} to
+   * {@link ValueType#INTS} unless already set. If you want to change the
+   * default type use {@link #setType(ValueType)}.
+   */
+  public void setInt(long value) {
+    if (type == null) {
+      type = ValueType.INTS;
+    }
+    longValue = value;
+  }
+
+  /**
+   * Sets the given <code>float</code> value and sets the field's {@link ValueType}
+   * to {@link ValueType#FLOAT_32} unless already set. If you want to
+   * change the type use {@link #setType(ValueType)}.
+   */
+  public void setFloat(float value) {
+    if (type == null) {
+      type = ValueType.FLOAT_32;
+    }
+    doubleValue = value;
+  }
+
+  /**
+   * Sets the given <code>double</code> value and sets the field's {@link ValueType}
+   * to {@link ValueType#FLOAT_64} unless already set. If you want to
+   * change the default type use {@link #setType(ValueType)}.
+   */
+  public void setFloat(double value) {
+    if (type == null) {
+      type = ValueType.FLOAT_64;
+    }
+    doubleValue = value;
+  }
+
+  /**
+   * Sets the given {@link BytesRef} value and the field's {@link ValueType}. The
+   * comparator for this field is set to <code>null</code>. If a
+   * <code>null</code> comparator is set the default comparator for the given
+   * {@link ValueType} is used.
+   */
+  public void setBytes(BytesRef value, ValueType type) {
+    setBytes(value, type, null);
+  }
+
+  /**
+   * Sets the given {@link BytesRef} value, the field's {@link ValueType} and the
+   * field's comparator. If the {@link Comparator} is set to <code>null</code>
+   * the default for the given {@link ValueType} is used instead.
+   *
+   * @throws IllegalArgumentException
+   *           if the value or the type are null
+   */
+  public void setBytes(BytesRef value, ValueType type, Comparator<BytesRef> comp) {
+    if (value == null) {
+      throw new IllegalArgumentException("value must not be null");
+    }
+    setType(type);
+    if (bytes == null) {
+      bytes = new BytesRef(value);
+    } else {
+      bytes.copy(value);
+    }
+    bytesComparator = comp;
+  }
+
+  /**
+   * Returns the set {@link BytesRef} or <code>null</code> if not set.
+   */
+  public BytesRef getBytes() {
+    return bytes;
+  }
+
+  /**
+   * Returns the set {@link BytesRef} comparator or <code>null</code> if not set
+   */
+  public Comparator<BytesRef> bytesComparator() {
+    return bytesComparator;
+  }
+
+  /**
+   * Returns the set floating point value or <code>0.0d</code> if not set.
+   */
+  public double getFloat() {
+    return doubleValue;
+  }
+
+  /**
+   * Returns the set <code>long</code> value of <code>0</code> if not set.
+   */
+  public long getInt() {
+    return longValue;
+  }
+
+  /**
+   * Sets the {@link BytesRef} comparator for this field. If the field has a
+   * numeric {@link ValueType} the comparator will be ignored.
+   */
+  public void setBytesComparator(Comparator<BytesRef> comp) {
+    this.bytesComparator = comp;
+  }
+
+  /**
+   * Sets the {@link ValueType} for this field.
+   */
+  public void setType(ValueType type) {
+    if (type == null) {
+      throw new IllegalArgumentException("Type must not be null");
+    }
+    this.type = type;
+  }
+
+  /**
+   * Returns the field's {@link ValueType}
+   */
+  public ValueType type() {
+    return type;
+  }
+
+  /**
+   * Returns always <code>null</code>
+   */
+  public Reader readerValue() {
+    return null;
+  }
+
+  /**
+   * Returns always <code>null</code>
+   */
+  public String stringValue() {
+    return null;
+  }
+
+  /**
+   * Returns always <code>null</code>
+   */
+  public TokenStream tokenStreamValue() {
+    return null;
+  }
+
+  /**
+   * Sets this {@link IndexDocValuesField} to the given {@link AbstractField} and
+   * returns the given field. Any modifications to this instance will be visible
+   * to the given field.
+   */
+  public <T extends AbstractField> T set(T field) {
+    field.setDocValues(this);
+    return field;
+  }
+
+  /**
+   * Sets a new {@link PerDocFieldValues} instance on the given field with the
+   * given type and returns it.
+   *
+   */
+  public static <T extends AbstractField> T set(T field, ValueType type) {
+    if (field instanceof IndexDocValuesField)
+      return field;
+    final IndexDocValuesField valField = new IndexDocValuesField();
+    switch (type) {
+    case BYTES_FIXED_DEREF:
+    case BYTES_FIXED_SORTED:
+    case BYTES_FIXED_STRAIGHT:
+    case BYTES_VAR_DEREF:
+    case BYTES_VAR_SORTED:
+    case BYTES_VAR_STRAIGHT:
+      BytesRef ref = field.isBinary() ? new BytesRef(field.getBinaryValue(),
+          field.getBinaryOffset(), field.getBinaryLength()) : new BytesRef(
+          field.stringValue());
+      valField.setBytes(ref, type);
+      break;
+    case INTS:
+      valField.setInt(Long.parseLong(field.stringValue()));
+      break;
+    case FLOAT_32:
+      valField.setFloat(Float.parseFloat(field.stringValue()));
+      break;
+    case FLOAT_64:
+      valField.setFloat(Double.parseDouble(field.stringValue()));
+      break;
+    default:
+      throw new IllegalArgumentException("unknown type: " + type);
+    }
+    return valField.set(field);
+  }
+
+}
@@ -27,6 +27,9 @@ import org.apache.lucene.document.AbstractField; // for javadocs
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.codecs.CodecProvider;
 import org.apache.lucene.index.codecs.DefaultSegmentInfosWriter;
+import org.apache.lucene.index.codecs.PerDocValues;
+import org.apache.lucene.index.values.IndexDocValues;
+import org.apache.lucene.index.values.ValuesEnum;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;

@@ -195,6 +198,9 @@ public class CheckIndex {

     /** Status for testing of term vectors (null if term vectors could not be tested). */
     public TermVectorStatus termVectorStatus;
+
+    /** Status for testing of DocValues (null if DocValues could not be tested). */
+    public DocValuesStatus docValuesStatus;
   }

   /**
@@ -254,6 +260,15 @@ public class CheckIndex {
     /** Exception thrown during term vector test (null on success) */
     public Throwable error = null;
   }
+
+  public static final class DocValuesStatus {
+    /** Number of documents tested. */
+    public int docCount;
+    /** Total number of docValues tested. */
+    public long totalValueFields;
+    /** Exception thrown during doc values test (null on success) */
+    public Throwable error = null;
+  }
 }

 /** Create a new CheckIndex on the directory. */
@@ -499,6 +514,8 @@ public class CheckIndex {

       // Test Term Vectors
       segInfoStat.termVectorStatus = testTermVectors(info, reader, nf);
+
+      segInfoStat.docValuesStatus = testDocValues(info, reader);

       // Rethrow the first exception we encountered
       //  This will cause stats for failed segments to be incremented properly
|
||||||
throw new RuntimeException("Stored Field test failed");
|
throw new RuntimeException("Stored Field test failed");
|
||||||
} else if (segInfoStat.termVectorStatus.error != null) {
|
} else if (segInfoStat.termVectorStatus.error != null) {
|
||||||
throw new RuntimeException("Term Vector test failed");
|
throw new RuntimeException("Term Vector test failed");
|
||||||
|
} else if (segInfoStat.docValuesStatus.error != null) {
|
||||||
|
throw new RuntimeException("DocValues test failed");
|
||||||
}
|
}
|
||||||
|
|
||||||
msg("");
|
msg("");
|
||||||
|
@@ -920,6 +939,60 @@ public class CheckIndex {

     return status;
   }
+
+  private Status.DocValuesStatus testDocValues(SegmentInfo info,
+      SegmentReader reader) {
+    final Status.DocValuesStatus status = new Status.DocValuesStatus();
+    try {
+      if (infoStream != null) {
+        infoStream.print("    test: DocValues........");
+      }
+      final FieldInfos fieldInfos = info.getFieldInfos();
+      for (FieldInfo fieldInfo : fieldInfos) {
+        if (fieldInfo.hasDocValues()) {
+          status.totalValueFields++;
+          final PerDocValues perDocValues = reader.perDocValues();
+          final IndexDocValues docValues = perDocValues.docValues(fieldInfo.name);
+          if (docValues == null) {
+            continue;
+          }
+          final ValuesEnum values = docValues.getEnum();
+          while (values.nextDoc() != ValuesEnum.NO_MORE_DOCS) {
+            switch (fieldInfo.docValues) {
+            case BYTES_FIXED_DEREF:
+            case BYTES_FIXED_SORTED:
+            case BYTES_FIXED_STRAIGHT:
+            case BYTES_VAR_DEREF:
+            case BYTES_VAR_SORTED:
+            case BYTES_VAR_STRAIGHT:
+              values.bytes();
+              break;
+            case FLOAT_32:
+            case FLOAT_64:
+              values.getFloat();
+              break;
+            case INTS:
+              values.getInt();
+              break;
+            default:
+              throw new IllegalArgumentException("Field: " + fieldInfo.name
+                  + " - no such DocValues type: " + fieldInfo.docValues);
+            }
+          }
+        }
+      }
+
+      msg("OK [" + status.docCount + " total doc Count; Num DocValues Fields "
+          + status.totalValueFields);
+    } catch (Throwable e) {
+      msg("ERROR [" + String.valueOf(e.getMessage()) + "]");
+      status.error = e;
+      if (infoStream != null) {
+        e.printStackTrace(infoStream);
+      }
+    }
+    return status;
+  }
+
   /**
    * Test term vectors for a segment.
@@ -158,7 +158,7 @@ public class CompoundFileReader extends Directory {
       throw new IOException("Stream closed");

     id = IndexFileNames.stripSegmentName(id);
-    FileEntry entry = entries.get(id);
+    final FileEntry entry = entries.get(id);
     if (entry == null)
       throw new IOException("No sub-file with id " + id + " found (files: " + entries.keySet() + ")");

@@ -35,6 +35,7 @@ import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.Lock;
 import org.apache.lucene.store.LockObtainFailedException;
 import org.apache.lucene.index.codecs.CodecProvider;
+import org.apache.lucene.index.codecs.PerDocValues;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.MapBackedSet;
@@ -951,8 +952,8 @@ class DirectoryReader extends IndexReader implements Cloneable {
     Collections.sort(commits);

     return commits;
   }

   private static final class ReaderCommit extends IndexCommit {
     private String segmentsFileName;
     Collection<String> files;
@@ -1022,4 +1023,9 @@ class DirectoryReader extends IndexReader implements Cloneable {
       throw new UnsupportedOperationException("This IndexCommit does not support deletions");
     }
   }
+
+  @Override
+  public PerDocValues perDocValues() throws IOException {
+    throw new UnsupportedOperationException("please use MultiPerDocValues#getPerDocs, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level Fields");
+  }
 }
@@ -27,6 +27,10 @@ import java.util.Map;

 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.index.DocumentsWriterPerThread.DocState;
+import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.PerDocConsumer;
+import org.apache.lucene.index.codecs.DocValuesConsumer;
 import org.apache.lucene.util.ArrayUtil;


@@ -80,6 +84,9 @@ final class DocFieldProcessor extends DocConsumer {
     // FieldInfo.storePayload.
     final String fileName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.FIELD_INFOS_EXTENSION);
     state.fieldInfos.write(state.directory, fileName);
+    for (DocValuesConsumer consumers : docValues.values()) {
+      consumers.finish(state.numDocs);
+    };
   }

   @Override
@ -100,6 +107,14 @@ final class DocFieldProcessor extends DocConsumer {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for(PerDocConsumer consumer : perDocConsumers.values()) {
|
||||||
|
try {
|
||||||
|
consumer.close(); // TODO add abort to PerDocConsumer!
|
||||||
|
} catch (IOException e) {
|
||||||
|
// ignore on abort!
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
fieldsWriter.abort();
|
fieldsWriter.abort();
|
||||||
} catch (Throwable t) {
|
} catch (Throwable t) {
|
||||||
|
@ -150,6 +165,15 @@ final class DocFieldProcessor extends DocConsumer {
|
||||||
fieldHash = new DocFieldProcessorPerField[2];
|
fieldHash = new DocFieldProcessorPerField[2];
|
||||||
hashMask = 1;
|
hashMask = 1;
|
||||||
totalFieldCount = 0;
|
totalFieldCount = 0;
|
||||||
|
for(PerDocConsumer consumer : perDocConsumers.values()) {
|
||||||
|
try {
|
||||||
|
consumer.close();
|
||||||
|
} catch (IOException e) {
|
||||||
|
// ignore and continue closing remaining consumers
|
||||||
|
}
|
||||||
|
}
|
||||||
|
perDocConsumers.clear();
|
||||||
|
docValues.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
private void rehash() {
|
private void rehash() {
|
||||||
|
@ -215,7 +239,7 @@ final class DocFieldProcessor extends DocConsumer {
|
||||||
// easily add it
|
// easily add it
|
||||||
FieldInfo fi = fieldInfos.addOrUpdate(fieldName, field.isIndexed(), field.isTermVectorStored(),
|
FieldInfo fi = fieldInfos.addOrUpdate(fieldName, field.isIndexed(), field.isTermVectorStored(),
|
||||||
field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
|
field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
|
||||||
field.getOmitNorms(), false, field.getOmitTermFreqAndPositions());
|
field.getOmitNorms(), false, field.getOmitTermFreqAndPositions(), field.docValuesType());
|
||||||
|
|
||||||
fp = new DocFieldProcessorPerField(this, fi);
|
fp = new DocFieldProcessorPerField(this, fi);
|
||||||
fp.next = fieldHash[hashPos];
|
fp.next = fieldHash[hashPos];
|
||||||
|
@ -227,7 +251,7 @@ final class DocFieldProcessor extends DocConsumer {
|
||||||
} else {
|
} else {
|
||||||
fieldInfos.addOrUpdate(fp.fieldInfo.name, field.isIndexed(), field.isTermVectorStored(),
|
fieldInfos.addOrUpdate(fp.fieldInfo.name, field.isIndexed(), field.isTermVectorStored(),
|
||||||
field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
|
field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
|
||||||
field.getOmitNorms(), false, field.getOmitTermFreqAndPositions());
|
field.getOmitNorms(), false, field.getOmitTermFreqAndPositions(), field.docValuesType());
|
||||||
}
|
}
|
||||||
|
|
||||||
if (thisFieldGen != fp.lastGen) {
|
if (thisFieldGen != fp.lastGen) {
|
||||||
|
@ -251,6 +275,10 @@ final class DocFieldProcessor extends DocConsumer {
|
||||||
if (field.isStored()) {
|
if (field.isStored()) {
|
||||||
fieldsWriter.addField(field, fp.fieldInfo);
|
fieldsWriter.addField(field, fp.fieldInfo);
|
||||||
}
|
}
|
||||||
|
if (field.hasDocValues()) {
|
||||||
|
final DocValuesConsumer docValuesConsumer = docValuesConsumer(docState, fp.fieldInfo);
|
||||||
|
docValuesConsumer.add(docState.docID, field.getDocValues());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we are writing vectors then we must visit
|
// If we are writing vectors then we must visit
|
||||||
|
@ -286,4 +314,36 @@ final class DocFieldProcessor extends DocConsumer {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
final private Map<String, DocValuesConsumer> docValues = new HashMap<String, DocValuesConsumer>();
|
||||||
|
final private Map<Integer, PerDocConsumer> perDocConsumers = new HashMap<Integer, PerDocConsumer>();
|
||||||
|
|
||||||
|
DocValuesConsumer docValuesConsumer(DocState docState, FieldInfo fieldInfo)
|
||||||
|
throws IOException {
|
||||||
|
DocValuesConsumer docValuesConsumer = docValues.get(fieldInfo.name);
|
||||||
|
if (docValuesConsumer != null) {
|
||||||
|
return docValuesConsumer;
|
||||||
|
}
|
||||||
|
PerDocConsumer perDocConsumer = perDocConsumers.get(fieldInfo.getCodecId());
|
||||||
|
if (perDocConsumer == null) {
|
||||||
|
PerDocWriteState perDocWriteState = docState.docWriter.newPerDocWriteState(fieldInfo.getCodecId());
|
||||||
|
SegmentCodecs codecs = perDocWriteState.segmentCodecs;
|
||||||
|
assert codecs.codecs.length > fieldInfo.getCodecId();
|
||||||
|
Codec codec = codecs.codecs[fieldInfo.getCodecId()];
|
||||||
|
perDocConsumer = codec.docsConsumer(perDocWriteState);
|
||||||
|
perDocConsumers.put(Integer.valueOf(fieldInfo.getCodecId()), perDocConsumer);
|
||||||
|
}
|
||||||
|
boolean success = false;
|
||||||
|
try {
|
||||||
|
docValuesConsumer = perDocConsumer.addValuesField(fieldInfo);
|
||||||
|
fieldInfo.commitDocValues();
|
||||||
|
success = true;
|
||||||
|
} finally {
|
||||||
|
if (!success) {
|
||||||
|
fieldInfo.revertUncommitted();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
docValues.put(fieldInfo.name, docValuesConsumer);
|
||||||
|
return docValuesConsumer;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
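Side note on the consumer plumbing above: DocFieldProcessor keeps one DocValuesConsumer per field name and one PerDocConsumer per codec id, so the values writer for a field is created once per segment and then reused for every following document. A minimal, self-contained sketch of that caching pattern (ConsumerCacheDemo and Consumer are illustrative stand-ins, not Lucene API):

    import java.util.HashMap;
    import java.util.Map;

    class ConsumerCacheDemo {
      interface Consumer {
        void add(int docID, long value); // stand-in for DocValuesConsumer.add
      }

      private final Map<String, Consumer> byField = new HashMap<String, Consumer>();

      Consumer consumerFor(String field) {
        Consumer c = byField.get(field);
        if (c == null) {
          // stand-in for codec.docsConsumer(perDocWriteState).addValuesField(fi)
          c = new Consumer() {
            public void add(int docID, long value) { /* write value for docID */ }
          };
          byField.put(field, c); // cached for the rest of the segment
        }
        return c;
      }
    }
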
@@ -32,6 +32,7 @@ import org.apache.lucene.search.SimilarityProvider;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BitVector;
 import org.apache.lucene.util.ByteBlockPool.Allocator;
+import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator;
 import org.apache.lucene.util.RamUsageEstimator;

 public class DocumentsWriterPerThread {
@@ -169,6 +170,7 @@ public class DocumentsWriterPerThread {
   DocumentsWriterDeleteQueue deleteQueue;
   DeleteSlice deleteSlice;
   private final NumberFormat nf = NumberFormat.getInstance();
+  final Allocator byteBlockAllocator;


   public DocumentsWriterPerThread(Directory directory, DocumentsWriter parent,
@@ -181,9 +183,9 @@ public class DocumentsWriterPerThread {
     this.docState = new DocState(this);
     this.docState.similarityProvider = parent.indexWriter.getConfig()
         .getSimilarityProvider();
-
-    consumer = indexingChain.getChain(this);
     bytesUsed = new AtomicLong(0);
+    byteBlockAllocator = new DirectTrackingAllocator(bytesUsed);
+    consumer = indexingChain.getChain(this);
     pendingDeletes = new BufferedDeletes(false);
     initialize();
   }
@@ -538,36 +540,13 @@ public class DocumentsWriterPerThread {
     bytesUsed.addAndGet(-(length *(INT_BLOCK_SIZE*RamUsageEstimator.NUM_BYTES_INT)));
   }

-  final Allocator byteBlockAllocator = new DirectTrackingAllocator();
-
-  private class DirectTrackingAllocator extends Allocator {
-    public DirectTrackingAllocator() {
-      this(BYTE_BLOCK_SIZE);
-    }
-
-    public DirectTrackingAllocator(int blockSize) {
-      super(blockSize);
-    }
-
-    @Override
-    public byte[] getByteBlock() {
-      bytesUsed.addAndGet(blockSize);
-      return new byte[blockSize];
-    }
-
-    @Override
-    public void recycleByteBlocks(byte[][] blocks, int start, int end) {
-      bytesUsed.addAndGet(-((end-start)* blockSize));
-      for (int i = start; i < end; i++) {
-        blocks[i] = null;
-      }
-    }
-
+  PerDocWriteState newPerDocWriteState(int codecId) {
+    assert segment != null;
+    return new PerDocWriteState(infoStream, directory, segment, fieldInfos, bytesUsed, codecId);
   }

   void setInfoStream(PrintStream infoStream) {
     this.infoStream = infoStream;
     docState.infoStream = infoStream;
   }

 }

@@ -1,5 +1,7 @@
 package org.apache.lucene.index;

+import org.apache.lucene.index.values.ValueType;
+
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -24,6 +26,8 @@ public final class FieldInfo {
   public final int number;

   public boolean isIndexed;
+  ValueType docValues;
+

   // true if term vector for this field should be stored
   boolean storeTermVector;
@@ -38,10 +42,11 @@ public final class FieldInfo {

   FieldInfo(String na, boolean tk, int nu, boolean storeTermVector,
             boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
-            boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions) {
+            boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions, ValueType docValues) {
     name = na;
     isIndexed = tk;
     number = nu;
+    this.docValues = docValues;
     if (isIndexed) {
       this.storeTermVector = storeTermVector;
       this.storeOffsetWithTermVector = storeOffsetWithTermVector;
@@ -72,7 +77,7 @@ public final class FieldInfo {
   @Override
   public Object clone() {
     FieldInfo clone = new FieldInfo(name, isIndexed, number, storeTermVector, storePositionWithTermVector,
-        storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
+        storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions, docValues);
     clone.codecId = this.codecId;
     return clone;
   }
@@ -107,8 +112,23 @@ public final class FieldInfo {
     }
     assert !this.omitTermFreqAndPositions || !this.storePayloads;
   }
-  private boolean vectorsCommitted;
+  void setDocValues(ValueType v) {
+    if (docValues == null) {
+      docValues = v;
+    }
+  }
+
+  public boolean hasDocValues() {
+    return docValues != null;
+  }
+
+  public ValueType getDocValues() {
+    return docValues;
+  }
+
+  private boolean vectorsCommitted;
+  private boolean docValuesCommitted;

   /**
    * Reverts all uncommitted changes on this {@link FieldInfo}
    * @see #commitVectors()
@@ -119,6 +139,10 @@ public final class FieldInfo {
       storePositionWithTermVector = false;
       storeTermVector = false;
     }
+
+    if (docValues != null && !docValuesCommitted) {
+      docValues = null;
+    }
   }

   /**
@@ -131,4 +155,9 @@ public final class FieldInfo {
     assert storeTermVector;
     vectorsCommitted = true;
   }
+
+  void commitDocValues() {
+    assert hasDocValues();
+    docValuesCommitted = true;
+  }
 }

@@ -31,6 +31,7 @@ import java.util.Map.Entry;
 import org.apache.lucene.index.SegmentCodecs; // Required for Java 1.5 javadocs
 import org.apache.lucene.index.SegmentCodecs.SegmentCodecsBuilder;
 import org.apache.lucene.index.codecs.CodecProvider;
+import org.apache.lucene.index.values.ValueType;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
@@ -202,6 +203,9 @@ public final class FieldInfos implements Iterable<FieldInfo> {
   public static final int FORMAT_START = -2;
   public static final int FORMAT_PER_FIELD_CODEC = -3;

+  // Records index values for this field
+  public static final int FORMAT_INDEX_VALUES = -3;
+
   // whenever you add a new format, make it 1 smaller (negative version logic)!
   static final int FORMAT_CURRENT = FORMAT_PER_FIELD_CODEC;

@@ -410,7 +414,7 @@ public final class FieldInfos implements Iterable<FieldInfo> {
   synchronized public void addOrUpdate(String name, boolean isIndexed, boolean storeTermVector,
                        boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, boolean omitNorms) {
     addOrUpdate(name, isIndexed, storeTermVector, storePositionWithTermVector,
-                storeOffsetWithTermVector, omitNorms, false, false);
+                storeOffsetWithTermVector, omitNorms, false, false, null);
   }

   /** If the field is not yet known, adds it. If it is known, checks to make
@@ -429,14 +433,14 @@ public final class FieldInfos implements Iterable<FieldInfo> {
    */
   synchronized public FieldInfo addOrUpdate(String name, boolean isIndexed, boolean storeTermVector,
                        boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
-                       boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions) {
+                       boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions, ValueType docValues) {
     return addOrUpdateInternal(name, -1, isIndexed, storeTermVector, storePositionWithTermVector,
-                               storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
+                               storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions, docValues);
   }

   synchronized private FieldInfo addOrUpdateInternal(String name, int preferredFieldNumber, boolean isIndexed,
       boolean storeTermVector, boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
-      boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions) {
+      boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions, ValueType docValues) {
     if (globalFieldNumbers == null) {
       throw new IllegalStateException("FieldInfos are read-only, create a new instance with a global field map to make modifications to FieldInfos");
     }
@@ -444,11 +448,12 @@ public final class FieldInfos implements Iterable<FieldInfo> {
     FieldInfo fi = fieldInfo(name);
     if (fi == null) {
       final int fieldNumber = nextFieldNumber(name, preferredFieldNumber);
-      fi = addInternal(name, fieldNumber, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
+      fi = addInternal(name, fieldNumber, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions, docValues);
     } else {
       fi.update(isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
+      fi.setDocValues(docValues);
     }
-    if (fi.isIndexed && fi.getCodecId() == FieldInfo.UNASSIGNED_CODEC_ID) {
+    if ((fi.isIndexed || fi.hasDocValues()) && fi.getCodecId() == FieldInfo.UNASSIGNED_CODEC_ID) {
       segmentCodecsBuilder.tryAddAndSet(fi);
     }
     version++;
@@ -460,7 +465,7 @@ public final class FieldInfos implements Iterable<FieldInfo> {
     return addOrUpdateInternal(fi.name, fi.number, fi.isIndexed, fi.storeTermVector,
                fi.storePositionWithTermVector, fi.storeOffsetWithTermVector,
                fi.omitNorms, fi.storePayloads,
-               fi.omitTermFreqAndPositions);
+               fi.omitTermFreqAndPositions, fi.docValues);
   }

   /*
@@ -468,15 +473,14 @@ public final class FieldInfos implements Iterable<FieldInfo> {
    */
   private FieldInfo addInternal(String name, int fieldNumber, boolean isIndexed,
                                 boolean storeTermVector, boolean storePositionWithTermVector,
-                                boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions) {
+                                boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions, ValueType docValuesType) {
     // don't check modifiable here since we use that to initially build up FIs
     name = StringHelper.intern(name);
     if (globalFieldNumbers != null) {
       globalFieldNumbers.setIfNotSet(fieldNumber, name);
     }
     final FieldInfo fi = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, storePositionWithTermVector,
-                                       storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
-
+                                       storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions, docValuesType);
     putInternal(fi);
     return fi;
   }
@@ -600,6 +604,45 @@ public final class FieldInfos implements Iterable<FieldInfo> {
       output.writeInt(fi.number);
       output.writeInt(fi.getCodecId());
       output.writeByte(bits);
+
+      final byte b;
+
+      if (fi.docValues == null) {
+        b = 0;
+      } else {
+        switch(fi.docValues) {
+        case INTS:
+          b = 1;
+          break;
+        case FLOAT_32:
+          b = 2;
+          break;
+        case FLOAT_64:
+          b = 3;
+          break;
+        case BYTES_FIXED_STRAIGHT:
+          b = 4;
+          break;
+        case BYTES_FIXED_DEREF:
+          b = 5;
+          break;
+        case BYTES_FIXED_SORTED:
+          b = 6;
+          break;
+        case BYTES_VAR_STRAIGHT:
+          b = 7;
+          break;
+        case BYTES_VAR_DEREF:
+          b = 8;
+          break;
+        case BYTES_VAR_SORTED:
+          b = 9;
+          break;
+        default:
+          throw new IllegalStateException("unhandled indexValues type " + fi.docValues);
+        }
+      }
+      output.writeByte(b);
     }
   }

@@ -637,7 +680,45 @@ public final class FieldInfos implements Iterable<FieldInfo> {
       }
       hasVectors |= storeTermVector;
       hasProx |= isIndexed && !omitTermFreqAndPositions;
-      final FieldInfo addInternal = addInternal(name, fieldNumber, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
+      ValueType docValuesType = null;
+      if (format <= FORMAT_INDEX_VALUES) {
+        final byte b = input.readByte();
+        switch(b) {
+        case 0:
+          docValuesType = null;
+          break;
+        case 1:
+          docValuesType = ValueType.INTS;
+          break;
+        case 2:
+          docValuesType = ValueType.FLOAT_32;
+          break;
+        case 3:
+          docValuesType = ValueType.FLOAT_64;
+          break;
+        case 4:
+          docValuesType = ValueType.BYTES_FIXED_STRAIGHT;
+          break;
+        case 5:
+          docValuesType = ValueType.BYTES_FIXED_DEREF;
+          break;
+        case 6:
+          docValuesType = ValueType.BYTES_FIXED_SORTED;
+          break;
+        case 7:
+          docValuesType = ValueType.BYTES_VAR_STRAIGHT;
+          break;
+        case 8:
+          docValuesType = ValueType.BYTES_VAR_DEREF;
+          break;
+        case 9:
+          docValuesType = ValueType.BYTES_VAR_SORTED;
+          break;
+        default:
+          throw new IllegalStateException("unhandled indexValues type " + b);
+        }
+      }
+      final FieldInfo addInternal = addInternal(name, fieldNumber, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions, docValuesType);
       addInternal.setCodecId(codecId);
     }

@@ -669,5 +750,5 @@ public final class FieldInfos implements Iterable<FieldInfo> {
     }
     return roFis;
   }

 }

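Note that FORMAT_INDEX_VALUES is defined as -3, the same value as FORMAT_PER_FIELD_CODEC, so the `format <= FORMAT_INDEX_VALUES` guard in the read path is satisfied by every file written at the current format, and the extra type byte always travels alongside the codec id. A self-contained illustration of the byte round-trip the two switches implement (DemoType is a stand-in for Lucene's ValueType; 0 encodes "no doc values", 1 through 9 encode the nine value types in declaration order):

    enum DemoType {
      INTS, FLOAT_32, FLOAT_64,
      BYTES_FIXED_STRAIGHT, BYTES_FIXED_DEREF, BYTES_FIXED_SORTED,
      BYTES_VAR_STRAIGHT, BYTES_VAR_DEREF, BYTES_VAR_SORTED;

      static byte encode(DemoType t) {
        return t == null ? 0 : (byte) (t.ordinal() + 1); // mirrors the write switch
      }

      static DemoType decode(byte b) {
        return b == 0 ? null : values()[b - 1];          // mirrors the read switch
      }
    }
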
@@ -31,6 +31,6 @@ public abstract class Fields {
   /** Get the {@link Terms} for this field.  This will return
    *  null if the field does not exist. */
   public abstract Terms terms(String field) throws IOException;

   public final static Fields[] EMPTY_ARRAY = new Fields[0];
 }

@@ -55,7 +55,7 @@ public abstract class FieldsEnum {
    *  null this method should not be called. This method
    *  will not return null. */
   public abstract TermsEnum terms() throws IOException;

   public final static FieldsEnum[] EMPTY_ARRAY = new FieldsEnum[0];

   /** Provides zero fields */

@@ -19,7 +19,7 @@ package org.apache.lucene.index;

 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.FieldSelector;
-import org.apache.lucene.index.IndexReader.ReaderContext;
+import org.apache.lucene.index.codecs.PerDocValues;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
@@ -463,4 +463,9 @@ public class FilterIndexReader extends IndexReader {
     super.removeReaderFinishedListener(listener);
     in.removeReaderFinishedListener(listener);
   }
+
+  @Override
+  public PerDocValues perDocValues() throws IOException {
+    return in.perDocValues();
+  }
 }

@@ -22,7 +22,6 @@ import java.io.FilenameFilter;
 import java.io.IOException;
 import java.io.PrintStream;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Date;

@@ -80,10 +80,9 @@ public final class IndexFileNames {

   /** Extension of separate norms */
   public static final String SEPARATE_NORMS_EXTENSION = "s";

   /** Extension of global field numbers */
   public static final String GLOBAL_FIELD_NUM_MAP_EXTENSION = "fnx";

-
   /**
    * This array contains all filename extensions used by
@@ -208,7 +207,12 @@ public final class IndexFileNames {
       return segmentName;
     }
   }

+  /** Sugar for passing "" + name instead */
+  public static String segmentFileName(String segmentName, int name, String ext) {
+    return segmentFileName(segmentName, ""+name, ext);
+  }
+
   /**
    * Returns true if the given filename ends with the given extension. One
    * should provide a <i>pure</i> extension, without '.'.

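The new int overload of segmentFileName is pure sugar: it stringifies the number and delegates, so both spellings below name the same file (the segment name and extension here are made up for illustration):

    String viaInt    = IndexFileNames.segmentFileName("_3", 0, "dat");
    String viaString = IndexFileNames.segmentFileName("_3", "" + 0, "dat");
    assert viaInt.equals(viaString);
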
@@ -23,6 +23,8 @@ import org.apache.lucene.search.FieldCache; // javadocs
 import org.apache.lucene.search.Similarity;
 import org.apache.lucene.index.codecs.Codec;
 import org.apache.lucene.index.codecs.CodecProvider;
+import org.apache.lucene.index.codecs.PerDocValues;
+import org.apache.lucene.index.values.IndexDocValues;
 import org.apache.lucene.store.*;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.Bits;
@@ -174,6 +176,9 @@ public abstract class IndexReader implements Cloneable,Closeable {
     public static final FieldOption TERMVECTOR_WITH_OFFSET = new FieldOption ("TERMVECTOR_WITH_OFFSET");
     /** All fields with termvectors with offset values and position values enabled */
     public static final FieldOption TERMVECTOR_WITH_POSITION_OFFSET = new FieldOption ("TERMVECTOR_WITH_POSITION_OFFSET");
+    /** All fields holding doc values */
+    public static final FieldOption DOC_VALUES = new FieldOption ("DOC_VALUES");
+
   }

   private boolean closed;
@@ -1051,9 +1056,10 @@ public abstract class IndexReader implements Cloneable,Closeable {
   protected abstract void doSetNorm(int doc, String field, byte value)
           throws CorruptIndexException, IOException;

-  /** Flex API: returns {@link Fields} for this reader.
-   *  This method may return null if the reader has no
-   *  postings.
+  /**
+   * Returns {@link Fields} for this reader.
+   * This method may return null if the reader has no
+   * postings.
    *
    * <p><b>NOTE</b>: if this is a multi reader ({@link
    * #getSequentialSubReaders} is not null) then this
@@ -1064,6 +1070,21 @@ public abstract class IndexReader implements Cloneable,Closeable {
    * using {@link ReaderUtil#gatherSubReaders} and iterate
    * through them yourself. */
   public abstract Fields fields() throws IOException;
+
+  /**
+   * Returns {@link PerDocValues} for this reader.
+   * This method may return null if the reader has no per-document
+   * values stored.
+   *
+   * <p><b>NOTE</b>: if this is a multi reader ({@link
+   * #getSequentialSubReaders} is not null) then this
+   * method will throw UnsupportedOperationException. If
+   * you really need {@link PerDocValues} for such a reader,
+   * use {@link MultiPerDocValues#getPerDocs(IndexReader)}. However, for
+   * performance reasons, it's best to get all sub-readers
+   * using {@link ReaderUtil#gatherSubReaders} and iterate
+   * through them yourself. */
+  public abstract PerDocValues perDocValues() throws IOException;

   public int docFreq(Term term) throws IOException {
     return docFreq(term.field(), term.bytes());
@@ -1565,7 +1586,14 @@ public abstract class IndexReader implements Cloneable,Closeable {
   public int getTermInfosIndexDivisor() {
     throw new UnsupportedOperationException("This reader does not support this method.");
   }

+  public final IndexDocValues docValues(String field) throws IOException {
+    final PerDocValues perDoc = perDocValues();
+    if (perDoc == null) {
+      return null;
+    }
+    return perDoc.docValues(field);
+  }

   private volatile Fields fields;

@@ -1578,6 +1606,19 @@ public abstract class IndexReader implements Cloneable,Closeable {
   Fields retrieveFields() {
     return fields;
   }
+
+  private volatile PerDocValues perDocValues;
+
+  /** @lucene.internal */
+  void storePerDoc(PerDocValues perDocValues) {
+    this.perDocValues = perDocValues;
+  }
+
+  /** @lucene.internal */
+  PerDocValues retrievePerDoc() {
+    return perDocValues;
+  }
+

   /**
    * A struct like class that represents a hierarchical relationship between

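The new docValues(String) helper is just a null-guarded shorthand over perDocValues(); a caller sketch (the "price" field is illustrative, and the reader must be one whose perDocValues() does not throw, i.e. not a top-level multi reader):

    IndexDocValues values = reader.docValues("price");
    // equivalent to:
    PerDocValues perDoc = reader.perDocValues();
    IndexDocValues same = perDoc == null ? null : perDoc.docValues("price");
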
@@ -1999,6 +1999,9 @@ public class IndexWriter implements Closeable {
       // will always write to a new generation ("write
       // once").
       segmentInfos.rollbackSegmentInfos(rollbackSegments);
+      if (infoStream != null ) {
+        message("rollback: infos=" + segString(segmentInfos));
+      }

       docWriter.abort();

@@ -2439,6 +2442,8 @@ public class IndexWriter implements Closeable {
     flush(false, true);

     String mergedName = newSegmentName();
+    // TODO: somehow we should fix this merge so it's
+    // abortable so that IW.close(false) is able to stop it
     SegmentMerger merger = new SegmentMerger(directory, config.getTermIndexInterval(),
                                              mergedName, null, payloadProcessorProvider,
                                              globalFieldNumberMap.newFieldInfos(SegmentCodecsBuilder.create(codecs)));
@@ -2456,6 +2461,11 @@ public class IndexWriter implements Closeable {

     boolean useCompoundFile;
     synchronized(this) { // Guard segmentInfos
+      if (stopMerges) {
+        deleter.deleteNewFiles(info.files());
+        return;
+      }
+      ensureOpen();
       useCompoundFile = mergePolicy.useCompoundFile(segmentInfos, info);
     }

@@ -2471,6 +2481,11 @@ public class IndexWriter implements Closeable {

     // Register the new segment
     synchronized(this) {
+      if (stopMerges) {
+        deleter.deleteNewFiles(info.files());
+        return;
+      }
+      ensureOpen();
       segmentInfos.add(info);
       checkpoint();
     }
@@ -3076,6 +3091,7 @@ public class IndexWriter implements Closeable {
     boolean success = false;

     final long t0 = System.currentTimeMillis();
+    //System.out.println(Thread.currentThread().getName() + ": merge start: size=" + (merge.estimatedMergeBytes/1024./1024.) + " MB\n merge=" + merge.segString(directory) + "\n idx=" + segString());

     try {
       try {
@@ -3116,6 +3132,7 @@ public class IndexWriter implements Closeable {
       if (infoStream != null && merge.info != null) {
         message("merge time " + (System.currentTimeMillis()-t0) + " msec for " + merge.info.docCount + " docs");
       }
+      //System.out.println(Thread.currentThread().getName() + ": merge end");
     }

   /** Hook that's called when the specified merge is complete. */
@@ -3734,6 +3751,8 @@ public class IndexWriter implements Closeable {

       assert testPoint("midStartCommit");

+      boolean pendingCommitSet = false;
+
       try {
         // This call can take a long time -- 10s of seconds
         // or more. We do it without sync:
@@ -3753,6 +3772,7 @@ public class IndexWriter implements Closeable {
           toSync.prepareCommit(directory);

           pendingCommit = toSync;
+          pendingCommitSet = true;
           pendingCommitChangeCount = myChangeCount;
         }

@@ -3770,7 +3790,7 @@ public class IndexWriter implements Closeable {
         // double-write a segments_N file.
         segmentInfos.updateGeneration(toSync);

-        if (pendingCommit == null) {
+        if (!pendingCommitSet) {
           if (infoStream != null) {
             message("hit exception committing segments file");
           }

@@ -187,7 +187,7 @@ public final class MultiFields extends Fields {
       return fields.terms(field);
     }
   }

   /** Returns {@link DocsEnum} for the specified field &
    *  term. This may return null if the term does not
    *  exist. */

@@ -17,6 +17,7 @@ package org.apache.lucene.index;
  * limitations under the License.
  */

+import org.apache.lucene.index.values.MultiIndexDocValues;
 import org.apache.lucene.util.PriorityQueue;
 import org.apache.lucene.util.ReaderUtil;

@@ -38,10 +39,14 @@ public final class MultiFieldsEnum extends FieldsEnum {
   // Holds sub-readers containing field we are currently
   // on, popped from queue.
   private final FieldsEnumWithSlice[] top;
+  private final FieldsEnumWithSlice[] enumWithSlices;
+
   private int numTop;

   // Re-used TermsEnum
   private final MultiTermsEnum terms;
+  private final MultiIndexDocValues docValues;
+

   private String currentField;

@@ -50,7 +55,9 @@ public final class MultiFieldsEnum extends FieldsEnum {
   public MultiFieldsEnum(FieldsEnum[] subs, ReaderUtil.Slice[] subSlices) throws IOException {
     terms = new MultiTermsEnum(subSlices);
     queue = new FieldMergeQueue(subs.length);
+    docValues = new MultiIndexDocValues();
     top = new FieldsEnumWithSlice[subs.length];
+    List<FieldsEnumWithSlice> enumWithSlices = new ArrayList<FieldsEnumWithSlice>();

     // Init q
     for(int i=0;i<subs.length;i++) {
@@ -59,10 +66,13 @@ public final class MultiFieldsEnum extends FieldsEnum {
       if (field != null) {
         // this FieldsEnum has at least one field
         final FieldsEnumWithSlice sub = new FieldsEnumWithSlice(subs[i], subSlices[i], i);
+        enumWithSlices.add(sub);
         sub.current = field;
         queue.add(sub);
       }
     }
+    this.enumWithSlices = enumWithSlices.toArray(FieldsEnumWithSlice.EMPTY_ARRAY);
+
   }

   @Override
@@ -114,6 +124,7 @@ public final class MultiFieldsEnum extends FieldsEnum {
   }

   public final static class FieldsEnumWithSlice {
+    public static final FieldsEnumWithSlice[] EMPTY_ARRAY = new FieldsEnumWithSlice[0];
     final FieldsEnum fields;
     final ReaderUtil.Slice slice;
     final int index;

@@ -0,0 +1,174 @@
+package org.apache.lucene.index;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeSet;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.lucene.index.codecs.PerDocValues;
+import org.apache.lucene.index.values.IndexDocValues;
+import org.apache.lucene.index.values.MultiIndexDocValues;
+import org.apache.lucene.index.values.ValueType;
+import org.apache.lucene.index.values.MultiIndexDocValues.DocValuesIndex;
+import org.apache.lucene.util.ReaderUtil;
+import org.apache.lucene.util.ReaderUtil.Gather;
+
+/**
+ * Exposes per-document values, merged from per-document values API of
+ * sub-segments. This is useful when you're interacting with an {@link IndexReader}
+ * implementation that consists of sequential sub-readers (eg DirectoryReader
+ * or {@link MultiReader}).
+ *
+ * <p>
+ * <b>NOTE</b>: for multi readers, you'll get better performance by gathering
+ * the sub readers using {@link ReaderUtil#gatherSubReaders} and then operate
+ * per-reader, instead of using this class.
+ *
+ * @lucene.experimental
+ */
+public class MultiPerDocValues extends PerDocValues {
+  private final PerDocValues[] subs;
+  private final ReaderUtil.Slice[] subSlices;
+  private final Map<String, IndexDocValues> docValues = new ConcurrentHashMap<String, IndexDocValues>();
+  private final TreeSet<String> fields;
+
+  public MultiPerDocValues(PerDocValues[] subs, ReaderUtil.Slice[] subSlices) {
+    this.subs = subs;
+    this.subSlices = subSlices;
+    fields = new TreeSet<String>();
+    for (PerDocValues sub : subs) {
+      fields.addAll(sub.fields());
+    }
+  }
+
+  /**
+   * Returns a single {@link PerDocValues} instance for this reader, merging
+   * their values on the fly. This method will not return <code>null</code>.
+   *
+   * <p>
+   * <b>NOTE</b>: this is a slow way to access postings. It's better to get the
+   * sub-readers (using {@link Gather}) and iterate through them yourself.
+   */
+  public static PerDocValues getPerDocs(IndexReader r) throws IOException {
+    final IndexReader[] subs = r.getSequentialSubReaders();
+    if (subs == null) {
+      // already an atomic reader
+      return r.perDocValues();
+    } else if (subs.length == 0) {
+      // no fields
+      return null;
+    } else if (subs.length == 1) {
+      return getPerDocs(subs[0]);
+    }
+    PerDocValues perDocValues = r.retrievePerDoc();
+    if (perDocValues == null) {
+
+      final List<PerDocValues> producer = new ArrayList<PerDocValues>();
+      final List<ReaderUtil.Slice> slices = new ArrayList<ReaderUtil.Slice>();
+
+      new ReaderUtil.Gather(r) {
+        @Override
+        protected void add(int base, IndexReader r) throws IOException {
+          final PerDocValues f = r.perDocValues();
+          if (f != null) {
+            producer.add(f);
+            slices
+                .add(new ReaderUtil.Slice(base, r.maxDoc(), producer.size() - 1));
+          }
+        }
+      }.run();
+
+      if (producer.size() == 0) {
+        return null;
+      } else if (producer.size() == 1) {
+        perDocValues = producer.get(0);
+      } else {
+        perDocValues = new MultiPerDocValues(
+            producer.toArray(PerDocValues.EMPTY_ARRAY),
+            slices.toArray(ReaderUtil.Slice.EMPTY_ARRAY));
+      }
+      r.storePerDoc(perDocValues);
+    }
+    return perDocValues;
+  }
+
+  public IndexDocValues docValues(String field) throws IOException {
+    IndexDocValues result = docValues.get(field);
+    if (result == null) {
+      // Lazy init: first time this field is requested, we
+      // create & add to docValues:
+      final List<MultiIndexDocValues.DocValuesIndex> docValuesIndex = new ArrayList<MultiIndexDocValues.DocValuesIndex>();
+      int docsUpto = 0;
+      ValueType type = null;
+      // Gather all sub-readers that share this field
+      for (int i = 0; i < subs.length; i++) {
+        IndexDocValues values = subs[i].docValues(field);
+        final int start = subSlices[i].start;
+        final int length = subSlices[i].length;
+        if (values != null) {
+          if (docsUpto != start) {
+            type = values.type();
+            docValuesIndex.add(new MultiIndexDocValues.DocValuesIndex(
+                new MultiIndexDocValues.DummyDocValues(start, type), docsUpto, start
+                    - docsUpto));
+          }
+          docValuesIndex.add(new MultiIndexDocValues.DocValuesIndex(values, start,
+              length));
+          docsUpto = start + length;
+
+        } else if (i + 1 == subs.length && !docValuesIndex.isEmpty()) {
+          docValuesIndex.add(new MultiIndexDocValues.DocValuesIndex(
+              new MultiIndexDocValues.DummyDocValues(start, type), docsUpto, start
+                  - docsUpto));
+        }
+      }
+      if (docValuesIndex.isEmpty()) {
+        return null;
+      }
+      result = new MultiIndexDocValues(
+          docValuesIndex.toArray(DocValuesIndex.EMPTY_ARRAY));
+      docValues.put(field, result);
+    }
+    return result;
+  }
+
+  public void close() throws IOException {
+    final PerDocValues[] perDocValues = this.subs;
+    IOException ex = null;
+    for (PerDocValues values : perDocValues) {
+      try {
+        values.close();
+      } catch (IOException e) {
+        if (ex == null) {
+          ex = e;
+        }
+      }
+    }
+    if (ex != null) {
+      throw ex;
+    }
+  }
+
+  @Override
+  public Collection<String> fields() {
+    return fields;
+  }
+}

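A minimal usage sketch for the new class (directory setup omitted; the field name is illustrative). getPerDocs caches the merged view on the reader via storePerDoc/retrievePerDoc, so repeated calls do not re-gather the sub-readers:

    IndexReader reader = IndexReader.open(directory);            // composite reader
    PerDocValues perDocs = MultiPerDocValues.getPerDocs(reader); // merged on the fly
    IndexDocValues values = perDocs.docValues("price");          // null if the field has no values
    // Faster alternative, per the NOTE in the javadoc: gather the atomic
    // sub-readers with ReaderUtil.gatherSubReaders and call perDocValues()
    // on each one directly.
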
@@ -24,6 +24,7 @@ import java.util.concurrent.ConcurrentHashMap;

 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.FieldSelector;
+import org.apache.lucene.index.codecs.PerDocValues;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.ReaderUtil;
@@ -403,4 +404,9 @@ public class MultiReader extends IndexReader implements Cloneable {
       sub.removeReaderFinishedListener(listener);
     }
   }
+
+  @Override
+  public PerDocValues perDocValues() throws IOException {
+    throw new UnsupportedOperationException("please use MultiPerDocValues#getPerDocs, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level Fields");
+  }
 }

@@ -21,6 +21,8 @@ import org.apache.lucene.document.Document;
 import org.apache.lucene.document.FieldSelector;
 import org.apache.lucene.document.FieldSelectorResult;
 import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.index.codecs.PerDocValues;
+import org.apache.lucene.index.values.IndexDocValues;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.MapBackedSet;
@@ -60,7 +62,8 @@ public class ParallelReader extends IndexReader {
   private int numDocs;
   private boolean hasDeletions;

-  private ParallelFields fields = new ParallelFields();
+  private final ParallelFields fields = new ParallelFields();
+  private final ParallelPerDocs perDocs = new ParallelPerDocs();

   /** Construct a ParallelReader.
    * <p>Note that all subreaders are closed if this ParallelReader is closed.</p>
@@ -134,6 +137,7 @@ public class ParallelReader extends IndexReader {
         fieldToReader.put(field, reader);
       }
       this.fields.addField(field, reader);
+      this.perDocs.addField(field, reader);
     }

     if (!ignoreStoredFields)
@@ -180,6 +184,7 @@ public class ParallelReader extends IndexReader {
         return TermsEnum.EMPTY;
       }
     }
+
   }

   // Single instance of this, per ParallelReader instance
@@ -187,7 +192,8 @@ public class ParallelReader extends IndexReader {
     final HashMap<String,Terms> fields = new HashMap<String,Terms>();

     public void addField(String field, IndexReader r) throws IOException {
-      fields.put(field, MultiFields.getFields(r).terms(field));
+      Fields multiFields = MultiFields.getFields(r);
+      fields.put(field, multiFields.terms(field));
     }

     @Override
@@ -199,8 +205,8 @@ public class ParallelReader extends IndexReader {
       return fields.get(field);
     }
   }

   @Override
   public Bits getDeletedDocs() {
     return MultiFields.getDeletedDocs(readers.get(0));
   }
@@ -563,6 +569,36 @@ public class ParallelReader extends IndexReader {
       reader.removeReaderFinishedListener(listener);
     }
   }
+
+  @Override
+  public PerDocValues perDocValues() throws IOException {
+    return perDocs;
+  }
+
+  // Single instance of this, per ParallelReader instance
+  private static final class ParallelPerDocs extends PerDocValues {
+    final TreeMap<String,IndexDocValues> fields = new TreeMap<String,IndexDocValues>();
+
+    void addField(String field, IndexReader r) throws IOException {
+      PerDocValues perDocs = MultiPerDocValues.getPerDocs(r);
+      fields.put(field, perDocs.docValues(field));
+    }
+
+    //@Override -- not until Java 1.6
+    public void close() throws IOException {
+      // nothing to do here
+    }
+
+    @Override
+    public IndexDocValues docValues(String field) throws IOException {
+      return fields.get(field);
+    }
+
+    @Override
+    public Collection<String> fields() {
+      return fields.keySet();
+    }
+  }
 }

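With ParallelPerDocs wired in, doc values line up with the rest of the parallel fields; a short sketch (both directories are assumed to hold aligned, same-sized indexes, as ParallelReader requires):

    ParallelReader parallel = new ParallelReader();
    parallel.add(IndexReader.open(dirWithPostings));    // e.g. the text fields
    parallel.add(IndexReader.open(dirWithDocValues));   // e.g. a "price" values field
    IndexDocValues price = parallel.docValues("price"); // served by ParallelPerDocs
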
@@ -0,0 +1,70 @@
+package org.apache.lucene.index;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.PrintStream;
+import java.util.concurrent.atomic.AtomicLong;
+
+import org.apache.lucene.index.codecs.PerDocConsumer;
+import org.apache.lucene.store.Directory;
+
+/**
+ * Encapsulates all necessary state to initiate a {@link PerDocConsumer} and
+ * create all necessary files in order to consume and merge per-document values.
+ *
+ * @lucene.experimental
+ */
+public class PerDocWriteState {
+  public final PrintStream infoStream;
+  public final Directory directory;
+  public final String segmentName;
+  public final FieldInfos fieldInfos;
+  public final AtomicLong bytesUsed;
+  public final SegmentCodecs segmentCodecs;
+  public final int codecId;
+
+  PerDocWriteState(PrintStream infoStream, Directory directory,
+      String segmentName, FieldInfos fieldInfos, AtomicLong bytesUsed,
+      int codecId) {
+    this.infoStream = infoStream;
+    this.directory = directory;
+    this.segmentName = segmentName;
+    this.fieldInfos = fieldInfos;
+    this.segmentCodecs = fieldInfos.buildSegmentCodecs(false);
+    this.codecId = codecId;
+    this.bytesUsed = bytesUsed;
+  }
+
+  PerDocWriteState(SegmentWriteState state) {
+    infoStream = state.infoStream;
+    directory = state.directory;
+    segmentCodecs = state.segmentCodecs;
+    segmentName = state.segmentName;
+    fieldInfos = state.fieldInfos;
+    codecId = state.codecId;
+    bytesUsed = new AtomicLong(0);
+  }
+
+  PerDocWriteState(PerDocWriteState state, int codecId) {
+    this.infoStream = state.infoStream;
+    this.directory = state.directory;
+    this.segmentName = state.segmentName;
+    this.fieldInfos = state.fieldInfos;
+    this.segmentCodecs = state.segmentCodecs;
+    this.codecId = codecId;
+    this.bytesUsed = state.bytesUsed;
+  }
+}
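A brief sketch of how the three constructors above chain during a flush. The constructors are package-private, so this only compiles inside org.apache.lucene.index, and segmentWriteState is an assumed in-scope variable:

    PerDocWriteState perDocState = new PerDocWriteState(segmentWriteState); // starts a fresh AtomicLong(0) byte counter
    PerDocWriteState forCodec = new PerDocWriteState(perDocState, 0);       // pins codec id 0, shares the same counter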
@@ -19,16 +19,22 @@ package org.apache.lucene.index;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.Map;
 import java.util.Set;
+import java.util.TreeMap;
 import java.util.TreeSet;
 
 import org.apache.lucene.index.codecs.Codec;
 import org.apache.lucene.index.codecs.FieldsConsumer;
 import org.apache.lucene.index.codecs.FieldsProducer;
+import org.apache.lucene.index.codecs.PerDocConsumer;
+import org.apache.lucene.index.codecs.PerDocValues;
 import org.apache.lucene.index.codecs.TermsConsumer;
+import org.apache.lucene.index.codecs.DocValuesConsumer;
+import org.apache.lucene.index.values.IndexDocValues;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.IOUtils;
 
@@ -64,7 +70,7 @@ final class PerFieldCodecWrapper extends Codec {
     for (int i = 0; i < codecs.length; i++) {
       boolean success = false;
       try {
-        consumers.add(codecs[i].fieldsConsumer(new SegmentWriteState(state, "" + i)));
+        consumers.add(codecs[i].fieldsConsumer(new SegmentWriteState(state, i)));
         success = true;
       } finally {
         if (!success) {
@@ -99,13 +105,13 @@ final class PerFieldCodecWrapper extends Codec {
       boolean success = false;
       try {
         for (FieldInfo fi : fieldInfos) {
-          if (fi.isIndexed) { // TODO this does not work for non-indexed fields
+          if (fi.isIndexed) {
             fields.add(fi.name);
             assert fi.getCodecId() != FieldInfo.UNASSIGNED_CODEC_ID;
             Codec codec = segmentCodecs.codecs[fi.getCodecId()];
             if (!producers.containsKey(codec)) {
               producers.put(codec, codec.fieldsProducer(new SegmentReadState(dir,
-                  si, fieldInfos, readBufferSize, indexDivisor, ""+fi.getCodecId())));
+                  si, fieldInfos, readBufferSize, indexDivisor, fi.getCodecId())));
             }
             codecs.put(fi.name, producers.get(codec));
           }
@@ -120,6 +126,7 @@ final class PerFieldCodecWrapper extends Codec {
         }
       }
     }
 
+
   private final class FieldsIterator extends FieldsEnum {
     private final Iterator<String> it;
@@ -161,7 +168,7 @@ final class PerFieldCodecWrapper extends Codec {
       FieldsProducer fields = codecs.get(field);
       return fields == null ? null : fields.terms(field);
     }
 
     @Override
     public void close() throws IOException {
       IOUtils.closeSafely(false, codecs.values());
@@ -184,7 +191,7 @@ final class PerFieldCodecWrapper extends Codec {
   }
 
   @Override
-  public void files(Directory dir, SegmentInfo info, String codecId, Set<String> files)
+  public void files(Directory dir, SegmentInfo info, int codecId, Set<String> files)
       throws IOException {
     // ignore codecid since segmentCodec will assign it per codec
     segmentCodecs.files(dir, info, files);
@@ -196,4 +203,91 @@ final class PerFieldCodecWrapper extends Codec {
       codec.getExtensions(extensions);
     }
   }
+
+  @Override
+  public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException {
+    return new PerDocConsumers(state);
+  }
+
+  @Override
+  public PerDocValues docsProducer(SegmentReadState state) throws IOException {
+    return new PerDocProducers(state.dir, state.fieldInfos, state.segmentInfo,
+        state.readBufferSize, state.termsIndexDivisor);
+  }
+
+  private final class PerDocProducers extends PerDocValues {
+    private final TreeMap<String, PerDocValues> codecs = new TreeMap<String, PerDocValues>();
+
+    public PerDocProducers(Directory dir, FieldInfos fieldInfos, SegmentInfo si,
+        int readBufferSize, int indexDivisor) throws IOException {
+      final Map<Codec, PerDocValues> producers = new HashMap<Codec, PerDocValues>();
+      boolean success = false;
+      try {
+        for (FieldInfo fi : fieldInfos) {
+          if (fi.hasDocValues()) {
+            assert fi.getCodecId() != FieldInfo.UNASSIGNED_CODEC_ID;
+            Codec codec = segmentCodecs.codecs[fi.getCodecId()];
+            if (!producers.containsKey(codec)) {
+              producers.put(codec, codec.docsProducer(new SegmentReadState(dir,
+                  si, fieldInfos, readBufferSize, indexDivisor, fi.getCodecId())));
+            }
+            codecs.put(fi.name, producers.get(codec));
+          }
+        }
+        success = true;
+      } finally {
+        if (!success) {
+          IOUtils.closeSafely(true, producers.values());
+        }
+      }
+    }
+
+    @Override
+    public Collection<String> fields() {
+      return codecs.keySet();
+    }
+    @Override
+    public IndexDocValues docValues(String field) throws IOException {
+      final PerDocValues perDocProducer = codecs.get(field);
+      if (perDocProducer == null) {
+        return null;
+      }
+      return perDocProducer.docValues(field);
+    }
+
+    public void close() throws IOException {
+      IOUtils.closeSafely(false, codecs.values());
+    }
+  }
+
+  private final class PerDocConsumers extends PerDocConsumer {
+    private final PerDocConsumer[] consumers;
+    private final Codec[] codecs;
+    private final PerDocWriteState state;
+
+    public PerDocConsumers(PerDocWriteState state) throws IOException {
+      assert segmentCodecs == state.segmentCodecs;
+      this.state = state;
+      codecs = segmentCodecs.codecs;
+      consumers = new PerDocConsumer[codecs.length];
+    }
+
+    public void close() throws IOException {
+      IOUtils.closeSafely(false, consumers);
+    }
+
+    @Override
+    public DocValuesConsumer addValuesField(FieldInfo field) throws IOException {
+      final int codecId = field.getCodecId();
+      assert codecId != FieldInfo.UNASSIGNED_CODEC_ID;
+      PerDocConsumer perDoc = consumers[codecId];
+      if (perDoc == null) {
+        perDoc = codecs[codecId].docsConsumer(new PerDocWriteState(state, codecId));
+        assert perDoc != null;
+        consumers[codecId] = perDoc;
+      }
+      return perDoc.addValuesField(field);
+    }
+
+  }
 }
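The recurring String-to-int change in this wrapper swaps the old string codec ids ("" + i) for integer indexes into SegmentCodecs.codecs. A condensed sketch of the dispatch idea, with fieldInfo and segmentCodecs assumed in scope:

    int codecId = fieldInfo.getCodecId();        // assigned per field when the segment's codecs are built
    assert codecId != FieldInfo.UNASSIGNED_CODEC_ID;
    Codec codec = segmentCodecs.codecs[codecId]; // plain array lookup instead of string ids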
@@ -102,7 +102,7 @@ final class SegmentCodecs implements Cloneable {
       throws IOException {
     final Codec[] codecArray = codecs;
     for (int i = 0; i < codecArray.length; i++) {
-      codecArray[i].files(dir, info, ""+i, files);
+      codecArray[i].files(dir, info, i, files);
     }
 
   }
@@ -148,13 +148,6 @@ final class SegmentCodecs implements Cloneable {
       return this;
     }
 
-    SegmentCodecsBuilder addAll(FieldInfos infos) {
-      for (FieldInfo fieldInfo : infos) {
-        tryAddAndSet(fieldInfo);
-      }
-      return this;
-    }
-
     SegmentCodecs build() {
       return new SegmentCodecs(provider, codecs.toArray(Codec.EMPTY));
     }
@@ -20,7 +20,9 @@ package org.apache.lucene.index;
 import java.io.IOException;
 import java.util.concurrent.atomic.AtomicInteger;
 
+import org.apache.lucene.index.codecs.Codec;
 import org.apache.lucene.index.codecs.FieldsProducer;
+import org.apache.lucene.index.codecs.PerDocValues;
 import org.apache.lucene.store.Directory;
 
 /** Holds core readers that are shared (unchanged) when
@@ -39,7 +41,8 @@ final class SegmentCoreReaders {
   final FieldInfos fieldInfos;
 
   final FieldsProducer fields;
+  final PerDocValues perDocProducer;
 
   final Directory dir;
   final Directory cfsDir;
   final int readBufferSize;
@@ -51,6 +54,8 @@ final class SegmentCoreReaders {
   TermVectorsReader termVectorsReaderOrig;
   CompoundFileReader cfsReader;
   CompoundFileReader storeCFSReader;
 
+
+
   SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentInfo si, int readBufferSize, int termsIndexDivisor) throws IOException {
 
@@ -76,11 +81,12 @@ final class SegmentCoreReaders {
       fieldInfos = si.getFieldInfos();
 
       this.termsIndexDivisor = termsIndexDivisor;
+      final Codec codec = segmentCodecs.codec();
+      final SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si, fieldInfos, readBufferSize, termsIndexDivisor);
       // Ask codec for its Fields
-      fields = segmentCodecs.codec().fieldsProducer(new SegmentReadState(cfsDir, si, fieldInfos, readBufferSize, termsIndexDivisor));
+      fields = codec.fieldsProducer(segmentReadState);
       assert fields != null;
+      perDocProducer = codec.docsProducer(segmentReadState);
       success = true;
     } finally {
       if (!success) {
@@ -119,6 +125,10 @@ final class SegmentCoreReaders {
       fields.close();
     }
 
+    if (perDocProducer != null) {
+      perDocProducer.close();
+    }
+
     if (termVectorsReaderOrig != null) {
       termVectorsReaderOrig.close();
     }
@@ -17,6 +17,7 @@ package org.apache.lucene.index;
  * limitations under the License.
  */
 
+
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.HashMap;
@@ -652,7 +653,7 @@ public final class SegmentInfo implements Cloneable {
     if (delFileName != null && (delGen >= YES || dir.fileExists(delFileName))) {
       fileSet.add(delFileName);
     }
 
     if (normGen != null) {
       for (Entry<Integer,Long> entry : normGen.entrySet()) {
         long gen = entry.getValue();
@@ -29,6 +29,8 @@ import org.apache.lucene.index.MergePolicy.MergeAbortedException;
 import org.apache.lucene.index.codecs.Codec;
 import org.apache.lucene.index.codecs.FieldsConsumer;
 import org.apache.lucene.index.codecs.MergeState;
+import org.apache.lucene.index.codecs.PerDocConsumer;
+import org.apache.lucene.index.codecs.PerDocValues;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
@@ -116,7 +118,6 @@ final class SegmentMerger {
 
     if (fieldInfos.hasVectors())
       mergeVectors();
-
     return mergedDocs;
   }
 
@@ -154,7 +155,7 @@ final class SegmentMerger {
     for (String field : names) {
       fInfos.addOrUpdate(field, true, storeTermVectors,
           storePositionWithTermVector, storeOffsetWithTermVector, !reader
-              .hasNorms(field), storePayloads, omitTFAndPositions);
+              .hasNorms(field), storePayloads, omitTFAndPositions, null);
     }
   }
 
@@ -222,6 +223,7 @@ final class SegmentMerger {
       addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.STORES_PAYLOADS), false, false, false, true, false);
       addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.INDEXED), false, false, false, false, false);
       fieldInfos.addOrUpdate(reader.getFieldNames(FieldOption.UNINDEXED), false);
+      fieldInfos.addOrUpdate(reader.getFieldNames(FieldOption.DOC_VALUES), false);
     }
   }
   final SegmentCodecs codecInfo = fieldInfos.buildSegmentCodecs(false);
@@ -477,9 +479,16 @@ final class SegmentMerger {
     int docBase = 0;
 
     final List<Fields> fields = new ArrayList<Fields>();
+
     final List<ReaderUtil.Slice> slices = new ArrayList<ReaderUtil.Slice>();
     final List<Bits> bits = new ArrayList<Bits>();
     final List<Integer> bitsStarts = new ArrayList<Integer>();
 
+    // TODO: move this into its own method - this merges currently only docvalues
+    final List<PerDocValues> perDocProducers = new ArrayList<PerDocValues>();
+    final List<ReaderUtil.Slice> perDocSlices = new ArrayList<ReaderUtil.Slice>();
+    final List<Bits> perDocBits = new ArrayList<Bits>();
+    final List<Integer> perDocBitsStarts = new ArrayList<Integer>();
+
     for(IndexReader r : readers) {
       final Fields f = r.fields();
@@ -490,10 +499,18 @@ final class SegmentMerger {
         bits.add(r.getDeletedDocs());
         bitsStarts.add(docBase);
       }
+      final PerDocValues producer = r.perDocValues();
+      if (producer != null) {
+        perDocSlices.add(new ReaderUtil.Slice(docBase, maxDoc, fields.size()));
+        perDocProducers.add(producer);
+        perDocBits.add(r.getDeletedDocs());
+        perDocBitsStarts.add(docBase);
+      }
       docBase += maxDoc;
     }
 
     bitsStarts.add(docBase);
+    perDocBitsStarts.add(docBase);
 
     // we may gather more readers than mergeState.readerCount
     mergeState = new MergeState();
@@ -559,6 +576,20 @@ final class SegmentMerger {
     } finally {
       consumer.close();
     }
+    if (!perDocSlices.isEmpty()) {
+      mergeState.multiDeletedDocs = new MultiBits(perDocBits, perDocBitsStarts);
+      final PerDocConsumer docsConsumer = codec
+          .docsConsumer(new PerDocWriteState(segmentWriteState));
+      try {
+        final MultiPerDocValues multiPerDocValues = new MultiPerDocValues(perDocProducers
+            .toArray(PerDocValues.EMPTY_ARRAY), perDocSlices
+            .toArray(ReaderUtil.Slice.EMPTY_ARRAY));
+        docsConsumer.merge(mergeState, multiPerDocValues);
+      } finally {
+        docsConsumer.close();
+      }
+    }
+
   }
 
   private MergeState mergeState;
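In outline, the new doc-values merge path mirrors the postings path: collect one producer and one doc-id slice per sub-reader, stitch them into a composite view, and drain that into a single consumer. A condensed sketch, with codec, segmentWriteState, mergeState, and multiPerDocValues assumed in scope:

    PerDocConsumer out = codec.docsConsumer(new PerDocWriteState(segmentWriteState));
    try {
      out.merge(mergeState, multiPerDocValues); // composite view over all sub-readers
    } finally {
      out.close();                              // close even if merge throws
    }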
@@ -34,11 +34,11 @@ public class SegmentReadState {
   // that must do so), then it should negate this value to
   // get the app's terms divisor:
   public int termsIndexDivisor;
-  public final String codecId;
+  public final int codecId;
 
   public SegmentReadState(Directory dir, SegmentInfo info,
       FieldInfos fieldInfos, int readBufferSize, int termsIndexDivisor) {
-    this(dir, info, fieldInfos, readBufferSize, termsIndexDivisor, "");
+    this(dir, info, fieldInfos, readBufferSize, termsIndexDivisor, -1);
   }
 
   public SegmentReadState(Directory dir,
@@ -46,7 +46,7 @@ public class SegmentReadState {
       FieldInfos fieldInfos,
       int readBufferSize,
       int termsIndexDivisor,
-      String codecId) {
+      int codecId) {
     this.dir = dir;
     this.segmentInfo = info;
     this.fieldInfos = fieldInfos;
@@ -29,6 +29,8 @@ import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.FieldSelector;
+import org.apache.lucene.index.codecs.PerDocValues;
+import org.apache.lucene.index.values.IndexDocValues;
 import org.apache.lucene.store.BufferedIndexInput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
@@ -839,4 +841,9 @@ public class SegmentReader extends IndexReader implements Cloneable {
     // longer used (all SegmentReaders sharing it have been
     // closed).
   }
+
+  @Override
+  public PerDocValues perDocValues() throws IOException {
+    return core.perDocProducer;
+  }
 }
@@ -43,7 +43,7 @@ public class SegmentWriteState {
   public BitVector deletedDocs;
 
   final SegmentCodecs segmentCodecs;
-  public final String codecId;
+  public final int codecId;
 
   /** Expert: The fraction of terms in the "dictionary" which should be stored
    * in RAM.  Smaller values use more memory, but make searching slightly
@@ -53,7 +53,7 @@ public class SegmentWriteState {
   public int termIndexInterval;    // TODO: this should be private to the codec, not settable here or in IWC
 
   public SegmentWriteState(PrintStream infoStream, Directory directory, String segmentName, FieldInfos fieldInfos,
       int numDocs, int termIndexInterval, SegmentCodecs segmentCodecs, BufferedDeletes segDeletes) {
     this.infoStream = infoStream;
     this.segDeletes = segDeletes;
     this.directory = directory;
@@ -62,13 +62,13 @@ public class SegmentWriteState {
     this.numDocs = numDocs;
     this.termIndexInterval = termIndexInterval;
     this.segmentCodecs = segmentCodecs;
-    codecId = "";
+    codecId = -1;
   }
 
   /**
    * Create a shallow {@link SegmentWriteState} copy final a codec ID
   */
-  SegmentWriteState(SegmentWriteState state, String codecId) {
+  SegmentWriteState(SegmentWriteState state, int codecId) {
     infoStream = state.infoStream;
     directory = state.directory;
     segmentName = state.segmentName;
@@ -26,6 +26,7 @@ import org.apache.lucene.util.ReaderUtil; // javadoc
 
 import org.apache.lucene.index.DirectoryReader; // javadoc
 import org.apache.lucene.index.MultiReader; // javadoc
+import org.apache.lucene.index.codecs.PerDocValues;
 
 /**
  * This class forces a composite reader (eg a {@link
@@ -64,6 +65,11 @@ public final class SlowMultiReaderWrapper extends FilterIndexReader {
     return MultiFields.getFields(in);
   }
 
+  @Override
+  public PerDocValues perDocValues() throws IOException {
+    return MultiPerDocValues.getPerDocs(in);
+  }
+
   @Override
   public Bits getDeletedDocs() {
     return MultiFields.getDeletedDocs(in);
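Leaf readers and composite wrappers now both answer perDocValues(). A sketch of the contrast; the reader variables are assumptions:

    PerDocValues perSegment = segmentReader.perDocValues();  // per-segment producer, backed by SegmentCoreReaders
    PerDocValues merged = new SlowMultiReaderWrapper(compositeReader).perDocValues(); // merged MultiPerDocValues view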
@@ -108,7 +108,7 @@ public class BlockTermsReader extends FieldsProducer {
   //private String segment;
 
   public BlockTermsReader(TermsIndexReaderBase indexReader, Directory dir, FieldInfos fieldInfos, String segment, PostingsReaderBase postingsReader, int readBufferSize,
-                          int termsCacheSize, String codecId)
+                          int termsCacheSize, int codecId)
     throws IOException {
 
     this.postingsReader = postingsReader;
@@ -196,7 +196,7 @@ public class BlockTermsReader extends FieldsProducer {
     }
   }
 
-  public static void files(Directory dir, SegmentInfo segmentInfo, String id, Collection<String> files) {
+  public static void files(Directory dir, SegmentInfo segmentInfo, int id, Collection<String> files) {
     files.add(IndexFileNames.segmentFileName(segmentInfo.name, id, BlockTermsWriter.TERMS_EXTENSION));
   }
 
@@ -20,6 +20,7 @@ package org.apache.lucene.index.codecs;
 import java.io.IOException;
 import java.util.Set;
 
+import org.apache.lucene.index.PerDocWriteState;
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.index.SegmentReadState;
@@ -50,6 +51,10 @@ public abstract class Codec {
    * returns, it must hold open any files it will need to
    * use; else, those files may be deleted. */
  public abstract FieldsProducer fieldsProducer(SegmentReadState state) throws IOException;
 
+  public abstract PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException;
+
+  public abstract PerDocValues docsProducer(SegmentReadState state) throws IOException;
+
   /**
    * Gathers files associated with this segment
@@ -59,7 +64,7 @@ public abstract class Codec {
    * @param id the codec id within this segment
    * @param files the of files to add the codec files to.
    */
-  public abstract void files(Directory dir, SegmentInfo segmentInfo, String id, Set<String> files) throws IOException;
+  public abstract void files(Directory dir, SegmentInfo segmentInfo, int id, Set<String> files) throws IOException;
 
   /** Records all file extensions this codec uses */
   public abstract void getExtensions(Set<String> extensions);
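Every concrete Codec now has to implement the two new hooks. A minimal sketch that delegates to the default implementations introduced later in this patch; passing null as the comparator (for natural BytesRef ordering) is an assumption, not something this patch documents:

    @Override
    public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException {
      return new DefaultDocValuesConsumer(state, null); // null comparator: assumed default ordering
    }

    @Override
    public PerDocValues docsProducer(SegmentReadState state) throws IOException {
      return new DefaultDocValuesProducer(state.segmentInfo, state.dir,
          state.fieldInfos, state.codecId);
    }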
@@ -22,6 +22,7 @@ import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;
+import java.util.Map.Entry;
 
 /** Holds a set of codecs, keyed by name.  You subclass
  * this, instantiate it, and register your codecs, then
@@ -0,0 +1,104 @@
+package org.apache.lucene.index.codecs;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Comparator;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicLong;
+
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.PerDocWriteState;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.values.Writer;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
+
+public class DefaultDocValuesConsumer extends PerDocConsumer {
+  private final String segmentName;
+  private final int codecId;
+  private final Directory directory;
+  private final AtomicLong bytesUsed;
+  private final Comparator<BytesRef> comparator;
+
+  public DefaultDocValuesConsumer(PerDocWriteState state, Comparator<BytesRef> comparator) {
+    this.segmentName = state.segmentName;
+    this.codecId = state.codecId;
+    this.bytesUsed = state.bytesUsed;
+    this.directory = state.directory;
+    this.comparator = comparator;
+  }
+
+  public void close() throws IOException {
+  }
+
+  @Override
+  public DocValuesConsumer addValuesField(FieldInfo field) throws IOException {
+    return Writer.create(field.getDocValues(),
+        docValuesId(segmentName, codecId, field.number),
+        // TODO can we have a compound file per segment and codec for
+        // docvalues?
+        directory, comparator, bytesUsed);
+  }
+
+  @SuppressWarnings("fallthrough")
+  public static void files(Directory dir, SegmentInfo segmentInfo, int codecId,
+      Set<String> files) throws IOException {
+    FieldInfos fieldInfos = segmentInfo.getFieldInfos();
+    for (FieldInfo fieldInfo : fieldInfos) {
+      if (fieldInfo.getCodecId() == codecId && fieldInfo.hasDocValues()) {
+        String filename = docValuesId(segmentInfo.name, codecId,
+            fieldInfo.number);
+        switch (fieldInfo.getDocValues()) {
+        case BYTES_FIXED_DEREF:
+        case BYTES_VAR_DEREF:
+        case BYTES_VAR_SORTED:
+        case BYTES_FIXED_SORTED:
+        case BYTES_VAR_STRAIGHT:
+          files.add(IndexFileNames.segmentFileName(filename, "",
+              Writer.INDEX_EXTENSION));
+          assert dir.fileExists(IndexFileNames.segmentFileName(filename, "",
+              Writer.INDEX_EXTENSION));
+          // until here all types use an index
+        case BYTES_FIXED_STRAIGHT:
+        case FLOAT_32:
+        case FLOAT_64:
+        case INTS:
+          files.add(IndexFileNames.segmentFileName(filename, "",
+              Writer.DATA_EXTENSION));
+          assert dir.fileExists(IndexFileNames.segmentFileName(filename, "",
+              Writer.DATA_EXTENSION));
+          break;
+        default:
+          assert false;
+        }
+      }
+    }
+  }
+
+  static String docValuesId(String segmentsName, int codecID, int fieldId) {
+    return segmentsName + "_" + codecID + "-" + fieldId;
+  }
+
+  public static void getDocValuesExtensions(Set<String> extensions) {
+    extensions.add(Writer.DATA_EXTENSION);
+    extensions.add(Writer.INDEX_EXTENSION);
+  }
+}
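docValuesId() fixes the on-disk naming scheme: segment name + '_' + codec id + '-' + field number. A worked example with assumed inputs (segment "_3", codec 0, field 7); the method is package-private, so this only runs inside org.apache.lucene.index.codecs:

    String id = DefaultDocValuesConsumer.docValuesId("_3", 0, 7); // -> "_3_0-7"
    // per-field data file:  IndexFileNames.segmentFileName(id, "", Writer.DATA_EXTENSION)
    // per-field index file: IndexFileNames.segmentFileName(id, "", Writer.INDEX_EXTENSION)
    //                       (the index file exists only for the BYTES_* cases listed above the fallthrough)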
@@ -0,0 +1,170 @@
+package org.apache.lucene.index.codecs;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.IOException;
+import java.util.Collection;
+import java.util.TreeMap;
+
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.values.Bytes;
+import org.apache.lucene.index.values.IndexDocValues;
+import org.apache.lucene.index.values.Floats;
+import org.apache.lucene.index.values.Ints;
+import org.apache.lucene.index.values.ValueType;
+import org.apache.lucene.store.Directory;
+
+/**
+ * Abstract base class for FieldsProducer implementations supporting
+ * {@link IndexDocValues}.
+ *
+ * @lucene.experimental
+ */
+public class DefaultDocValuesProducer extends PerDocValues {
+
+  protected final TreeMap<String, IndexDocValues> docValues;
+
+  /**
+   * Creates a new {@link DefaultDocValuesProducer} instance and loads all
+   * {@link IndexDocValues} instances for this segment and codec.
+   *
+   * @param si
+   *          the segment info to load the {@link IndexDocValues} for.
+   * @param dir
+   *          the directory to load the {@link IndexDocValues} from.
+   * @param fieldInfo
+   *          the {@link FieldInfos}
+   * @param codecId
+   *          the codec ID
+   * @throws IOException
+   *           if an {@link IOException} occurs
+   */
+  public DefaultDocValuesProducer(SegmentInfo si, Directory dir,
+      FieldInfos fieldInfo, int codecId) throws IOException {
+    docValues = load(fieldInfo, si.name, si.docCount, dir, codecId);
+  }
+
+  /**
+   * Returns a {@link IndexDocValues} instance for the given field name or
+   * <code>null</code> if this field has no {@link IndexDocValues}.
+   */
+  @Override
+  public IndexDocValues docValues(String field) throws IOException {
+    return docValues.get(field);
+  }
+
+  // Only opens files... doesn't actually load any values
+  protected TreeMap<String, IndexDocValues> load(FieldInfos fieldInfos,
+      String segment, int docCount, Directory dir, int codecId)
+      throws IOException {
+    TreeMap<String, IndexDocValues> values = new TreeMap<String, IndexDocValues>();
+    boolean success = false;
+    try {
+
+      for (FieldInfo fieldInfo : fieldInfos) {
+        if (codecId == fieldInfo.getCodecId() && fieldInfo.hasDocValues()) {
+          final String field = fieldInfo.name;
+          // TODO can we have a compound file per segment and codec for
+          // docvalues?
+          final String id = DefaultDocValuesConsumer.docValuesId(segment,
+              codecId, fieldInfo.number);
+          values.put(field,
+              loadDocValues(docCount, dir, id, fieldInfo.getDocValues()));
+        }
+      }
+      success = true;
+    } finally {
+      if (!success) {
+        // if we fail we must close all opened resources if there are any
+        closeDocValues(values.values());
+      }
+    }
+    return values;
+  }
+
+
+  /**
+   * Loads a {@link IndexDocValues} instance depending on the given {@link ValueType}.
+   * Codecs that use different implementations for a certain {@link ValueType} can
+   * simply override this method and return their custom implementations.
+   *
+   * @param docCount
+   *          number of documents in the segment
+   * @param dir
+   *          the {@link Directory} to load the {@link IndexDocValues} from
+   * @param id
+   *          the unique file ID within the segment
+   * @param type
+   *          the type to load
+   * @return a {@link IndexDocValues} instance for the given type
+   * @throws IOException
+   *           if an {@link IOException} occurs
+   * @throws IllegalArgumentException
+   *           if the given {@link ValueType} is not supported
+   */
+  protected IndexDocValues loadDocValues(int docCount, Directory dir, String id,
+      ValueType type) throws IOException {
+    switch (type) {
+    case INTS:
+      return Ints.getValues(dir, id, false);
+    case FLOAT_32:
+      return Floats.getValues(dir, id, docCount);
+    case FLOAT_64:
+      return Floats.getValues(dir, id, docCount);
+    case BYTES_FIXED_STRAIGHT:
+      return Bytes.getValues(dir, id, Bytes.Mode.STRAIGHT, true, docCount);
+    case BYTES_FIXED_DEREF:
+      return Bytes.getValues(dir, id, Bytes.Mode.DEREF, true, docCount);
+    case BYTES_FIXED_SORTED:
+      return Bytes.getValues(dir, id, Bytes.Mode.SORTED, true, docCount);
+    case BYTES_VAR_STRAIGHT:
+      return Bytes.getValues(dir, id, Bytes.Mode.STRAIGHT, false, docCount);
+    case BYTES_VAR_DEREF:
+      return Bytes.getValues(dir, id, Bytes.Mode.DEREF, false, docCount);
+    case BYTES_VAR_SORTED:
+      return Bytes.getValues(dir, id, Bytes.Mode.SORTED, false, docCount);
+    default:
+      throw new IllegalStateException("unrecognized index values mode " + type);
+    }
+  }
+
+  public void close() throws IOException {
+    closeDocValues(docValues.values());
+  }
+
+  private void closeDocValues(final Collection<IndexDocValues> values)
+      throws IOException {
+    IOException ex = null;
+    for (IndexDocValues docValues : values) {
+      try {
+        docValues.close();
+      } catch (IOException e) {
+        ex = e;
+      }
+    }
+    if (ex != null) {
+      throw ex;
+    }
+  }
+
+  @Override
+  public Collection<String> fields() {
+    return docValues.keySet();
+  }
+}
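As the javadoc above suggests, a codec can override loadDocValues to substitute its own reader for one ValueType and fall through to the defaults for the rest. A sketch under that assumption; MyFloats is hypothetical:

    public class MyDocValuesProducer extends DefaultDocValuesProducer {
      public MyDocValuesProducer(SegmentInfo si, Directory dir,
          FieldInfos infos, int codecId) throws IOException {
        super(si, dir, infos, codecId); // base ctor runs load(), which dispatches back here
      }

      @Override
      protected IndexDocValues loadDocValues(int docCount, Directory dir,
          String id, ValueType type) throws IOException {
        if (type == ValueType.FLOAT_32) {
          return MyFloats.getValues(dir, id, docCount); // hypothetical custom float reader
        }
        return super.loadDocValues(docCount, dir, id, type); // defaults for everything else
      }
    }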
@@ -0,0 +1,166 @@
+package org.apache.lucene.index.codecs;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.IOException;
+import java.util.Collection;
+import java.util.concurrent.atomic.AtomicLong;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.values.IndexDocValues;
+import org.apache.lucene.index.values.PerDocFieldValues;
+import org.apache.lucene.index.values.Writer;
+import org.apache.lucene.util.Bits;
+
+/**
+ * Abstract API that consumes {@link PerDocFieldValues}.
+ * {@link DocValuesConsumer} are always associated with a specific field and
+ * segments. Concrete implementations of this API write the given
+ * {@link PerDocFieldValues} into a implementation specific format depending on
+ * the fields meta-data.
+ *
+ * @lucene.experimental
+ */
+public abstract class DocValuesConsumer {
+
+  protected final AtomicLong bytesUsed;
+
+  /**
+   * Creates a new {@link DocValuesConsumer}.
+   *
+   * @param bytesUsed
+   *          bytes-usage tracking reference used by implementation to track
+   *          internally allocated memory. All tracked bytes must be released
+   *          once {@link #finish(int)} has been called.
+   */
+  protected DocValuesConsumer(AtomicLong bytesUsed) {
+    this.bytesUsed = bytesUsed == null ? new AtomicLong(0) : bytesUsed;
+  }
+
+  /**
+   * Adds the given {@link PerDocFieldValues} instance to this
+   * {@link DocValuesConsumer}
+   *
+   * @param docID
+   *          the document ID to add the value for. The docID must always
+   *          increase or be <tt>0</tt> if it is the first call to this method.
+   * @param docValues
+   *          the values to add
+   * @throws IOException
+   *           if an {@link IOException} occurs
+   */
+  public abstract void add(int docID, PerDocFieldValues docValues)
+      throws IOException;
+
+  /**
+   * Called when the consumer of this API is doc with adding
+   * {@link PerDocFieldValues} to this {@link DocValuesConsumer}
+   *
+   * @param docCount
+   *          the total number of documents in this {@link DocValuesConsumer}.
+   *          Must be greater than or equal the last given docID to
+   *          {@link #add(int, PerDocFieldValues)}.
+   * @throws IOException
+   */
+  public abstract void finish(int docCount) throws IOException;
+
+  /**
+   * Gathers files associated with this {@link DocValuesConsumer}
+   *
+   * @param files
+   *          the of files to add the consumers files to.
+   */
+  public abstract void files(Collection<String> files) throws IOException;
+
+  /**
+   * Merges the given {@link org.apache.lucene.index.codecs.MergeState} into
+   * this {@link DocValuesConsumer}.
+   *
+   * @param mergeState
+   *          the state to merge
+   * @param values
+   *          the docValues to merge in
+   * @throws IOException
+   *           if an {@link IOException} occurs
+   */
+  public void merge(org.apache.lucene.index.codecs.MergeState mergeState,
+      IndexDocValues values) throws IOException {
+    assert mergeState != null;
+    // TODO we need some kind of compatibility notation for values such
+    // that two slightly different segments can be merged eg. fixed vs.
+    // variable byte len or float32 vs. float64
+    int docBase = 0;
+    boolean merged = false;
+    /*
+     * We ignore the given DocValues here and merge from the subReaders directly
+     * to support bulk copies on the DocValues Writer level. if this gets merged
+     * with MultiDocValues the writer can not optimize for bulk-copyable data
+     */
+    for (final IndexReader reader : mergeState.readers) {
+      final IndexDocValues r = reader.docValues(mergeState.fieldInfo.name);
+      if (r != null) {
+        merged = true;
+        merge(new Writer.MergeState(r, docBase, reader.maxDoc(), reader
+            .getDeletedDocs()));
+      }
+      docBase += reader.numDocs();
+    }
+    if (merged) {
+      finish(mergeState.mergedDocCount);
+    }
+  }
+
+  /**
+   * Merges the given {@link MergeState} into this {@link DocValuesConsumer}.
+   * {@link MergeState#docBase} must always be increasing. Merging segments out
+   * of order is not supported.
+   *
+   * @param mergeState
+   *          the {@link MergeState} to merge
+   * @throws IOException
+   *           if an {@link IOException} occurs
+   */
+  protected abstract void merge(MergeState mergeState) throws IOException;
+
+  /**
+   * Specialized auxiliary MergeState is necessary since we don't want to
+   * exploit internals up to the codecs consumer. An instance of this class is
+   * created for each merged low level {@link IndexReader} we are merging to
+   * support low level bulk copies.
+   */
+  public static class MergeState {
+    /**
+     * the source reader for this MergeState - merged values should be read from
+     * this instance
+     */
+    public final IndexDocValues reader;
+    /** the absolute docBase for this MergeState within the resulting segment */
+    public final int docBase;
+    /** the number of documents in this MergeState */
+    public final int docCount;
+    /** the deleted bits for this MergeState */
+    public final Bits bits;
+
+    public MergeState(IndexDocValues reader, int docBase, int docCount, Bits bits) {
+      assert reader != null;
+      this.reader = reader;
+      this.docBase = docBase;
+      this.docCount = docCount;
+      this.bits = bits;
+    }
+  }
+}
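The add()/finish() contract above is strictly append-only. A well-formed call sequence against some concrete consumer; the consumer and values variables are assumed:

    consumer.add(0, values);  // first docID may be 0
    consumer.add(3, values);  // docIDs only move forward; gaps are fine
    consumer.finish(10);      // docCount must be >= the last docID passed to add()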
@@ -20,6 +20,7 @@ package org.apache.lucene.index.codecs;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.FieldsEnum;
+import org.apache.lucene.index.TermsEnum;
 
 import java.io.IOException;
 import java.io.Closeable;
@@ -35,7 +36,7 @@ public abstract class FieldsConsumer implements Closeable {
 
   /** Add a new field */
   public abstract TermsConsumer addField(FieldInfo field) throws IOException;
 
   /** Called when we are done adding everything. */
   public abstract void close() throws IOException;
 
@@ -45,8 +46,12 @@ public abstract class FieldsConsumer implements Closeable {
       String field;
       while((field = fieldsEnum.next()) != null) {
         mergeState.fieldInfo = mergeState.fieldInfos.fieldInfo(field);
-        final TermsConsumer termsConsumer = addField(mergeState.fieldInfo);
-        termsConsumer.merge(mergeState, fieldsEnum.terms());
+        assert mergeState.fieldInfo != null : "FieldInfo for field is null: "+ field;
+        TermsEnum terms = fieldsEnum.terms();
+        if (terms != null) {
+          final TermsConsumer termsConsumer = addField(mergeState.fieldInfo);
+          termsConsumer.merge(mergeState, terms);
+        }
       }
     }
   }
@@ -17,10 +17,12 @@ package org.apache.lucene.index.codecs;
  * limitations under the License.
  */
 
-import org.apache.lucene.index.Fields;
-
-import java.io.IOException;
 import java.io.Closeable;
+import java.io.IOException;
+
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.FieldsEnum;
+import org.apache.lucene.index.Terms;
 
 /** Abstract API that consumes terms, doc, freq, prox and
  * payloads postings.  Concrete implementations of this
@@ -33,4 +35,28 @@ import java.io.Closeable;
 public abstract class FieldsProducer extends Fields implements Closeable {
   public abstract void close() throws IOException;
   public abstract void loadTermsIndex(int indexDivisor) throws IOException;
+
+  public static final FieldsProducer EMPTY = new FieldsProducer() {
+
+    @Override
+    public Terms terms(String field) throws IOException {
+      return null;
+    }
+
+    @Override
+    public FieldsEnum iterator() throws IOException {
+      return FieldsEnum.EMPTY;
+    }
+
+    @Override
+    public void loadTermsIndex(int indexDivisor) throws IOException {
+
+    }
+
+    @Override
+    public void close() throws IOException {
+
+    }
+  };
+
 }
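The new EMPTY singleton gives codecs a null-object to return when a segment holds no postings for them, rather than returning null. A sketch inside a hypothetical Codec implementation:

    @Override
    public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
      if (!hasPostings(state)) {      // hasPostings() is a hypothetical helper
        return FieldsProducer.EMPTY;  // terms() yields null, iterator() yields FieldsEnum.EMPTY
      }
      return openRealProducer(state); // hypothetical
    }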
@@ -68,7 +68,7 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase {
   // start of the field info data
   protected long dirOffset;
 
-  public FixedGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor, Comparator<BytesRef> termComp, String codecId)
+  public FixedGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor, Comparator<BytesRef> termComp, int codecId)
     throws IOException {
 
     this.termComp = termComp;
@@ -406,7 +406,7 @@ public class FixedGapTermsIndexReader extends TermsIndexReaderBase {
     }
   }
 
-  public static void files(Directory dir, SegmentInfo info, String id, Collection<String> files) {
+  public static void files(Directory dir, SegmentInfo info, int id, Collection<String> files) {
     files.add(IndexFileNames.segmentFileName(info.name, id, FixedGapTermsIndexWriter.TERMS_INDEX_EXTENSION));
   }
 
@@ -0,0 +1,67 @@
+package org.apache.lucene.index.codecs;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+import java.io.Closeable;
+import java.io.IOException;
+
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.values.IndexDocValues;
+
+/**
+ * Abstract API that consumes per document values. Concrete implementations of
+ * this convert field values into a Codec specific format during indexing.
+ * <p>
+ * The {@link PerDocConsumer} API is accessible through the
+ * {@link Codec} - API providing per field consumers and producers for inverted
+ * data (terms, postings) as well as per-document data.
+ *
+ * @lucene.experimental
+ */
+public abstract class PerDocConsumer implements Closeable {
+  /** Adds a new DocValuesField */
+  public abstract DocValuesConsumer addValuesField(FieldInfo field)
+      throws IOException;
+
+  /**
+   * Consumes and merges the given {@link PerDocValues} producer
+   * into this consumer's format.
+   */
+  public void merge(MergeState mergeState, PerDocValues producer)
+      throws IOException {
+    Iterable<String> fields = producer.fields();
+    for (String field : fields) {
+      mergeState.fieldInfo = mergeState.fieldInfos.fieldInfo(field);
+      assert mergeState.fieldInfo != null : "FieldInfo for field is null: "
+          + field;
+      if (mergeState.fieldInfo.hasDocValues()) {
+        final IndexDocValues docValues = producer.docValues(field);
+        if (docValues == null) {
+          /*
+           * It is actually possible that a fieldInfo has a values type but no
+           * values are actually available. This can happen if there are already
+           * segments without values around.
+           */
+          continue;
+        }
+        final DocValuesConsumer docValuesConsumer = addValuesField(mergeState.fieldInfo);
+        assert docValuesConsumer != null;
+        docValuesConsumer.merge(mergeState, docValues);
+      }
+    }
+  }
+}
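Since merge() above is a template method, a concrete codec only has to supply addValuesField and close. A hedged skeleton under that assumption; the class name and the newConsumerForField factory are hypothetical, only the overridden signatures come from the file above:

    class SketchPerDocConsumer extends PerDocConsumer {
      @Override
      public DocValuesConsumer addValuesField(FieldInfo field) throws IOException {
        // Hypothetical factory: return a per-field consumer that writes
        // this field's values in the codec's own on-disk format.
        return newConsumerForField(field);
      }

      @Override
      public void close() throws IOException {
        // flush and close whatever outputs the per-field consumers opened
      }
    }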
@@ -0,0 +1,55 @@
+package org.apache.lucene.index.codecs;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.Collection;
+
+import org.apache.lucene.index.values.IndexDocValues;
+
+/**
+ * Abstract API that provides access to one or more per-document storage
+ * features. The concrete implementations provide access to the underlying
+ * storage on a per-document basis corresponding to their actual
+ * {@link PerDocConsumer} counterpart.
+ * <p>
+ * The {@link PerDocValues} API is accessible through the
+ * {@link Codec} - API providing per field consumers and producers for inverted
+ * data (terms, postings) as well as per-document data.
+ *
+ * @lucene.experimental
+ */
+public abstract class PerDocValues implements Closeable {
+  /**
+   * Returns {@link IndexDocValues} for the current field.
+   *
+   * @param field
+   *          the field name
+   * @return the {@link IndexDocValues} for this field or <code>null</code> if not
+   *         applicable.
+   * @throws IOException
+   */
+  public abstract IndexDocValues docValues(String field) throws IOException;
+
+  public static final PerDocValues[] EMPTY_ARRAY = new PerDocValues[0];
+
+  /**
+   * Returns all fields this {@link PerDocValues} contains values for.
+   */
+  public abstract Collection<String> fields();
+
+}
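A short usage sketch against the API just declared, assuming perDocValues was obtained from a codec's docsProducer (the variable itself is hypothetical):

    // Walk every field this producer has values for and load each one's
    // RAM-resident Source. docValues() may return null for a field that
    // only carries values in other segments.
    for (String field : perDocValues.fields()) {
      IndexDocValues docValues = perDocValues.docValues(field);
      if (docValues != null) {
        IndexDocValues.Source source = docValues.load();
      }
    }
    perDocValues.close();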
@@ -57,8 +57,7 @@ public class VariableGapTermsIndexReader extends TermsIndexReaderBase {
   protected long dirOffset;
 
   final String segment;
-
-  public VariableGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor, String codecId)
+  public VariableGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor, int codecId)
     throws IOException {
 
     in = dir.openInput(IndexFileNames.segmentFileName(segment, codecId, VariableGapTermsIndexWriter.TERMS_INDEX_EXTENSION));
@@ -159,15 +158,11 @@ public class VariableGapTermsIndexReader extends TermsIndexReaderBase {
 
   private final class FieldIndexData {
 
-    private final FieldInfo fieldInfo;
     private final long indexStart;
 
     // Set only if terms index is loaded:
     private volatile FST<Long> fst;
 
     public FieldIndexData(FieldInfo fieldInfo, long indexStart) throws IOException {
-
-      this.fieldInfo = fieldInfo;
       this.indexStart = indexStart;
 
       if (indexDivisor > 0) {
@@ -241,7 +236,7 @@ public class VariableGapTermsIndexReader extends TermsIndexReaderBase {
     }
   }
 
-  public static void files(Directory dir, SegmentInfo info, String id, Collection<String> files) {
+  public static void files(Directory dir, SegmentInfo info, int id, Collection<String> files) {
     files.add(IndexFileNames.segmentFileName(info.name, id, VariableGapTermsIndexWriter.TERMS_INDEX_EXTENSION));
   }
 
@@ -22,11 +22,14 @@ import java.io.IOException;
 
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.PerDocWriteState;
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.codecs.FieldsConsumer;
 import org.apache.lucene.index.codecs.FieldsProducer;
+import org.apache.lucene.index.codecs.PerDocConsumer;
+import org.apache.lucene.index.codecs.PerDocValues;
 
 /** Codec that reads the pre-flex-indexing postings
  *  format. It does not provide a writer because newly
@@ -66,7 +69,7 @@ public class PreFlexCodec extends Codec {
   }
 
   @Override
-  public void files(Directory dir, SegmentInfo info, String id, Set<String> files) throws IOException {
+  public void files(Directory dir, SegmentInfo info, int id, Set<String> files) throws IOException {
     // preflex fields have no codec ID - we ignore it here
     PreFlexFields.files(dir, info, files);
   }
@@ -78,4 +81,14 @@ public class PreFlexCodec extends Codec {
     extensions.add(TERMS_EXTENSION);
     extensions.add(TERMS_INDEX_EXTENSION);
   }
+
+  @Override
+  public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException {
+    throw new UnsupportedOperationException("PerDocConsumer is not supported by Preflex codec");
+  }
+
+  @Override
+  public PerDocValues docsProducer(SegmentReadState state) throws IOException {
+    throw new UnsupportedOperationException("PerDocValues is not supported by Preflex codec");
+  }
 }
@@ -153,8 +153,12 @@ public final class SegmentTermEnum implements Cloneable {
     return true;
   }
 
-  /** Optimized scan, without allocating new terms.
-   *  Return number of invocations to next(). */
+  /* Optimized scan, without allocating new terms.
+   *  Return number of invocations to next().
+   *
+   *  NOTE: LUCENE-3183: if you pass Term("", "") here then this
+   *  will incorrectly return before positioning the enum,
+   *  and position will be -1; caller must detect this. */
   final int scanTo(Term term) throws IOException {
     scanBuffer.set(term);
     int count = 0;
@@ -57,6 +57,7 @@ public final class TermInfosReader {
     final long termOrd;
     public TermInfoAndOrd(TermInfo ti, long termOrd) {
       super(ti);
+      assert termOrd >= 0;
       this.termOrd = termOrd;
     }
   }
@@ -306,7 +307,13 @@ public final class TermInfosReader {
           ti = enumerator.termInfo;
           if (tiOrd == null) {
             if (useCache) {
-              termsCache.put(new CloneableTerm(term), new TermInfoAndOrd(ti, enumerator.position));
+              // LUCENE-3183: it's possible, if term is Term("",
+              // ""), for the STE to be incorrectly un-positioned
+              // after scan-to; work around this by not caching in
+              // this case:
+              if (enumerator.position >= 0) {
+                termsCache.put(new CloneableTerm(term), new TermInfoAndOrd(ti, enumerator.position));
+              }
             }
           } else {
             assert sameTermInfo(ti, tiOrd, enumerator);
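To make the LUCENE-3183 interaction concrete, a minimal sketch of the failure mode the new comment describes; it assumes a SegmentTermEnum ste and uses the package-visible members exactly as the hunks above do:

    // Scanning to the empty term can return without advancing the enum:
    int count = ste.scanTo(new Term("", ""));  // may return early
    if (ste.position < 0) {
      // the enum was never positioned; callers must detect this instead
      // of trusting ste, which is exactly why TermInfosReader now guards
      // the cache put with "enumerator.position >= 0".
    }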
@@ -20,6 +20,7 @@ package org.apache.lucene.index.codecs.pulsing;
 import java.io.IOException;
 import java.util.Set;
 
+import org.apache.lucene.index.PerDocWriteState;
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.index.SegmentReadState;
@@ -28,8 +29,12 @@ import org.apache.lucene.index.codecs.PostingsWriterBase;
 import org.apache.lucene.index.codecs.standard.StandardPostingsWriter;
 import org.apache.lucene.index.codecs.PostingsReaderBase;
 import org.apache.lucene.index.codecs.standard.StandardPostingsReader;
+import org.apache.lucene.index.codecs.DefaultDocValuesProducer;
 import org.apache.lucene.index.codecs.FieldsConsumer;
 import org.apache.lucene.index.codecs.FieldsProducer;
+import org.apache.lucene.index.codecs.PerDocConsumer;
+import org.apache.lucene.index.codecs.DefaultDocValuesConsumer;
+import org.apache.lucene.index.codecs.PerDocValues;
 import org.apache.lucene.index.codecs.VariableGapTermsIndexReader;
 import org.apache.lucene.index.codecs.VariableGapTermsIndexWriter;
 import org.apache.lucene.index.codecs.BlockTermsReader;
@@ -38,6 +43,7 @@ import org.apache.lucene.index.codecs.TermsIndexReaderBase;
 import org.apache.lucene.index.codecs.TermsIndexWriterBase;
 import org.apache.lucene.index.codecs.standard.StandardCodec;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
 
 /** This codec "inlines" the postings for terms that have
@@ -147,14 +153,26 @@ public class PulsingCodec extends Codec {
   }
 
   @Override
-  public void files(Directory dir, SegmentInfo segmentInfo, String id, Set<String> files) throws IOException {
+  public void files(Directory dir, SegmentInfo segmentInfo, int id, Set<String> files) throws IOException {
     StandardPostingsReader.files(dir, segmentInfo, id, files);
     BlockTermsReader.files(dir, segmentInfo, id, files);
     VariableGapTermsIndexReader.files(dir, segmentInfo, id, files);
+    DefaultDocValuesConsumer.files(dir, segmentInfo, id, files);
   }
 
   @Override
   public void getExtensions(Set<String> extensions) {
     StandardCodec.getStandardExtensions(extensions);
+    DefaultDocValuesConsumer.getDocValuesExtensions(extensions);
+  }
+
+  @Override
+  public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException {
+    return new DefaultDocValuesConsumer(state, BytesRef.getUTF8SortedAsUnicodeComparator());
+  }
+
+  @Override
+  public PerDocValues docsProducer(SegmentReadState state) throws IOException {
+    return new DefaultDocValuesProducer(state.segmentInfo, state.dir, state.fieldInfos, state.codecId);
   }
 }
@@ -58,8 +58,7 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
   int maxSkipLevels;
   int skipMinimum;
 
-  public SepPostingsReaderImpl(Directory dir, SegmentInfo segmentInfo, int readBufferSize, IntStreamFactory intFactory, String codecId) throws IOException {
-
+  public SepPostingsReaderImpl(Directory dir, SegmentInfo segmentInfo, int readBufferSize, IntStreamFactory intFactory, int codecId) throws IOException {
     boolean success = false;
     try {
 
@@ -85,7 +84,7 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
     }
   }
 
-  public static void files(SegmentInfo segmentInfo, String codecId, Collection<String> files) throws IOException {
+  public static void files(SegmentInfo segmentInfo, int codecId, Collection<String> files) throws IOException {
     files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.DOC_EXTENSION));
     files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.SKIP_EXTENSION));
 
@@ -20,14 +20,20 @@ package org.apache.lucene.index.codecs.simpletext;
 import java.io.IOException;
 import java.util.Set;
 
+import org.apache.lucene.index.PerDocWriteState;
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.DefaultDocValuesProducer;
 import org.apache.lucene.index.codecs.FieldsConsumer;
 import org.apache.lucene.index.codecs.FieldsProducer;
+import org.apache.lucene.index.codecs.PerDocConsumer;
+import org.apache.lucene.index.codecs.DefaultDocValuesConsumer;
+import org.apache.lucene.index.codecs.PerDocValues;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
 
 /** For debugging, curiosity, transparency only!! Do not
  *  use this codec in production.
@@ -56,17 +62,30 @@ public class SimpleTextCodec extends Codec {
   /** Extension of freq postings file */
   static final String POSTINGS_EXTENSION = "pst";
 
-  static String getPostingsFileName(String segment, String id) {
+  static String getPostingsFileName(String segment, int id) {
     return IndexFileNames.segmentFileName(segment, id, POSTINGS_EXTENSION);
   }
 
   @Override
-  public void files(Directory dir, SegmentInfo segmentInfo, String id, Set<String> files) throws IOException {
+  public void files(Directory dir, SegmentInfo segmentInfo, int id, Set<String> files) throws IOException {
     files.add(getPostingsFileName(segmentInfo.name, id));
+    DefaultDocValuesConsumer.files(dir, segmentInfo, id, files);
   }
 
   @Override
   public void getExtensions(Set<String> extensions) {
     extensions.add(POSTINGS_EXTENSION);
+    DefaultDocValuesConsumer.getDocValuesExtensions(extensions);
+  }
+
+  // TODO: would be great if these used a plain text impl
+  @Override
+  public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException {
+    return new DefaultDocValuesConsumer(state, BytesRef.getUTF8SortedAsUnicodeComparator());
+  }
+
+  @Override
+  public PerDocValues docsProducer(SegmentReadState state) throws IOException {
+    return new DefaultDocValuesProducer(state.segmentInfo, state.dir, state.fieldInfos, state.codecId);
   }
 }
@@ -58,7 +58,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
   final static BytesRef PAYLOAD = SimpleTextFieldsWriter.PAYLOAD;
 
   public SimpleTextFieldsReader(SegmentReadState state) throws IOException {
-    in = state.dir.openInput(SimpleTextCodec.getPostingsFileName(state.segmentInfo.name, ""+state.codecId));
+    in = state.dir.openInput(SimpleTextCodec.getPostingsFileName(state.segmentInfo.name, state.codecId));
 
     fieldInfos = state.fieldInfos;
   }
@@ -20,12 +20,16 @@ package org.apache.lucene.index.codecs.standard;
 import java.io.IOException;
 import java.util.Set;
 
+import org.apache.lucene.index.PerDocWriteState;
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.codecs.Codec;
 import org.apache.lucene.index.codecs.FieldsConsumer;
 import org.apache.lucene.index.codecs.FieldsProducer;
+import org.apache.lucene.index.codecs.PerDocConsumer;
+import org.apache.lucene.index.codecs.DefaultDocValuesConsumer;
+import org.apache.lucene.index.codecs.PerDocValues;
 import org.apache.lucene.index.codecs.PostingsWriterBase;
 import org.apache.lucene.index.codecs.PostingsReaderBase;
 import org.apache.lucene.index.codecs.TermsIndexWriterBase;
@@ -34,7 +38,9 @@ import org.apache.lucene.index.codecs.VariableGapTermsIndexWriter;
 import org.apache.lucene.index.codecs.VariableGapTermsIndexReader;
 import org.apache.lucene.index.codecs.BlockTermsWriter;
 import org.apache.lucene.index.codecs.BlockTermsReader;
+import org.apache.lucene.index.codecs.DefaultDocValuesProducer;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
 
 /** Default codec.
  *  @lucene.experimental */
@@ -130,15 +136,17 @@ public class StandardCodec extends Codec {
   static final String PROX_EXTENSION = "prx";
 
   @Override
-  public void files(Directory dir, SegmentInfo segmentInfo, String id, Set<String> files) throws IOException {
+  public void files(Directory dir, SegmentInfo segmentInfo, int id, Set<String> files) throws IOException {
     StandardPostingsReader.files(dir, segmentInfo, id, files);
     BlockTermsReader.files(dir, segmentInfo, id, files);
     VariableGapTermsIndexReader.files(dir, segmentInfo, id, files);
+    DefaultDocValuesConsumer.files(dir, segmentInfo, id, files);
   }
 
   @Override
   public void getExtensions(Set<String> extensions) {
     getStandardExtensions(extensions);
+    DefaultDocValuesConsumer.getDocValuesExtensions(extensions);
   }
 
   public static void getStandardExtensions(Set<String> extensions) {
@@ -147,4 +155,14 @@ public class StandardCodec extends Codec {
     BlockTermsReader.getExtensions(extensions);
     VariableGapTermsIndexReader.getIndexExtensions(extensions);
   }
+
+  @Override
+  public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException {
+    return new DefaultDocValuesConsumer(state, BytesRef.getUTF8SortedAsUnicodeComparator());
+  }
+
+  @Override
+  public PerDocValues docsProducer(SegmentReadState state) throws IOException {
+    return new DefaultDocValuesProducer(state.segmentInfo, state.dir, state.fieldInfos, state.codecId);
+  }
 }
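Taken together, the codec hunks above all follow one recipe: register the doc-values files and extensions, and return the default consumer/producer pair. A hedged composite of that recipe for a hypothetical custom codec (MyCodec is illustrative; the postings-side abstract methods of Codec are omitted; every call shown appears in the hunks above):

    public class MyCodec extends Codec {
      @Override
      public void files(Directory dir, SegmentInfo info, int id, Set<String> files) throws IOException {
        // ... add postings/terms-dictionary files here, then:
        DefaultDocValuesConsumer.files(dir, info, id, files);
      }

      @Override
      public void getExtensions(Set<String> extensions) {
        // ... add postings extensions here, then:
        DefaultDocValuesConsumer.getDocValuesExtensions(extensions);
      }

      @Override
      public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException {
        return new DefaultDocValuesConsumer(state, BytesRef.getUTF8SortedAsUnicodeComparator());
      }

      @Override
      public PerDocValues docsProducer(SegmentReadState state) throws IOException {
        return new DefaultDocValuesProducer(state.segmentInfo, state.dir, state.fieldInfos, state.codecId);
      }
    }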
@@ -51,7 +51,7 @@ public class StandardPostingsReader extends PostingsReaderBase {
 
   //private String segment;
 
-  public StandardPostingsReader(Directory dir, SegmentInfo segmentInfo, int readBufferSize, String codecId) throws IOException {
+  public StandardPostingsReader(Directory dir, SegmentInfo segmentInfo, int readBufferSize, int codecId) throws IOException {
     freqIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, codecId, StandardCodec.FREQ_EXTENSION),
                            readBufferSize);
     //this.segment = segmentInfo.name;
@@ -71,7 +71,7 @@ public class StandardPostingsReader extends PostingsReaderBase {
     }
   }
 
-  public static void files(Directory dir, SegmentInfo segmentInfo, String id, Collection<String> files) throws IOException {
+  public static void files(Directory dir, SegmentInfo segmentInfo, int id, Collection<String> files) throws IOException {
     files.add(IndexFileNames.segmentFileName(segmentInfo.name, id, StandardCodec.FREQ_EXTENSION));
     if (segmentInfo.getHasProx()) {
       files.add(IndexFileNames.segmentFileName(segmentInfo.name, id, StandardCodec.PROX_EXTENSION));
@@ -0,0 +1,491 @@
+package org.apache.lucene.index.values;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** Base class for specific Bytes Reader/Writer implementations */
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Comparator;
+import java.util.concurrent.atomic.AtomicLong;
+
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.values.IndexDocValues.SortedSource;
+import org.apache.lucene.index.values.IndexDocValues.Source;
+import org.apache.lucene.index.values.IndexDocValues.SourceEnum;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.ByteBlockPool;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CodecUtil;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.PagedBytes;
+
+/**
+ * Provides concrete Writer/Reader implementations for <tt>byte[]</tt> value per
+ * document. There are 6 package-private default implementations of this, for
+ * all combinations of {@link Mode#DEREF}/{@link Mode#STRAIGHT}/
+ * {@link Mode#SORTED} x fixed-length/variable-length.
+ *
+ * <p>
+ * NOTE: Currently the total amount of byte[] data stored (across a single
+ * segment) cannot exceed 2GB.
+ * </p>
+ * <p>
+ * NOTE: Each byte[] must be <= 32768 bytes in length
+ * </p>
+ *
+ * @lucene.experimental
+ */
+public final class Bytes {
+  // TODO - add bulk copy where possible
+  private Bytes() { /* don't instantiate! */
+  }
+
+  /**
+   * Defines the {@link Writer}s store mode. The writer will either store the
+   * bytes sequentially ({@link #STRAIGHT}), dereferenced ({@link #DEREF}) or
+   * sorted ({@link #SORTED})
+   *
+   * @lucene.experimental
+   */
+  public static enum Mode {
+    /**
+     * Mode for sequentially stored bytes
+     */
+    STRAIGHT,
+    /**
+     * Mode for dereferenced stored bytes
+     */
+    DEREF,
+    /**
+     * Mode for sorted stored bytes
+     */
+    SORTED
+  };
+
+  /**
+   * Creates a new <tt>byte[]</tt> {@link Writer} instance for the given
+   * directory.
+   *
+   * @param dir
+   *          the directory to write the values to
+   * @param id
+   *          the id used to create a unique file name. Usually composed out of
+   *          the segment name and a unique id per segment.
+   * @param mode
+   *          the writer's store mode
+   * @param comp
+   *          a {@link BytesRef} comparator - only used with {@link Mode#SORTED}
+   * @param fixedSize
+   *          <code>true</code> if all bytes subsequently passed to the
+   *          {@link Writer} will have the same length
+   * @param bytesUsed
+   *          an {@link AtomicLong} instance to track the used bytes within the
+   *          {@link Writer}. A call to {@link Writer#finish(int)} will release
+   *          all internally used resources and frees the memory tracking
+   *          reference.
+   * @return a new {@link Writer} instance
+   * @throws IOException
+   *           if the files for the writer can not be created.
+   */
+  public static Writer getWriter(Directory dir, String id, Mode mode,
+      Comparator<BytesRef> comp, boolean fixedSize, AtomicLong bytesUsed)
+      throws IOException {
+    // TODO -- i shouldn't have to specify fixed? can
+    // track itself & do the write thing at write time?
+    if (comp == null) {
+      comp = BytesRef.getUTF8SortedAsUnicodeComparator();
+    }
+
+    if (fixedSize) {
+      if (mode == Mode.STRAIGHT) {
+        return new FixedStraightBytesImpl.Writer(dir, id);
+      } else if (mode == Mode.DEREF) {
+        return new FixedDerefBytesImpl.Writer(dir, id, bytesUsed);
+      } else if (mode == Mode.SORTED) {
+        return new FixedSortedBytesImpl.Writer(dir, id, comp, bytesUsed);
+      }
+    } else {
+      if (mode == Mode.STRAIGHT) {
+        return new VarStraightBytesImpl.Writer(dir, id, bytesUsed);
+      } else if (mode == Mode.DEREF) {
+        return new VarDerefBytesImpl.Writer(dir, id, bytesUsed);
+      } else if (mode == Mode.SORTED) {
+        return new VarSortedBytesImpl.Writer(dir, id, comp, bytesUsed);
+      }
+    }
+
+    throw new IllegalArgumentException("");
+  }
+
+  /**
+   * Creates a new {@link IndexDocValues} instance that provides either memory
+   * resident or iterative access to a per-document stored <tt>byte[]</tt>
+   * value. The returned {@link IndexDocValues} instance will be initialized without
+   * consuming a significant amount of memory.
+   *
+   * @param dir
+   *          the directory to load the {@link IndexDocValues} from.
+   * @param id
+   *          the file ID in the {@link Directory} to load the values from.
+   * @param mode
+   *          the mode used to store the values
+   * @param fixedSize
+   *          <code>true</code> iff the values are stored with fixed-size,
+   *          otherwise <code>false</code>
+   * @param maxDoc
+   *          the number of document values stored for the given ID
+   * @return an initialized {@link IndexDocValues} instance.
+   * @throws IOException
+   *           if an {@link IOException} occurs
+   */
+  public static IndexDocValues getValues(Directory dir, String id, Mode mode,
+      boolean fixedSize, int maxDoc) throws IOException {
+    // TODO -- I can peek @ header to determine fixed/mode?
+    if (fixedSize) {
+      if (mode == Mode.STRAIGHT) {
+        return new FixedStraightBytesImpl.Reader(dir, id, maxDoc);
+      } else if (mode == Mode.DEREF) {
+        return new FixedDerefBytesImpl.Reader(dir, id, maxDoc);
+      } else if (mode == Mode.SORTED) {
+        return new FixedSortedBytesImpl.Reader(dir, id, maxDoc);
+      }
+    } else {
+      if (mode == Mode.STRAIGHT) {
+        return new VarStraightBytesImpl.Reader(dir, id, maxDoc);
+      } else if (mode == Mode.DEREF) {
+        return new VarDerefBytesImpl.Reader(dir, id, maxDoc);
+      } else if (mode == Mode.SORTED) {
+        return new VarSortedBytesImpl.Reader(dir, id, maxDoc);
+      }
+    }
+
+    throw new IllegalArgumentException("Illegal Mode: " + mode);
+  }
+
+  // TODO open up this API?
+  static abstract class BytesBaseSource extends Source {
+    private final PagedBytes pagedBytes;
+    protected final IndexInput datIn;
+    protected final IndexInput idxIn;
+    protected final static int PAGED_BYTES_BITS = 15;
+    protected final PagedBytes.Reader data;
+    protected final long totalLengthInBytes;
+
+    protected BytesBaseSource(IndexInput datIn, IndexInput idxIn,
+        PagedBytes pagedBytes, long bytesToRead) throws IOException {
+      assert bytesToRead <= datIn.length() : " file size is less than the expected size diff: "
+          + (bytesToRead - datIn.length()) + " pos: " + datIn.getFilePointer();
+      this.datIn = datIn;
+      this.totalLengthInBytes = bytesToRead;
+      this.pagedBytes = pagedBytes;
+      this.pagedBytes.copy(datIn, bytesToRead);
+      data = pagedBytes.freeze(true);
+      this.idxIn = idxIn;
+    }
+
+    public void close() throws IOException {
+      try {
+        data.close(); // close data
+      } finally {
+        try {
+          if (datIn != null) {
+            datIn.close();
+          }
+        } finally {
+          if (idxIn != null) { // if straight - no index needed
+            idxIn.close();
+          }
+        }
+      }
+    }
+
+    /**
+     * Returns one greater than the largest possible document number.
+     */
+    protected abstract int maxDoc();
+
+    @Override
+    public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
+      return new SourceEnum(attrSource, type(), this, maxDoc()) {
+        @Override
+        public int advance(int target) throws IOException {
+          if (target >= numDocs) {
+            return pos = NO_MORE_DOCS;
+          }
+          while (source.getBytes(target, bytesRef).length == 0) {
+            if (++target >= numDocs) {
+              return pos = NO_MORE_DOCS;
+            }
+          }
+          return pos = target;
+        }
+      };
+    }
+
+  }
+
+  static abstract class BytesBaseSortedSource extends SortedSource {
+    protected final IndexInput datIn;
+    protected final IndexInput idxIn;
+    protected final BytesRef defaultValue = new BytesRef();
+    protected final static int PAGED_BYTES_BITS = 15;
+    private final PagedBytes pagedBytes;
+    protected final PagedBytes.Reader data;
+    private final Comparator<BytesRef> comp;
+
+    protected BytesBaseSortedSource(IndexInput datIn, IndexInput idxIn,
+        Comparator<BytesRef> comp, PagedBytes pagedBytes, long bytesToRead)
+        throws IOException {
+      assert bytesToRead <= datIn.length() : " file size is less than the expected size diff: "
+          + (bytesToRead - datIn.length()) + " pos: " + datIn.getFilePointer();
+      this.datIn = datIn;
+      this.pagedBytes = pagedBytes;
+      this.pagedBytes.copy(datIn, bytesToRead);
+      data = pagedBytes.freeze(true);
+      this.idxIn = idxIn;
+      this.comp = comp == null ? BytesRef.getUTF8SortedAsUnicodeComparator()
+          : comp;
+
+    }
+
+    @Override
+    public BytesRef getByOrd(int ord, BytesRef bytesRef) {
+      assert ord >= 0;
+      return deref(ord, bytesRef);
+    }
+
+    protected void closeIndexInput() throws IOException {
+      try {
+        if (datIn != null) {
+          datIn.close();
+        }
+      } finally {
+        if (idxIn != null) { // if straight
+          idxIn.close();
+        }
+      }
+    }
+
+    /**
+     * Returns the largest doc id + 1 in this doc values source
+     */
+    protected abstract int maxDoc();
+
+    /**
+     * Copies the value for the given ord to the given {@link BytesRef} and
+     * returns it.
+     */
+    protected abstract BytesRef deref(int ord, BytesRef bytesRef);
+
+    protected int binarySearch(BytesRef b, BytesRef bytesRef, int low,
+        int high) {
+      int mid = 0;
+      while (low <= high) {
+        mid = (low + high) >>> 1;
+        deref(mid, bytesRef);
+        final int cmp = comp.compare(bytesRef, b);
+        if (cmp < 0) {
+          low = mid + 1;
+        } else if (cmp > 0) {
+          high = mid - 1;
+        } else {
+          return mid;
+        }
+      }
+      assert comp.compare(bytesRef, b) != 0;
+      return -(low + 1);
+    }
+
+    @Override
+    public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
+      return new SourceEnum(attrSource, type(), this, maxDoc()) {
+
+        @Override
+        public int advance(int target) throws IOException {
+          if (target >= numDocs) {
+            return pos = NO_MORE_DOCS;
+          }
+          while (source.getBytes(target, bytesRef).length == 0) {
+            if (++target >= numDocs) {
+              return pos = NO_MORE_DOCS;
+            }
+          }
+          return pos = target;
+        }
+      };
+    }
+  }
+
+  // TODO: open up this API?!
+  static abstract class BytesWriterBase extends Writer {
+    private final String id;
+    protected IndexOutput idxOut;
+    protected IndexOutput datOut;
+    protected BytesRef bytesRef;
+    protected final ByteBlockPool pool;
+
+    protected BytesWriterBase(Directory dir, String id, String codecName,
+        int version, boolean initIndex, ByteBlockPool pool,
+        AtomicLong bytesUsed) throws IOException {
+      super(bytesUsed);
+      this.id = id;
+      this.pool = pool;
+      datOut = dir.createOutput(IndexFileNames.segmentFileName(id, "",
+          DATA_EXTENSION));
+      boolean success = false;
+      try {
+        CodecUtil.writeHeader(datOut, codecName, version);
+        if (initIndex) {
+          idxOut = dir.createOutput(IndexFileNames.segmentFileName(id, "",
+              INDEX_EXTENSION));
+          CodecUtil.writeHeader(idxOut, codecName, version);
+        } else {
+          idxOut = null;
+        }
+        success = true;
+      } finally {
+        if (!success) {
+          IOUtils.closeSafely(true, datOut, idxOut);
+        }
+      }
+    }
+
+    /**
+     * Must be called only with increasing docIDs. It's OK for some docIDs to be
+     * skipped; they will be filled with 0 bytes.
+     */
+    @Override
+    public abstract void add(int docID, BytesRef bytes) throws IOException;
+
+    @Override
+    public void finish(int docCount) throws IOException {
+      try {
+        IOUtils.closeSafely(false, datOut, idxOut);
+      } finally {
+        if (pool != null) {
+          pool.reset();
+        }
+      }
+    }
+
+    @Override
+    protected void add(int docID) throws IOException {
+      add(docID, bytesRef);
+    }
+
+    @Override
+    public void add(int docID, PerDocFieldValues docValues) throws IOException {
+      final BytesRef ref;
+      if ((ref = docValues.getBytes()) != null) {
+        add(docID, ref);
+      }
+    }
+
+    @Override
+    protected void setNextEnum(ValuesEnum valuesEnum) {
+      bytesRef = valuesEnum.bytes();
+    }
+
+    @Override
+    public void files(Collection<String> files) throws IOException {
+      assert datOut != null;
+      files.add(IndexFileNames.segmentFileName(id, "", DATA_EXTENSION));
+      if (idxOut != null) { // called after flush - so this must be initialized
+                            // if needed or present
+        final String idxFile = IndexFileNames.segmentFileName(id, "",
+            INDEX_EXTENSION);
+        files.add(idxFile);
+      }
+    }
+  }
+
+  /**
+   * Opens all necessary files, but does not read any data in until you call
+   * {@link #load}.
+   */
+  static abstract class BytesReaderBase extends IndexDocValues {
+    protected final IndexInput idxIn;
+    protected final IndexInput datIn;
+    protected final int version;
+    protected final String id;
+
+    protected BytesReaderBase(Directory dir, String id, String codecName,
+        int maxVersion, boolean doIndex) throws IOException {
+      this.id = id;
+      datIn = dir.openInput(IndexFileNames.segmentFileName(id, "",
+          Writer.DATA_EXTENSION));
+      boolean success = false;
+      try {
+        version = CodecUtil.checkHeader(datIn, codecName, maxVersion, maxVersion);
+        if (doIndex) {
+          idxIn = dir.openInput(IndexFileNames.segmentFileName(id, "",
+              Writer.INDEX_EXTENSION));
+          final int version2 = CodecUtil.checkHeader(idxIn, codecName,
+              maxVersion, maxVersion);
+          assert version == version2;
+        } else {
+          idxIn = null;
+        }
+        success = true;
+      } finally {
+        if (!success) {
+          closeInternal();
+        }
+      }
+    }
+
+    /**
+     * clones and returns the data {@link IndexInput}
+     */
+    protected final IndexInput cloneData() {
+      assert datIn != null;
+      return (IndexInput) datIn.clone();
+    }
+
+    /**
+     * clones and returns the indexing {@link IndexInput}
+     */
+    protected final IndexInput cloneIndex() {
+      assert idxIn != null;
+      return (IndexInput) idxIn.clone();
+    }
+
+    @Override
+    public void close() throws IOException {
+      try {
+        super.close();
+      } finally {
+        closeInternal();
+      }
+    }
+
+    private void closeInternal() throws IOException {
+      try {
+        datIn.close();
+      } finally {
+        if (idxIn != null) {
+          idxIn.close();
+        }
+      }
+    }
+  }
+
+}
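A hedged round-trip sketch of the two factory methods above: dir is an already-open Directory, the id "_0_1" is an arbitrary example of a segment-name-plus-field-number id, and error handling is omitted. Every call used here is declared in Bytes or its nested base classes:

    AtomicLong bytesUsed = new AtomicLong();
    Writer writer = Bytes.getWriter(dir, "_0_1", Bytes.Mode.DEREF,
        null /* null selects the UTF8-sorted-as-unicode comparator */,
        true /* fixed-size values */, bytesUsed);
    writer.add(0, new BytesRef("red "));  // docIDs must be increasing
    writer.add(2, new BytesRef("blue"));  // doc 1 skipped -> filled with 0 bytes
    writer.finish(3);                     // docCount; releases tracked memory

    IndexDocValues values = Bytes.getValues(dir, "_0_1", Bytes.Mode.DEREF,
        true /* fixedSize */, 3 /* maxDoc */);
    BytesRef scratch = new BytesRef();
    values.load().getBytes(0, scratch);   // -> "red " (fixed length of 4)

Note that with fixedSize=true both values must have the same length ("red " and "blue" are both four bytes), matching the writer's size check.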
@ -0,0 +1,279 @@
|
||||||
|
package org.apache.lucene.index.values;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.concurrent.atomic.AtomicLong;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.values.Bytes.BytesBaseSource;
|
||||||
|
import org.apache.lucene.index.values.Bytes.BytesReaderBase;
|
||||||
|
import org.apache.lucene.index.values.Bytes.BytesWriterBase;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.store.IndexInput;
|
||||||
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
|
import org.apache.lucene.util.AttributeSource;
|
||||||
|
import org.apache.lucene.util.ByteBlockPool;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.BytesRefHash;
|
||||||
|
import org.apache.lucene.util.PagedBytes;
|
||||||
|
import org.apache.lucene.util.RamUsageEstimator;
|
||||||
|
import org.apache.lucene.util.ByteBlockPool.Allocator;
|
||||||
|
import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator;
|
||||||
|
import org.apache.lucene.util.BytesRefHash.TrackingDirectBytesStartArray;
|
||||||
|
import org.apache.lucene.util.packed.PackedInts;
|
||||||
|
|
||||||
|
// Stores fixed-length byte[] by deref, ie when two docs
|
||||||
|
// have the same value, they store only 1 byte[]
|
||||||
|
/**
|
||||||
|
* @lucene.experimental
|
||||||
|
*/
|
||||||
|
class FixedDerefBytesImpl {
|
||||||
|
|
||||||
|
static final String CODEC_NAME = "FixedDerefBytes";
|
||||||
|
static final int VERSION_START = 0;
|
||||||
|
static final int VERSION_CURRENT = VERSION_START;
|
||||||
|
|
||||||
|
static class Writer extends BytesWriterBase {
|
||||||
|
private int size = -1;
|
||||||
|
private int[] docToID;
|
||||||
|
private final BytesRefHash hash = new BytesRefHash(pool,
|
||||||
|
BytesRefHash.DEFAULT_CAPACITY, new TrackingDirectBytesStartArray(
|
||||||
|
BytesRefHash.DEFAULT_CAPACITY, bytesUsed));
|
||||||
|
public Writer(Directory dir, String id, AtomicLong bytesUsed)
|
||||||
|
throws IOException {
|
||||||
|
this(dir, id, new DirectTrackingAllocator(ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed),
|
||||||
|
bytesUsed);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Writer(Directory dir, String id, Allocator allocator,
|
||||||
|
AtomicLong bytesUsed) throws IOException {
|
||||||
|
super(dir, id, CODEC_NAME, VERSION_CURRENT, true,
|
||||||
|
new ByteBlockPool(allocator), bytesUsed);
|
||||||
|
docToID = new int[1];
|
||||||
|
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT); // TODO BytesRefHash
|
||||||
|
// uses bytes too!
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void add(int docID, BytesRef bytes) throws IOException {
|
||||||
|
if (bytes.length == 0) // default value - skip it
|
||||||
|
return;
|
||||||
|
if (size == -1) {
|
||||||
|
size = bytes.length;
|
||||||
|
datOut.writeInt(size);
|
||||||
|
} else if (bytes.length != size) {
|
||||||
|
throw new IllegalArgumentException("expected bytes size=" + size
|
||||||
|
+ " but got " + bytes.length);
|
||||||
|
}
|
||||||
|
int ord = hash.add(bytes);
|
||||||
|
|
||||||
|
if (ord >= 0) {
|
||||||
|
// new added entry
|
||||||
|
datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
|
||||||
|
} else {
|
||||||
|
ord = (-ord) - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (docID >= docToID.length) {
|
||||||
|
final int size = docToID.length;
|
||||||
|
docToID = ArrayUtil.grow(docToID, 1 + docID);
|
||||||
|
bytesUsed.addAndGet((docToID.length - size)
|
||||||
|
* RamUsageEstimator.NUM_BYTES_INT);
|
||||||
|
}
|
||||||
|
docToID[docID] = 1 + ord;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Important that we get docCount, in case there were
|
||||||
|
// some last docs that we didn't see
|
||||||
|
@Override
|
||||||
|
public void finish(int docCount) throws IOException {
|
||||||
|
try {
|
||||||
|
if (size == -1) {
|
||||||
|
datOut.writeInt(size);
|
||||||
|
}
|
||||||
|
final int count = 1 + hash.size();
|
||||||
|
idxOut.writeInt(count - 1);
|
||||||
|
// write index
|
||||||
|
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
|
||||||
|
PackedInts.bitsRequired(count - 1));
|
||||||
|
final int limit = docCount > docToID.length ? docToID.length : docCount;
|
||||||
|
for (int i = 0; i < limit; i++) {
|
||||||
|
w.add(docToID[i]);
|
||||||
|
}
|
||||||
|
// fill up remaining doc with zeros
|
||||||
|
for (int i = limit; i < docCount; i++) {
|
||||||
|
w.add(0);
|
||||||
|
}
|
||||||
|
w.finish();
|
||||||
|
} finally {
|
||||||
|
hash.close();
|
||||||
|
super.finish(docCount);
|
||||||
|
bytesUsed
|
||||||
|
.addAndGet((-docToID.length) * RamUsageEstimator.NUM_BYTES_INT);
|
||||||
|
docToID = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static class Reader extends BytesReaderBase {
|
||||||
|
private final int size;
|
||||||
|
|
||||||
|
Reader(Directory dir, String id, int maxDoc) throws IOException {
|
||||||
|
super(dir, id, CODEC_NAME, VERSION_START, true);
|
||||||
|
size = datIn.readInt();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Source load() throws IOException {
|
||||||
|
final IndexInput index = cloneIndex();
|
||||||
|
      return new Source(cloneData(), index, size, index.readInt());
    }

    private static class Source extends BytesBaseSource {
      private final PackedInts.Reader index;
      private final int size;
      private final int numValues;

      protected Source(IndexInput datIn, IndexInput idxIn, int size,
          int numValues) throws IOException {
        super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), size * numValues);
        this.size = size;
        this.numValues = numValues;
        index = PackedInts.getReader(idxIn);
      }

      @Override
      public BytesRef getBytes(int docID, BytesRef bytesRef) {
        final int id = (int) index.get(docID);
        if (id == 0) {
          bytesRef.length = 0;
          return bytesRef;
        }
        return data.fillSlice(bytesRef, ((id - 1) * size), size);
      }

      @Override
      public int getValueCount() {
        return numValues;
      }

      @Override
      public ValueType type() {
        return ValueType.BYTES_FIXED_DEREF;
      }

      @Override
      protected int maxDoc() {
        return index.size();
      }
    }

    @Override
    public ValuesEnum getEnum(AttributeSource source) throws IOException {
      return new DerefBytesEnum(source, cloneData(), cloneIndex(), size);
    }

    static class DerefBytesEnum extends ValuesEnum {
      protected final IndexInput datIn;
      private final PackedInts.ReaderIterator idx;
      protected final long fp;
      private final int size;
      private final int valueCount;
      private int pos = -1;

      public DerefBytesEnum(AttributeSource source, IndexInput datIn,
          IndexInput idxIn, int size) throws IOException {
        this(source, datIn, idxIn, size, ValueType.BYTES_FIXED_DEREF);
      }

      protected DerefBytesEnum(AttributeSource source, IndexInput datIn,
          IndexInput idxIn, int size, ValueType enumType) throws IOException {
        super(source, enumType);
        this.datIn = datIn;
        this.size = size;
        idxIn.readInt(); // read valueCount
        idx = PackedInts.getReaderIterator(idxIn);
        fp = datIn.getFilePointer();
        bytesRef.grow(this.size);
        bytesRef.length = this.size;
        bytesRef.offset = 0;
        valueCount = idx.size();
      }

      protected void copyFrom(ValuesEnum valuesEnum) {
        bytesRef = valuesEnum.bytesRef;
        if (bytesRef.bytes.length < size) {
          bytesRef.grow(size);
        }
        bytesRef.length = size;
        bytesRef.offset = 0;
      }

      @Override
      public int advance(int target) throws IOException {
        if (target < valueCount) {
          long address;
          while ((address = idx.advance(target)) == 0) {
            if (++target >= valueCount) {
              return pos = NO_MORE_DOCS;
            }
          }
          pos = idx.ord();
          fill(address, bytesRef);
          return pos;
        }
        return pos = NO_MORE_DOCS;
      }

      @Override
      public int nextDoc() throws IOException {
        if (pos >= valueCount) {
          return pos = NO_MORE_DOCS;
        }
        return advance(pos + 1);
      }

      public void close() throws IOException {
        try {
          datIn.close();
        } finally {
          idx.close();
        }
      }

      protected void fill(long address, BytesRef ref) throws IOException {
        datIn.seek(fp + ((address - 1) * size));
        datIn.readBytes(ref.bytes, 0, size);
        ref.length = size;
        ref.offset = 0;
      }

      @Override
      public int docID() {
        return pos;
      }
    }

    @Override
    public ValueType type() {
      return ValueType.BYTES_FIXED_DEREF;
    }
  }
}
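
The dereferenced layout above is easiest to see outside of Lucene. The following standalone sketch (plain Java with hypothetical names, not the Lucene API) mirrors the writer/reader contract: duplicate values are stored once, the per-document index holds 1-based addresses, and address 0 marks a document without a value.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class DerefSketch {
  public static void main(String[] args) {
    final int size = 4; // fixed length of every value in bytes
    Map<String, Integer> seen = new HashMap<String, Integer>(); // value -> 1-based address
    List<byte[]> data = new ArrayList<byte[]>(); // deduplicated values in write order
    String[] docs = {"abcd", null, "abcd", "wxyz"}; // doc 1 has no value

    int[] index = new int[docs.length]; // per-document address, 0 = missing
    for (int docID = 0; docID < docs.length; docID++) {
      if (docs[docID] == null) continue; // leaves 0 in the index slot
      Integer address = seen.get(docs[docID]);
      if (address == null) {
        data.add(docs[docID].getBytes());
        address = data.size(); // 1-based, like "1 + e" in the writer above
        seen.put(docs[docID], address);
      }
      index[docID] = address;
    }

    // Lookup mirrors Source.getBytes: 0 means "no value"; otherwise the
    // value starts at offset (address - 1) * size in the data file.
    for (int docID = 0; docID < docs.length; docID++) {
      int id = index[docID];
      System.out.println("doc " + docID + " -> " + (id == 0 ? "<missing>"
          : new String(data.get(id - 1)) + " @ offset " + (id - 1) * size));
    }
  }
}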
@@ -0,0 +1,242 @@
package org.apache.lucene.index.values;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Comparator;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.lucene.index.values.Bytes.BytesBaseSortedSource;
import org.apache.lucene.index.values.Bytes.BytesReaderBase;
import org.apache.lucene.index.values.Bytes.BytesWriterBase;
import org.apache.lucene.index.values.FixedDerefBytesImpl.Reader.DerefBytesEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.ByteBlockPool.Allocator;
import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator;
import org.apache.lucene.util.BytesRefHash.TrackingDirectBytesStartArray;
import org.apache.lucene.util.packed.PackedInts;

// Stores fixed-length byte[] by deref, ie when two docs have the same
// value, they store only 1 byte[]; entries are written in comparator
// order so each value's 1-based address doubles as its sorted ord.

/**
 * @lucene.experimental
 */
class FixedSortedBytesImpl {

  static final String CODEC_NAME = "FixedSortedBytes";
  static final int VERSION_START = 0;
  static final int VERSION_CURRENT = VERSION_START;

  static class Writer extends BytesWriterBase {
    private int size = -1;
    private int[] docToEntry;
    private final Comparator<BytesRef> comp;

    private final BytesRefHash hash = new BytesRefHash(pool,
        BytesRefHash.DEFAULT_CAPACITY, new TrackingDirectBytesStartArray(
            BytesRefHash.DEFAULT_CAPACITY, bytesUsed));

    public Writer(Directory dir, String id, Comparator<BytesRef> comp,
        AtomicLong bytesUsed) throws IOException {
      this(dir, id, comp, new DirectTrackingAllocator(ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed),
          bytesUsed);
    }

    public Writer(Directory dir, String id, Comparator<BytesRef> comp,
        Allocator allocator, AtomicLong bytesUsed) throws IOException {
      super(dir, id, CODEC_NAME, VERSION_CURRENT, true,
          new ByteBlockPool(allocator), bytesUsed);
      docToEntry = new int[1];
      // docToEntry[0] = -1;
      bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT);
      this.comp = comp;
    }

    @Override
    public void add(int docID, BytesRef bytes) throws IOException {
      if (bytes.length == 0)
        return; // default - skip it
      if (size == -1) {
        size = bytes.length;
        datOut.writeInt(size);
      } else if (bytes.length != size) {
        throw new IllegalArgumentException("expected bytes size=" + size
            + " but got " + bytes.length);
      }
      if (docID >= docToEntry.length) {
        final int[] newArray = new int[ArrayUtil.oversize(1 + docID,
            RamUsageEstimator.NUM_BYTES_INT)];
        System.arraycopy(docToEntry, 0, newArray, 0, docToEntry.length);
        bytesUsed.addAndGet((newArray.length - docToEntry.length)
            * RamUsageEstimator.NUM_BYTES_INT);
        docToEntry = newArray;
      }
      int e = hash.add(bytes);
      docToEntry[docID] = 1 + (e < 0 ? (-e) - 1 : e);
    }

    // Important that we get docCount, in case there were
    // some last docs that we didn't see
    @Override
    public void finish(int docCount) throws IOException {
      try {
        if (size == -1) { // no data added
          datOut.writeInt(size);
        }
        final int[] sortedEntries = hash.sort(comp);
        final int count = hash.size();
        int[] address = new int[count];
        // first dump bytes data, recording address as we go
        for (int i = 0; i < count; i++) {
          final int e = sortedEntries[i];
          final BytesRef bytes = hash.get(e, new BytesRef());
          assert bytes.length == size;
          datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
          address[e] = 1 + i;
        }

        idxOut.writeInt(count);

        // next write index
        PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
            PackedInts.bitsRequired(count));
        final int limit;
        if (docCount > docToEntry.length) {
          limit = docToEntry.length;
        } else {
          limit = docCount;
        }
        for (int i = 0; i < limit; i++) {
          final int e = docToEntry[i];
          if (e == 0) {
            // null is encoded as zero
            w.add(0);
          } else {
            assert e > 0 && e <= count : "index must be > 0 && <= " + count
                + " was: " + e;
            w.add(address[e - 1]);
          }
        }

        for (int i = limit; i < docCount; i++) {
          w.add(0);
        }
        w.finish();
      } finally {
        super.finish(docCount);
        bytesUsed.addAndGet((-docToEntry.length)
            * RamUsageEstimator.NUM_BYTES_INT);
        docToEntry = null;
        hash.close();
      }
    }
  }

  public static class Reader extends BytesReaderBase {
    private final int size;

    public Reader(Directory dir, String id, int maxDoc) throws IOException {
      super(dir, id, CODEC_NAME, VERSION_START, true);
      size = datIn.readInt();
    }

    @Override
    public org.apache.lucene.index.values.IndexDocValues.Source load()
        throws IOException {
      return loadSorted(null);
    }

    @Override
    public SortedSource loadSorted(Comparator<BytesRef> comp)
        throws IOException {
      final IndexInput idxInput = cloneIndex();
      final IndexInput datInput = cloneData();
      datInput.seek(CodecUtil.headerLength(CODEC_NAME) + 4);
      idxInput.seek(CodecUtil.headerLength(CODEC_NAME));
      return new Source(datInput, idxInput, size, idxInput.readInt(), comp);
    }

    private static class Source extends BytesBaseSortedSource {

      private final PackedInts.Reader index;
      private final int numValue;
      private final int size;

      public Source(IndexInput datIn, IndexInput idxIn, int size,
          int numValues, Comparator<BytesRef> comp) throws IOException {
        super(datIn, idxIn, comp, new PagedBytes(PAGED_BYTES_BITS), size
            * numValues);
        this.size = size;
        this.numValue = numValues;
        index = PackedInts.getReader(idxIn);
        closeIndexInput();
      }

      @Override
      public int ord(int docID) {
        return (int) index.get(docID) - 1;
      }

      @Override
      public int getByValue(BytesRef bytes, BytesRef tmpRef) {
        return binarySearch(bytes, tmpRef, 0, numValue - 1);
      }

      @Override
      public int getValueCount() {
        return numValue;
      }

      @Override
      protected BytesRef deref(int ord, BytesRef bytesRef) {
        return data.fillSlice(bytesRef, (ord * size), size);
      }

      @Override
      public ValueType type() {
        return ValueType.BYTES_FIXED_SORTED;
      }

      @Override
      protected int maxDoc() {
        return index.size();
      }
    }

    @Override
    public ValuesEnum getEnum(AttributeSource source) throws IOException {
      // do unsorted
      return new DerefBytesEnum(source, cloneData(), cloneIndex(), size);
    }

    @Override
    public ValueType type() {
      return ValueType.BYTES_FIXED_SORTED;
    }
  }
}
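
Worth noting how the sorted variant's lookup contract behaves in practice. A minimal plain-Java sketch (hypothetical names; java.util.Arrays stands in for the binarySearch helper used above), assuming the value dictionary is already in comparator order:

import java.util.Arrays;

public class SortedLookupSketch {
  public static void main(String[] args) {
    // Dense, sorted value dictionary: ord 0 is the smallest value.
    String[] sortedValues = {"apple", "kiwi", "pear"};

    System.out.println(lookup(sortedValues, "kiwi"));   // 1  (found: its ord)
    System.out.println(lookup(sortedValues, "banana")); // -2 (miss: -(1)-1, "kiwi" is the first larger value)
  }

  static int lookup(String[] sortedValues, String value) {
    // Arrays.binarySearch already uses the -(insertionPoint)-1 convention
    // described in SortedSource.getByValue below.
    return Arrays.binarySearch(sortedValues, value);
  }
}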
@@ -0,0 +1,249 @@
package org.apache.lucene.index.values;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.index.values.Bytes.BytesBaseSource;
import org.apache.lucene.index.values.Bytes.BytesReaderBase;
import org.apache.lucene.index.values.Bytes.BytesWriterBase;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PagedBytes;

// Simplest storage: stores fixed length byte[] per
// document, with no dedup and no sorting.
/**
 * @lucene.experimental
 */
class FixedStraightBytesImpl {

  static final String CODEC_NAME = "FixedStraightBytes";
  static final int VERSION_START = 0;
  static final int VERSION_CURRENT = VERSION_START;

  static class Writer extends BytesWriterBase {
    private int size = -1;
    // docID of the last added value; starts at -1 so fill() pads any leading docs
    private int lastDocID = -1;
    private byte[] oneRecord;

    protected Writer(Directory dir, String id) throws IOException {
      super(dir, id, CODEC_NAME, VERSION_CURRENT, false, null, null);
    }

    // TODO - impl bulk copy here!

    @Override
    public void add(int docID, BytesRef bytes) throws IOException {
      if (size == -1) {
        size = bytes.length;
        datOut.writeInt(size);
        oneRecord = new byte[size];
      } else if (bytes.length != size) {
        throw new IllegalArgumentException("expected bytes size=" + size
            + " but got " + bytes.length);
      }
      fill(docID);
      assert bytes.bytes.length >= bytes.length;
      datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
    }

    /*
     * (non-Javadoc)
     *
     * @see
     * org.apache.lucene.index.values.Writer#merge(org.apache.lucene.index.values
     * .Writer.MergeState)
     */
    @Override
    protected void merge(MergeState state) throws IOException {
      if (state.bits == null && state.reader instanceof Reader) {
        Reader reader = (Reader) state.reader;
        final int maxDocs = reader.maxDoc;
        if (maxDocs == 0) {
          return;
        }
        if (size == -1) {
          size = reader.size;
          datOut.writeInt(size);
          oneRecord = new byte[size];
        }
        fill(state.docBase);
        // TODO should we add a transfer to API to each reader?
        final IndexInput cloneData = reader.cloneData();
        try {
          datOut.copyBytes(cloneData, size * maxDocs);
        } finally {
          cloneData.close();
        }

        lastDocID += maxDocs - 1;
      } else {
        super.merge(state);
      }
    }

    // Fills up to but not including this docID
    private void fill(int docID) throws IOException {
      assert size >= 0;
      for (int i = lastDocID + 1; i < docID; i++) {
        datOut.writeBytes(oneRecord, size);
      }
      lastDocID = docID;
    }

    @Override
    public void finish(int docCount) throws IOException {
      try {
        if (size == -1) { // no data added
          datOut.writeInt(0);
        } else {
          fill(docCount);
        }
      } finally {
        super.finish(docCount);
      }
    }

    public long ramBytesUsed() {
      return oneRecord == null ? 0 : oneRecord.length;
    }
  }

  public static class Reader extends BytesReaderBase {
    private final int size;
    private final int maxDoc;

    Reader(Directory dir, String id, int maxDoc) throws IOException {
      super(dir, id, CODEC_NAME, VERSION_START, false);
      size = datIn.readInt();
      this.maxDoc = maxDoc;
    }

    @Override
    public Source load() throws IOException {
      return new Source(cloneData(), size, maxDoc);
    }

    @Override
    public void close() throws IOException {
      datIn.close();
    }

    private static class Source extends BytesBaseSource {
      private final int size;
      private final int maxDoc;

      public Source(IndexInput datIn, int size, int maxDoc)
          throws IOException {
        super(datIn, null, new PagedBytes(PAGED_BYTES_BITS), size * maxDoc);
        this.size = size;
        this.maxDoc = maxDoc;
      }

      @Override
      public BytesRef getBytes(int docID, BytesRef bytesRef) {
        return data.fillSlice(bytesRef, docID * size, size);
      }

      @Override
      public int getValueCount() {
        throw new UnsupportedOperationException();
      }

      @Override
      public ValueType type() {
        return ValueType.BYTES_FIXED_STRAIGHT;
      }

      @Override
      protected int maxDoc() {
        return maxDoc;
      }
    }

    @Override
    public ValuesEnum getEnum(AttributeSource source) throws IOException {
      return new FixedStraightBytesEnum(source, cloneData(), size, maxDoc);
    }

    private static final class FixedStraightBytesEnum extends ValuesEnum {
      private final IndexInput datIn;
      private final int size;
      private final int maxDoc;
      private int pos = -1;
      private final long fp;

      public FixedStraightBytesEnum(AttributeSource source, IndexInput datIn,
          int size, int maxDoc) throws IOException {
        super(source, ValueType.BYTES_FIXED_STRAIGHT);
        this.datIn = datIn;
        this.size = size;
        this.maxDoc = maxDoc;
        bytesRef.grow(size);
        bytesRef.length = size;
        bytesRef.offset = 0;
        fp = datIn.getFilePointer();
      }

      protected void copyFrom(ValuesEnum valuesEnum) {
        bytesRef = valuesEnum.bytesRef;
        if (bytesRef.bytes.length < size) {
          bytesRef.grow(size);
        }
        bytesRef.length = size;
        bytesRef.offset = 0;
      }

      public void close() throws IOException {
        datIn.close();
      }

      @Override
      public int advance(int target) throws IOException {
        if (target >= maxDoc || size == 0) {
          return pos = NO_MORE_DOCS;
        }
        if ((target - 1) != pos) { // pos inc == 1
          datIn.seek(fp + target * size);
        }
        datIn.readBytes(bytesRef.bytes, 0, size);
        return pos = target;
      }

      @Override
      public int docID() {
        return pos;
      }

      @Override
      public int nextDoc() throws IOException {
        if (pos >= maxDoc) {
          return pos = NO_MORE_DOCS;
        }
        return advance(pos + 1);
      }
    }

    @Override
    public ValueType type() {
      return ValueType.BYTES_FIXED_STRAIGHT;
    }
  }
}
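
Because every record has the same length and missing documents are padded, the straight variant needs no index file: a document's value simply starts at docID * size. A small standalone sketch of that addressing (plain Java, hypothetical names; a ByteArrayOutputStream stands in for the data file):

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Arrays;

public class StraightSketch {
  public static void main(String[] args) throws IOException {
    final int size = 3;
    ByteArrayOutputStream dat = new ByteArrayOutputStream();
    int lastDocID = -1;

    lastDocID = add(dat, lastDocID, 0, "foo".getBytes(), size);
    // doc 1 is skipped: add() pads it with a zeroed record first
    lastDocID = add(dat, lastDocID, 2, "bar".getBytes(), size);

    byte[] data = dat.toByteArray();
    for (int docID = 0; docID <= 2; docID++) {
      // Source.getBytes equivalent: slice at docID * size
      byte[] value = Arrays.copyOfRange(data, docID * size, docID * size + size);
      System.out.println("doc " + docID + " -> " + Arrays.toString(value));
    }
  }

  static int add(ByteArrayOutputStream dat, int lastDocID, int docID,
      byte[] bytes, int size) throws IOException {
    for (int i = lastDocID + 1; i < docID; i++) {
      dat.write(new byte[size]); // default (all-zero) record, like fill()
    }
    dat.write(bytes);
    return docID;
  }
}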
@@ -0,0 +1,467 @@
package org.apache.lucene.index.values;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.io.IOException;
import java.util.Collection;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.values.IndexDocValues.Source;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.FloatsRef;
import org.apache.lucene.util.IOUtils;

/**
 * Exposes {@link Writer} and reader ({@link Source}) for 32 bit and 64 bit
 * floating point values.
 * <p>
 * Current implementations store either 4 byte or 8 byte floating point values
 * at full precision, without any compression.
 *
 * @lucene.experimental
 */
public class Floats {
  // TODO - add bulk copy where possible
  private static final String CODEC_NAME = "SimpleFloats";
  static final int VERSION_START = 0;
  static final int VERSION_CURRENT = VERSION_START;
  private static final int INT_DEFAULT = Float.floatToRawIntBits(0.0f);
  private static final long LONG_DEFAULT = Double.doubleToRawLongBits(0.0d);

  public static Writer getWriter(Directory dir, String id, int precisionBytes,
      AtomicLong bytesUsed) throws IOException {
    if (precisionBytes != 4 && precisionBytes != 8) {
      throw new IllegalArgumentException("precisionBytes must be 4 or 8; got "
          + precisionBytes);
    }
    if (precisionBytes == 4) {
      return new Float4Writer(dir, id, bytesUsed);
    } else {
      return new Float8Writer(dir, id, bytesUsed);
    }
  }

  public static IndexDocValues getValues(Directory dir, String id, int maxDoc)
      throws IOException {
    return new FloatsReader(dir, id, maxDoc);
  }

  abstract static class FloatsWriter extends Writer {
    private final String id;
    private FloatsRef floatsRef;
    protected int lastDocId = -1;
    protected IndexOutput datOut;
    private final byte precision;

    protected FloatsWriter(Directory dir, String id, int precision,
        AtomicLong bytesUsed) throws IOException {
      super(bytesUsed);
      this.id = id;
      this.precision = (byte) precision;
      datOut = dir.createOutput(IndexFileNames.segmentFileName(id, "",
          Writer.DATA_EXTENSION));
      boolean success = false;
      try {
        CodecUtil.writeHeader(datOut, CODEC_NAME, VERSION_CURRENT);
        assert datOut.getFilePointer() == CodecUtil.headerLength(CODEC_NAME);
        datOut.writeByte(this.precision);
        success = true;
      } finally {
        if (!success) {
          IOUtils.closeSafely(true, datOut);
        }
      }
    }

    public long ramBytesUsed() {
      return 0;
    }

    @Override
    protected void add(int docID) throws IOException {
      add(docID, floatsRef.get());
    }

    @Override
    public void add(int docID, PerDocFieldValues docValues) throws IOException {
      add(docID, docValues.getFloat());
    }

    @Override
    protected void setNextEnum(ValuesEnum valuesEnum) {
      floatsRef = valuesEnum.getFloat();
    }

    protected abstract int fillDefault(int num) throws IOException;

    @Override
    protected void merge(MergeState state) throws IOException {
      if (state.bits == null && state.reader instanceof FloatsReader) {
        // no deletes - bulk copy
        // TODO: should we do bulk copies with deletes too?
        final FloatsReader reader = (FloatsReader) state.reader;
        assert reader.precisionBytes == (int) precision;
        if (reader.maxDoc == 0)
          return;
        final int docBase = state.docBase;
        if (docBase - lastDocId > 1) {
          // fill with default values
          lastDocId += fillDefault(docBase - lastDocId - 1);
        }
        lastDocId += reader.transferTo(datOut);
      } else {
        super.merge(state);
      }
    }

    @Override
    public void files(Collection<String> files) throws IOException {
      files.add(IndexFileNames.segmentFileName(id, "", Writer.DATA_EXTENSION));
    }
  }

  // Writes 4 bytes (float) per value
  static class Float4Writer extends FloatsWriter {

    protected Float4Writer(Directory dir, String id, AtomicLong bytesUsed)
        throws IOException {
      super(dir, id, 4, bytesUsed);
    }

    @Override
    public void add(final int docID, final double v)
        throws IOException {
      assert docID > lastDocId : "docID: " + docID
          + " must be greater than the last added doc id: " + lastDocId;
      if (docID - lastDocId > 1) {
        // fill with default values
        lastDocId += fillDefault(docID - lastDocId - 1);
      }
      assert datOut != null;
      datOut.writeInt(Float.floatToRawIntBits((float) v));
      ++lastDocId;
    }

    @Override
    public void finish(int docCount) throws IOException {
      try {
        if (docCount > lastDocId + 1)
          for (int i = lastDocId + 1; i < docCount; i++) {
            datOut.writeInt(INT_DEFAULT); // default value
          }
      } finally {
        datOut.close();
      }
    }

    @Override
    protected int fillDefault(int numValues) throws IOException {
      for (int i = 0; i < numValues; i++) {
        datOut.writeInt(INT_DEFAULT);
      }
      return numValues;
    }
  }

  // Writes 8 bytes (double) per value
  static class Float8Writer extends FloatsWriter {

    protected Float8Writer(Directory dir, String id, AtomicLong bytesUsed)
        throws IOException {
      super(dir, id, 8, bytesUsed);
    }

    @Override
    public void add(int docID, double v) throws IOException {
      assert docID > lastDocId : "docID: " + docID
          + " must be greater than the last added doc id: " + lastDocId;
      if (docID - lastDocId > 1) {
        // fill with default values
        lastDocId += fillDefault(docID - lastDocId - 1);
      }
      assert datOut != null;
      datOut.writeLong(Double.doubleToRawLongBits(v));
      ++lastDocId;
    }

    @Override
    public void finish(int docCount) throws IOException {
      try {
        if (docCount > lastDocId + 1)
          for (int i = lastDocId + 1; i < docCount; i++) {
            datOut.writeLong(LONG_DEFAULT); // default value
          }
      } finally {
        datOut.close();
      }
    }

    @Override
    protected int fillDefault(int numValues) throws IOException {
      for (int i = 0; i < numValues; i++) {
        datOut.writeLong(LONG_DEFAULT);
      }
      return numValues;
    }
  }

  /**
   * Opens all necessary files, but does not read any data in until you call
   * {@link #load}.
   */
  static class FloatsReader extends IndexDocValues {

    private final IndexInput datIn;
    private final int precisionBytes;
    // TODO(simonw) is ByteBuffer the way to go here?
    private final int maxDoc;

    protected FloatsReader(Directory dir, String id, int maxDoc)
        throws IOException {
      datIn = dir.openInput(IndexFileNames.segmentFileName(id, "",
          Writer.DATA_EXTENSION));
      CodecUtil.checkHeader(datIn, CODEC_NAME, VERSION_START, VERSION_START);
      precisionBytes = datIn.readByte();
      assert precisionBytes == 4 || precisionBytes == 8;
      this.maxDoc = maxDoc;
    }

    int transferTo(IndexOutput out) throws IOException {
      IndexInput indexInput = (IndexInput) datIn.clone();
      try {
        indexInput.seek(CodecUtil.headerLength(CODEC_NAME));
        // skip precision:
        indexInput.readByte();
        out.copyBytes(indexInput, precisionBytes * maxDoc);
      } finally {
        indexInput.close();
      }
      return maxDoc;
    }

    /**
     * Loads the actual values. You may call this more than once, eg if you
     * already previously loaded but then discarded the Source.
     */
    @Override
    public Source load() throws IOException {
      /* We always read big-endian here: since the writer uses
       * DataOutput#writeInt() / #writeLong(), we can simply read the ints /
       * longs back in using readInt() / readLong(). */
      final IndexInput indexInput = (IndexInput) datIn.clone();
      indexInput.seek(CodecUtil.headerLength(CODEC_NAME));
      // skip precision:
      indexInput.readByte();
      if (precisionBytes == 4) {
        final float[] values = new float[(4 * maxDoc) >> 2];
        assert values.length == maxDoc;
        for (int i = 0; i < values.length; i++) {
          values[i] = Float.intBitsToFloat(indexInput.readInt());
        }
        return new Source4(values);
      } else {
        final double[] values = new double[(8 * maxDoc) >> 3];
        assert values.length == maxDoc;
        for (int i = 0; i < values.length; i++) {
          values[i] = Double.longBitsToDouble(indexInput.readLong());
        }
        return new Source8(values);
      }
    }

    private class Source4 extends Source {
      private final float[] values;

      Source4(final float[] values) throws IOException {
        this.values = values;
      }

      @Override
      public double getFloat(int docID) {
        return values[docID];
      }

      @Override
      public ValuesEnum getEnum(AttributeSource attrSource)
          throws IOException {
        return new SourceEnum(attrSource, ValueType.FLOAT_32, this, maxDoc) {
          @Override
          public int advance(int target) throws IOException {
            if (target >= numDocs)
              return pos = NO_MORE_DOCS;
            floatsRef.floats[floatsRef.offset] = source.getFloat(target);
            return pos = target;
          }
        };
      }

      @Override
      public ValueType type() {
        return ValueType.FLOAT_32;
      }
    }

    private class Source8 extends Source {
      private final double[] values;

      Source8(final double[] values) throws IOException {
        this.values = values;
      }

      @Override
      public double getFloat(int docID) {
        return values[docID];
      }

      @Override
      public ValuesEnum getEnum(AttributeSource attrSource)
          throws IOException {
        return new SourceEnum(attrSource, type(), this, maxDoc) {
          @Override
          public int advance(int target) throws IOException {
            if (target >= numDocs)
              return pos = NO_MORE_DOCS;
            floatsRef.floats[floatsRef.offset] = source.getFloat(target);
            return pos = target;
          }
        };
      }

      @Override
      public ValueType type() {
        return ValueType.FLOAT_64;
      }
    }

    @Override
    public void close() throws IOException {
      super.close();
      datIn.close();
    }

    @Override
    public ValuesEnum getEnum(AttributeSource source) throws IOException {
      IndexInput indexInput = (IndexInput) datIn.clone();
      indexInput.seek(CodecUtil.headerLength(CODEC_NAME));
      // skip precision:
      indexInput.readByte();
      return precisionBytes == 4 ? new Floats4Enum(source, indexInput, maxDoc)
          : new Floats8EnumImpl(source, indexInput, maxDoc);
    }

    @Override
    public ValueType type() {
      return precisionBytes == 4 ? ValueType.FLOAT_32
          : ValueType.FLOAT_64;
    }
  }

  static final class Floats4Enum extends FloatsEnumImpl {

    Floats4Enum(AttributeSource source, IndexInput dataIn, int maxDoc)
        throws IOException {
      super(source, dataIn, 4, maxDoc, ValueType.FLOAT_32);
    }

    @Override
    public int advance(int target) throws IOException {
      if (target >= maxDoc)
        return pos = NO_MORE_DOCS;
      dataIn.seek(fp + (target * precision));
      final int intBits = dataIn.readInt();
      floatsRef.floats[0] = Float.intBitsToFloat(intBits);
      floatsRef.offset = 0;
      return pos = target;
    }

    @Override
    public int docID() {
      return pos;
    }

    @Override
    public int nextDoc() throws IOException {
      if (pos >= maxDoc) {
        return pos = NO_MORE_DOCS;
      }
      return advance(pos + 1);
    }
  }

  private static final class Floats8EnumImpl extends FloatsEnumImpl {

    Floats8EnumImpl(AttributeSource source, IndexInput dataIn, int maxDoc)
        throws IOException {
      super(source, dataIn, 8, maxDoc, ValueType.FLOAT_64);
    }

    @Override
    public int advance(int target) throws IOException {
      if (target >= maxDoc) {
        return pos = NO_MORE_DOCS;
      }
      dataIn.seek(fp + (target * precision));
      final long value = dataIn.readLong();
      floatsRef.floats[floatsRef.offset] = Double.longBitsToDouble(value);
      return pos = target;
    }

    @Override
    public int docID() {
      return pos;
    }

    @Override
    public int nextDoc() throws IOException {
      if (pos >= maxDoc) {
        return pos = NO_MORE_DOCS;
      }
      return advance(pos + 1);
    }
  }

  static abstract class FloatsEnumImpl extends ValuesEnum {
    protected final IndexInput dataIn;
    protected int pos = -1;
    protected final int precision;
    protected final int maxDoc;
    protected final long fp;

    FloatsEnumImpl(AttributeSource source, IndexInput dataIn, int precision,
        int maxDoc, ValueType type) throws IOException {
      super(source, precision == 4 ? ValueType.FLOAT_32
          : ValueType.FLOAT_64);
      this.dataIn = dataIn;
      this.precision = precision;
      this.maxDoc = maxDoc;
      fp = dataIn.getFilePointer();
      floatsRef.offset = 0;
    }

    @Override
    public void close() throws IOException {
      dataIn.close();
    }
  }
}
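
The codec leans on the fact that floatToRawIntBits/intBitsToFloat (and their double counterparts) round-trip losslessly, which is also why the defaults above are just the raw bit patterns of 0.0. A quick standalone check:

public class FloatBitsSketch {
  public static void main(String[] args) {
    float f = 3.14159f;
    int bits = Float.floatToRawIntBits(f);               // what Float4Writer writes
    System.out.println(Float.intBitsToFloat(bits) == f); // true: lossless

    double d = 2.718281828459045;
    long longBits = Double.doubleToRawLongBits(d);       // what Float8Writer writes
    System.out.println(Double.longBitsToDouble(longBits) == d); // true

    // The precomputed defaults are just the bit patterns of 0.0:
    System.out.println(Float.floatToRawIntBits(0.0f));     // 0
    System.out.println(Double.doubleToRawLongBits(0.0d));  // 0
  }
}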
@@ -0,0 +1,364 @@
package org.apache.lucene.index.values;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.io.Closeable;
import java.io.IOException;
import java.util.Comparator;

import org.apache.lucene.document.IndexDocValuesField;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;

/**
 * {@link IndexDocValues} provides a dense per-document typed storage for fast
 * value access based on the lucene internal document id. {@link IndexDocValues}
 * exposes two distinct APIs:
 * <ul>
 * <li>via {@link Source} an entirely RAM resident API for random access</li>
 * <li>via {@link ValuesEnum} a disk resident API for sequential access</li>
 * </ul> {@link IndexDocValues} are exposed via
 * {@link IndexReader#perDocValues()} on a per-segment basis. For best
 * performance {@link IndexDocValues} should be consumed per-segment just like
 * IndexReader.
 * <p>
 * {@link IndexDocValues} are fully integrated into the {@link Codec} API.
 * Custom implementations can be exposed on a per field basis via
 * {@link CodecProvider}.
 *
 * @see ValueType for limitations and default implementation documentation
 * @see IndexDocValuesField for adding values to the index
 * @see Codec#docsConsumer(org.apache.lucene.index.PerDocWriteState) for
 *      customization
 * @lucene.experimental
 */
public abstract class IndexDocValues implements Closeable {
  /*
   * TODO: it might be useful to add another random access enum for some
   * implementations like packed ints and only return such a random access enum
   * if the impl supports random access. For super large segments it might be
   * useful or even required in certain environments to have disk based random
   * access
   */
  public static final IndexDocValues[] EMPTY_ARRAY = new IndexDocValues[0];

  private SourceCache cache = new SourceCache.DirectSourceCache();

  /**
   * Returns an iterator that steps through all document values for this
   * {@link IndexDocValues} field instance. {@link ValuesEnum} will skip
   * documents without a value where applicable.
   */
  public ValuesEnum getEnum() throws IOException {
    return getEnum(null);
  }

  /**
   * Returns an iterator that steps through all document values for this
   * {@link IndexDocValues} field instance. {@link ValuesEnum} will skip
   * documents without a value where applicable.
   * <p>
   * If an {@link AttributeSource} is supplied to this method the
   * {@link ValuesEnum} will use the given source to access implementation
   * related attributes.
   */
  public abstract ValuesEnum getEnum(AttributeSource attrSource)
      throws IOException;

  /**
   * Loads a new {@link Source} instance for this {@link IndexDocValues} field
   * instance. Source instances returned from this method are not cached. It is
   * the caller's responsibility to maintain the instance and release its
   * resources once the source is not needed anymore.
   * <p>
   * This method will return null iff this {@link IndexDocValues} represents a
   * {@link SortedSource}.
   * <p>
   * For managed {@link Source} instances see {@link #getSource()}.
   *
   * @see #getSource()
   * @see #setCache(SourceCache)
   */
  public abstract Source load() throws IOException;

  /**
   * Returns a {@link Source} instance through the current {@link SourceCache}.
   * Iff no {@link Source} has been loaded into the cache so far the source will
   * be loaded through {@link #load()} and passed to the {@link SourceCache}.
   * The caller of this method should not close the obtained {@link Source}
   * instance.
   * <p>
   * {@link Source} instances obtained from this method are closed / released
   * from the cache once this {@link IndexDocValues} instance is closed by the
   * {@link IndexReader}, {@link Fields} or {@link FieldsEnum} the
   * {@link IndexDocValues} was created from.
   * <p>
   * This method will return null iff this {@link IndexDocValues} represents a
   * {@link SortedSource}.
   */
  public Source getSource() throws IOException {
    return cache.load(this);
  }

  /**
   * Returns a {@link SortedSource} instance for this {@link IndexDocValues}
   * field instance like {@link #getSource()}.
   * <p>
   * This method will return null iff this {@link IndexDocValues} represents a
   * {@link Source} instead of a {@link SortedSource}.
   */
  public SortedSource getSortedSorted(Comparator<BytesRef> comparator)
      throws IOException {
    return cache.loadSorted(this, comparator);
  }

  /**
   * Loads and returns a {@link SortedSource} instance for this
   * {@link IndexDocValues} field instance like {@link #load()}.
   * <p>
   * This method will return null iff this {@link IndexDocValues} represents a
   * {@link Source} instead of a {@link SortedSource}.
   */
  public SortedSource loadSorted(Comparator<BytesRef> comparator)
      throws IOException {
    throw new UnsupportedOperationException();
  }

  /**
   * Returns the {@link ValueType} of this {@link IndexDocValues} instance.
   */
  public abstract ValueType type();

  /**
   * Closes this {@link IndexDocValues} instance. This method should only be
   * called by the creator of this {@link IndexDocValues} instance. API users
   * should not close {@link IndexDocValues} instances.
   */
  public void close() throws IOException {
    cache.close(this);
  }

  /**
   * Sets the {@link SourceCache} used by this {@link IndexDocValues} instance.
   * This method should be called before {@link #load()} or
   * {@link #loadSorted(Comparator)} is called. All {@link Source} or
   * {@link SortedSource} instances in the currently used cache will be closed
   * before the new cache is installed.
   * <p>
   * Note: All instances previously obtained from {@link #load()} or
   * {@link #loadSorted(Comparator)} will be closed.
   *
   * @throws IllegalArgumentException
   *           if the given cache is <code>null</code>
   */
  public void setCache(SourceCache cache) {
    if (cache == null)
      throw new IllegalArgumentException("cache must not be null");
    synchronized (this.cache) {
      this.cache.close(this);
      this.cache = cache;
    }
  }

  /**
   * Source of per document values like long, double or {@link BytesRef}
   * depending on the {@link IndexDocValues} field's {@link ValueType}. Source
   * implementations provide random access semantics similar to array lookups
   * and typically are entirely memory resident.
   * <p>
   * {@link Source} defines 3 {@link ValueType} //TODO finish this
   */
  public static abstract class Source {

    /**
     * Returns a <tt>long</tt> for the given document id or throws an
     * {@link UnsupportedOperationException} if this source doesn't support
     * <tt>long</tt> values.
     *
     * @throws UnsupportedOperationException
     *           if this source doesn't support <tt>long</tt> values.
     */
    public long getInt(int docID) {
      throw new UnsupportedOperationException("ints are not supported");
    }

    /**
     * Returns a <tt>double</tt> for the given document id or throws an
     * {@link UnsupportedOperationException} if this source doesn't support
     * <tt>double</tt> values.
     *
     * @throws UnsupportedOperationException
     *           if this source doesn't support <tt>double</tt> values.
     */
    public double getFloat(int docID) {
      throw new UnsupportedOperationException("floats are not supported");
    }

    /**
     * Returns a {@link BytesRef} for the given document id or throws an
     * {@link UnsupportedOperationException} if this source doesn't support
     * <tt>byte[]</tt> values.
     *
     * @throws UnsupportedOperationException
     *           if this source doesn't support <tt>byte[]</tt> values.
     */
    public BytesRef getBytes(int docID, BytesRef ref) {
      throw new UnsupportedOperationException("bytes are not supported");
    }

    /**
     * Returns the number of unique values. Some implementations may throw an
     * UnsupportedOperationException.
     */
    public int getValueCount() {
      throw new UnsupportedOperationException();
    }

    /**
     * Returns a {@link ValuesEnum} for this source.
     */
    public ValuesEnum getEnum() throws IOException {
      return getEnum(null);
    }

    /**
     * Returns the {@link ValueType} of this source.
     *
     * @return the {@link ValueType} of this source.
     */
    public abstract ValueType type();

    /**
     * Returns a {@link ValuesEnum} for this source which uses the given
     * {@link AttributeSource}.
     */
    public abstract ValuesEnum getEnum(AttributeSource attrSource)
        throws IOException;
  }

  /**
   * {@link ValuesEnum} utility for {@link Source} implementations.
   */
  public abstract static class SourceEnum extends ValuesEnum {
    protected final Source source;
    protected final int numDocs;
    protected int pos = -1;

    /**
     * Creates a new {@link SourceEnum}
     *
     * @param attrs
     *          the {@link AttributeSource} for this enum
     * @param type
     *          the enum's {@link ValueType}
     * @param source
     *          the source this enum operates on
     * @param numDocs
     *          the number of documents within the source
     */
    protected SourceEnum(AttributeSource attrs, ValueType type, Source source,
        int numDocs) {
      super(attrs, type);
      this.source = source;
      this.numDocs = numDocs;
    }

    @Override
    public void close() throws IOException {
    }

    @Override
    public int docID() {
      return pos;
    }

    @Override
    public int nextDoc() throws IOException {
      if (pos == NO_MORE_DOCS)
        return NO_MORE_DOCS;
      return advance(pos + 1);
    }
  }

  /**
   * A sorted variant of {@link Source} for <tt>byte[]</tt> values per document.
   * <p>
   * Note: {@link ValuesEnum} obtained from a {@link SortedSource} will
   * enumerate values in document order and not in sorted order.
   */
  public static abstract class SortedSource extends Source {

    @Override
    public BytesRef getBytes(int docID, BytesRef bytesRef) {
      final int ord = ord(docID);
      if (ord < 0) {
        bytesRef.length = 0;
      } else {
        getByOrd(ord, bytesRef);
      }
      return bytesRef;
    }

    /**
     * Returns the ord for the specified docID. If this docID has no value,
     * the ord is -1. Ords are dense, ie they start at 0 and increment by 1
     * for the next (as defined by the {@link Comparator}) value.
     */
    public abstract int ord(int docID);

    /** Returns the value for the specified ord. */
    public abstract BytesRef getByOrd(int ord, BytesRef bytesRef);

    /**
     * Finds the ordinal whose value is greater than or equal to the given
     * value.
     *
     * @return the given value's ordinal if found or otherwise
     *         <code>(-(ord)-1)</code>, defined as the ordinal of the first
     *         element that is greater than the given value. This guarantees
     *         that the return value will always be >= 0 if the given value
     *         is found.
     */
    public final int getByValue(BytesRef value) {
      return getByValue(value, new BytesRef());
    }

    /**
     * Performs a lookup by value.
     *
     * @param value
     *          the value to look up
     * @param tmpRef
     *          a temporary {@link BytesRef} instance used to compare internal
     *          values to the given value. Must not be <code>null</code>
     * @return the given value's ordinal if found or otherwise
     *         <code>(-(ord)-1)</code>, defined as the ordinal of the first
     *         element that is greater than the given value. This guarantees
     *         that the return value will always be >= 0 if the given value
     *         is found.
     */
    public abstract int getByValue(BytesRef value, BytesRef tmpRef);
  }
}
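
A rough plain-Java sketch of the dual access contract described above, with arrays standing in for the Lucene classes: the Source side gives RAM-resident random access, while the enum side walks documents in order and signals exhaustion with NO_MORE_DOCS:

public class DualAccessSketch {
  static final int NO_MORE_DOCS = Integer.MAX_VALUE;

  public static void main(String[] args) {
    long[] values = {42, 7, 13}; // one value per document, RAM resident

    // Random access, Source style:
    System.out.println(values[1]); // 7

    // Sequential access, ValuesEnum style:
    int pos = -1;
    while ((pos = advance(values, pos + 1)) != NO_MORE_DOCS) {
      System.out.println("doc " + pos + " = " + values[pos]);
    }
  }

  static int advance(long[] values, int target) {
    return target >= values.length ? NO_MORE_DOCS : target;
  }
}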
@@ -0,0 +1,46 @@
package org.apache.lucene.index.values;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.lucene.index.values.IntsImpl.IntsReader;
import org.apache.lucene.index.values.IntsImpl.IntsWriter;
import org.apache.lucene.store.Directory;

/**
 * @lucene.experimental
 */
public class Ints {
  // TODO - add bulk copy where possible

  private Ints() {
  }

  public static Writer getWriter(Directory dir, String id,
      boolean useFixedArray, AtomicLong bytesUsed) throws IOException {
    // TODO - implement fixed?!
    return new IntsWriter(dir, id, bytesUsed);
  }

  public static IndexDocValues getValues(Directory dir, String id,
      boolean useFixedArray) throws IOException {
    return new IntsReader(dir, id);
  }
}
@@ -0,0 +1,429 @@
package org.apache.lucene.index.values;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.io.IOException;
import java.util.Collection;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LongsRef;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.PackedInts;

/**
 * Stores ints packed with fixed-bit precision.
 *
 * @lucene.experimental
 */
class IntsImpl {

  private static final String CODEC_NAME = "Ints";
  private static final byte PACKED = 0x00;
  private static final byte FIXED = 0x01;

  static final int VERSION_START = 0;
  static final int VERSION_CURRENT = VERSION_START;

  static class IntsWriter extends Writer {

    // TODO: can we bulk-copy this on a merge?
    private LongsRef intsRef;
    private long[] docToValue;
    private long minValue;
    private long maxValue;
    private boolean started;
    private final String id;
    private int lastDocId = -1;
    private IndexOutput datOut;

    protected IntsWriter(Directory dir, String id, AtomicLong bytesUsed)
        throws IOException {
      super(bytesUsed);
      datOut = dir.createOutput(IndexFileNames.segmentFileName(id, "",
          DATA_EXTENSION));
      boolean success = false;
      try {
        CodecUtil.writeHeader(datOut, CODEC_NAME, VERSION_CURRENT);
        this.id = id;
        docToValue = new long[1];
        bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_LONG); // TODO the bitset needs memory too
        success = true;
      } finally {
        if (!success) {
          datOut.close();
        }
      }
    }

    @Override
    public void add(int docID, long v) throws IOException {
      assert lastDocId < docID;
      if (!started) {
        started = true;
        minValue = maxValue = v;
      } else {
        if (v < minValue) {
          minValue = v;
        } else if (v > maxValue) {
          maxValue = v;
        }
      }
      lastDocId = docID;

      if (docID >= docToValue.length) {
        final long len = docToValue.length;
        docToValue = ArrayUtil.grow(docToValue, 1 + docID);
        bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_LONG
            * ((docToValue.length) - len));
      }
      docToValue[docID] = v;
    }

    @Override
    public void finish(int docCount) throws IOException {
      try {
        if (!started) {
          minValue = maxValue = 0;
        }
        // if the range exceeds the positive long range the difference
        // overflows and turns negative, so we must switch to fixed ints
        if ((maxValue - minValue) >= 0) {
          writePackedInts(docCount);
        } else {
          writeFixedInts(docCount);
        }

      } finally {
        datOut.close();
        bytesUsed.addAndGet(-(RamUsageEstimator.NUM_BYTES_LONG * docToValue.length));
        docToValue = null;
      }
    }

    private void writeFixedInts(int docCount) throws IOException {
      datOut.writeByte(FIXED);
      datOut.writeInt(docCount);
      for (int i = 0; i < docToValue.length; i++) {
        datOut.writeLong(docToValue[i]); // write full array - we use 0 as default
      }
      for (int i = docToValue.length; i < docCount; i++) {
        datOut.writeLong(0); // fill with default values
      }
    }

    private void writePackedInts(int docCount) throws IOException {
      datOut.writeByte(PACKED);
      datOut.writeLong(minValue);
      // write a default value to recognize docs without a value for that field
      final long defaultValue = maxValue >= 0 && minValue <= 0 ? 0 - minValue : ++maxValue - minValue;
      datOut.writeLong(defaultValue);
      PackedInts.Writer w = PackedInts.getWriter(datOut, docCount,
          PackedInts.bitsRequired(maxValue - minValue));
      final int limit = docToValue.length > docCount ? docCount : docToValue.length;
      for (int i = 0; i < limit; i++) {
        w.add(docToValue[i] == 0 ? defaultValue : docToValue[i] - minValue);
      }
      for (int i = limit; i < docCount; i++) {
        w.add(defaultValue);
      }

      w.finish();
    }

    @Override
    protected void add(int docID) throws IOException {
      add(docID, intsRef.get());
    }

    @Override
    protected void setNextEnum(ValuesEnum valuesEnum) {
      intsRef = valuesEnum.getInt();
    }

    @Override
    public void add(int docID, PerDocFieldValues docValues) throws IOException {
      add(docID, docValues.getInt());
    }

    @Override
    public void files(Collection<String> files) throws IOException {
      files.add(IndexFileNames.segmentFileName(id, "", DATA_EXTENSION));
    }
  }

  /**
   * Opens all necessary files, but does not read any data in until you call
   * {@link #load}.
   */
  static class IntsReader extends IndexDocValues {
    private final IndexInput datIn;
    private final boolean packed;

    protected IntsReader(Directory dir, String id) throws IOException {
      datIn = dir.openInput(IndexFileNames.segmentFileName(id, "",
          Writer.DATA_EXTENSION));
      boolean success = false;
      try {
        CodecUtil.checkHeader(datIn, CODEC_NAME, VERSION_START, VERSION_START);
        packed = PACKED == datIn.readByte();
        success = true;
      } finally {
        if (!success) {
          IOUtils.closeSafely(true, datIn);
        }
      }
    }

    /**
     * Loads the actual values. You may call this more than once, eg if you
     * already previously loaded but then discarded the Source.
     */
    @Override
    public Source load() throws IOException {
      final IndexInput input = (IndexInput) datIn.clone();
      boolean success = false;
      try {
        final Source source = packed ? new PackedIntsSource(input)
            : new FixedIntsSource(input);
        success = true;
        return source;
      } finally {
        if (!success) {
          IOUtils.closeSafely(true, datIn);
        }
      }
    }

    private static class FixedIntsSource extends Source {
      private final long[] values;
      public FixedIntsSource(IndexInput dataIn) throws IOException {
        dataIn.seek(CodecUtil.headerLength(CODEC_NAME) + 1);
        final int numDocs = dataIn.readInt();
        values = new long[numDocs];
        for (int i = 0; i < values.length; i++) {
          values[i] = dataIn.readLong();
        }
      }

      @Override
      public long getInt(int docID) {
        assert docID >= 0 && docID < values.length;
        return values[docID];
      }

      @Override
      public ValueType type() {
        return ValueType.INTS;
      }

      @Override
      public ValuesEnum getEnum(AttributeSource attrSource)
          throws IOException {
        return new SourceEnum(attrSource, type(), this, values.length) {

          @Override
          public int advance(int target) throws IOException {
            if (target >= numDocs)
              return pos = NO_MORE_DOCS;
            intsRef.ints[intsRef.offset] = values[target];
            return pos = target;
          }
        };
      }

    }

    private static class PackedIntsSource extends Source {
      private final long minValue;
      private final long defaultValue;
      private final PackedInts.Reader values;

      public PackedIntsSource(IndexInput dataIn) throws IOException {
        dataIn.seek(CodecUtil.headerLength(CODEC_NAME) + 1);
        minValue = dataIn.readLong();
        defaultValue = dataIn.readLong();
        values = PackedInts.getReader(dataIn);
      }

      @Override
      public long getInt(int docID) {
        // TODO -- can we somehow avoid 2X method calls
        // on each get? must push minValue down, and make
        // PackedInts implement Ints.Source
        assert docID >= 0;
        final long value = values.get(docID);
        return value == defaultValue ? 0 : minValue + value;
      }

      @Override
      public ValuesEnum getEnum(AttributeSource attrSource)
          throws IOException {
        return new SourceEnum(attrSource, type(), this, values.size()) {
          @Override
          public int advance(int target) throws IOException {
            if (target >= numDocs)
              return pos = NO_MORE_DOCS;
            intsRef.ints[intsRef.offset] = source.getInt(target);
            return pos = target;
          }
        };
      }

      @Override
      public ValueType type() {
        return ValueType.INTS;
      }
    }

    @Override
    public void close() throws IOException {
      super.close();
      datIn.close();
    }

    @Override
    public ValuesEnum getEnum(AttributeSource source) throws IOException {
      final IndexInput input = (IndexInput) datIn.clone();
      boolean success = false;
      try {
        ValuesEnum inst = packed ? new PackedIntsEnumImpl(source, input)
            : new FixedIntsEnumImpl(source, input);
        success = true;
        return inst;
      } finally {
        if (!success) {
          IOUtils.closeSafely(true, input);
        }
      }
    }

    @Override
    public ValueType type() {
      return ValueType.INTS;
    }

  }

  private static final class PackedIntsEnumImpl extends ValuesEnum {
    private final PackedInts.ReaderIterator ints;
    private long minValue;
    private final IndexInput dataIn;
    private final long defaultValue;
    private final int maxDoc;
    private int pos = -1;

    private PackedIntsEnumImpl(AttributeSource source, IndexInput dataIn)
        throws IOException {
      super(source, ValueType.INTS);
      intsRef.offset = 0;
      this.dataIn = dataIn;
      dataIn.seek(CodecUtil.headerLength(CODEC_NAME) + 1);
      minValue = dataIn.readLong();
      defaultValue = dataIn.readLong();
      this.ints = PackedInts.getReaderIterator(dataIn);
      maxDoc = ints.size();
    }

    @Override
    public void close() throws IOException {
      ints.close();
      dataIn.close();
    }

    @Override
    public int advance(int target) throws IOException {
      if (target >= maxDoc) {
        return pos = NO_MORE_DOCS;
      }
      final long val = ints.advance(target);
      intsRef.ints[intsRef.offset] = val == defaultValue ? 0 : minValue + val;
      return pos = target;
    }

    @Override
    public int docID() {
      return pos;
    }

    @Override
    public int nextDoc() throws IOException {
      if (pos >= maxDoc) {
        return pos = NO_MORE_DOCS;
      }
      return advance(pos + 1);
    }
  }

  private static final class FixedIntsEnumImpl extends ValuesEnum {
    private final IndexInput dataIn;
    private final int maxDoc;
    private int pos = -1;

    private FixedIntsEnumImpl(AttributeSource source, IndexInput dataIn)
        throws IOException {
      super(source, ValueType.INTS);
      intsRef.offset = 0;
      this.dataIn = dataIn;
      dataIn.seek(CodecUtil.headerLength(CODEC_NAME) + 1);
      maxDoc = dataIn.readInt();
    }

    @Override
    public void close() throws IOException {
      dataIn.close();
    }

    @Override
    public int advance(int target) throws IOException {
      if (target >= maxDoc) {
        return pos = NO_MORE_DOCS;
      }
      assert target > pos;
      if (target > pos + 1) {
        dataIn.seek(dataIn.getFilePointer() + ((target - pos - 1) * 8));
      }
      intsRef.ints[intsRef.offset] = dataIn.readLong();
      return pos = target;
    }

    @Override
    public int docID() {
      return pos;
    }

    @Override
    public int nextDoc() throws IOException {
      if (pos >= maxDoc) {
        return pos = NO_MORE_DOCS;
      }
      return advance(pos + 1);
    }
  }

}
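The value mapping used by IntsWriter.writePackedInts and read back by PackedIntsSource.getInt can be illustrated standalone. The following is a minimal sketch, not part of the patch; the class and method names are illustrative only. Each packed cell stores (value - minValue), and documents without a value get a sentinel cell: if 0 lies inside [minValue, maxValue] the sentinel reuses 0's slot, otherwise the range is extended by one value.

public class PackedOffsetSketch {
  // sentinel cell for documents without a value, mirroring writePackedInts
  static long defaultCell(long minValue, long maxValue) {
    return maxValue >= 0 && minValue <= 0 ? 0 - minValue : maxValue + 1 - minValue;
  }

  static long encode(long value, long minValue) {
    return value - minValue; // cells are offsets from the smallest value
  }

  static long decode(long cell, long minValue, long defaultCell) {
    return cell == defaultCell ? 0 : minValue + cell; // mirrors PackedIntsSource.getInt
  }

  public static void main(String[] args) {
    long min = 100, max = 110;
    long def = defaultCell(min, max); // 0 is outside the range -> sentinel is 11
    System.out.println(decode(encode(105, min), min, def)); // 105
    System.out.println(decode(def, min, def));              // 0 (missing value)
  }
}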
@@ -0,0 +1,269 @@
package org.apache.lucene.index.values;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.io.IOException;
import java.util.Arrays;

import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.ReaderUtil;

/**
 * A wrapper for a compound IndexReader providing access to per segment
 * {@link IndexDocValues}
 *
 * @lucene.experimental
 */
public class MultiIndexDocValues extends IndexDocValues {

  public static class DocValuesIndex {
    public final static DocValuesIndex[] EMPTY_ARRAY = new DocValuesIndex[0];
    final int start;
    final int length;
    final IndexDocValues docValues;

    public DocValuesIndex(IndexDocValues docValues, int start, int length) {
      this.docValues = docValues;
      this.start = start;
      this.length = length;
    }
  }

  private DocValuesIndex[] docValuesIdx;
  private int[] starts;

  public MultiIndexDocValues() {
    starts = new int[0];
    docValuesIdx = new DocValuesIndex[0];
  }

  public MultiIndexDocValues(DocValuesIndex[] docValuesIdx) {
    reset(docValuesIdx);
  }

  @Override
  public ValuesEnum getEnum(AttributeSource source) throws IOException {
    return new MultiValuesEnum(docValuesIdx, starts);
  }

  @Override
  public Source load() throws IOException {
    return new MultiSource(docValuesIdx, starts);
  }

  public IndexDocValues reset(DocValuesIndex[] docValuesIdx) {
    int[] start = new int[docValuesIdx.length];
    for (int i = 0; i < docValuesIdx.length; i++) {
      start[i] = docValuesIdx[i].start;
    }
    this.starts = start;
    this.docValuesIdx = docValuesIdx;
    return this;
  }

  public static class DummyDocValues extends IndexDocValues {
    final int maxDoc;
    final Source emptySource;

    public DummyDocValues(int maxDoc, ValueType type) {
      this.maxDoc = maxDoc;
      this.emptySource = new EmptySource(type);
    }

    @Override
    public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
      return emptySource.getEnum(attrSource);
    }

    @Override
    public Source load() throws IOException {
      return emptySource;
    }

    @Override
    public ValueType type() {
      return emptySource.type();
    }
  }

  private static class MultiValuesEnum extends ValuesEnum {
    private DocValuesIndex[] docValuesIdx;
    private final int maxDoc;
    private int currentStart;
    private int currentMax;
    private int currentDoc = -1;
    private ValuesEnum currentEnum;
    private final int[] starts;

    public MultiValuesEnum(DocValuesIndex[] docValuesIdx, int[] starts)
        throws IOException {
      super(docValuesIdx[0].docValues.type());
      this.docValuesIdx = docValuesIdx;
      final DocValuesIndex last = docValuesIdx[docValuesIdx.length - 1];
      maxDoc = last.start + last.length;
      final DocValuesIndex idx = docValuesIdx[0];
      currentEnum = idx.docValues.getEnum(this.attributes());
      currentEnum.copyFrom(this);
      intsRef = currentEnum.intsRef;
      currentMax = idx.length;
      currentStart = 0;
      this.starts = starts;
    }

    @Override
    public void close() throws IOException {
      currentEnum.close();
    }

    @Override
    public int advance(int target) throws IOException {
      assert target > currentDoc : "target " + target
          + " must be > than the current doc " + currentDoc;
      int relativeDoc = target - currentStart;
      do {
        if (target >= maxDoc) { // we are beyond max doc
          return currentDoc = NO_MORE_DOCS;
        }
        if (target >= currentMax) {
          final int idx = ReaderUtil.subIndex(target, starts);
          currentEnum.close();
          currentEnum = docValuesIdx[idx].docValues.getEnum();
          currentEnum.copyFrom(this);
          currentStart = docValuesIdx[idx].start;
          currentMax = currentStart + docValuesIdx[idx].length;
          relativeDoc = target - currentStart;
        }
        target = currentMax; // make sure that we advance to the next enum if the current is exhausted

      } while ((relativeDoc = currentEnum.advance(relativeDoc)) == NO_MORE_DOCS);
      return currentDoc = currentStart + relativeDoc;
    }

    @Override
    public int docID() {
      return currentDoc;
    }

    @Override
    public int nextDoc() throws IOException {
      return advance(currentDoc + 1);
    }
  }

  private static class MultiSource extends Source {
    private int numDocs = 0;
    private int start = 0;
    private Source current;
    private final int[] starts;
    private final DocValuesIndex[] docValuesIdx;

    public MultiSource(DocValuesIndex[] docValuesIdx, int[] starts) {
      this.docValuesIdx = docValuesIdx;
      this.starts = starts;
      assert docValuesIdx.length != 0;

    }

    public long getInt(int docID) {
      final int doc = ensureSource(docID);
      return current.getInt(doc);
    }

    private final int ensureSource(int docID) {
      if (docID >= start && docID < start + numDocs) {
        return docID - start;
      } else {
        final int idx = ReaderUtil.subIndex(docID, starts);
        assert idx >= 0 && idx < docValuesIdx.length : "idx was " + idx
            + " for doc id: " + docID + " slices : " + Arrays.toString(starts);
        assert docValuesIdx[idx] != null;
        try {
          current = docValuesIdx[idx].docValues.getSource();
        } catch (IOException e) {
          throw new RuntimeException("load failed", e); // TODO how should we handle this
        }

        start = docValuesIdx[idx].start;
        numDocs = docValuesIdx[idx].length;
        return docID - start;
      }
    }

    public double getFloat(int docID) {
      final int doc = ensureSource(docID);
      return current.getFloat(doc);
    }

    public BytesRef getBytes(int docID, BytesRef bytesRef) {
      final int doc = ensureSource(docID);
      return current.getBytes(doc, bytesRef);
    }

    @Override
    public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
      throw new UnsupportedOperationException(); // TODO
    }

    @Override
    public ValueType type() {
      return docValuesIdx[0].docValues.type();
    }

  }

  private static class EmptySource extends Source {
    private final ValueType type;

    public EmptySource(ValueType type) {
      this.type = type;
    }

    @Override
    public BytesRef getBytes(int docID, BytesRef ref) {
      ref.length = 0;
      return ref;

    }

    @Override
    public double getFloat(int docID) {
      return 0d;
    }

    @Override
    public long getInt(int docID) {
      return 0;
    }

    @Override
    public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
      return ValuesEnum.emptyEnum(type);
    }

    @Override
    public ValueType type() {
      return type;
    }
  }

  @Override
  public ValueType type() {
    return this.docValuesIdx[0].docValues.type();
  }
}
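Both MultiValuesEnum and MultiSource.ensureSource rely on the same arithmetic: binary-search the segment start offsets to find the owning slice, then subtract that slice's start to get a per-segment doc id. A simplified sketch of ReaderUtil.subIndex follows (illustrative names, and it ignores the duplicate-start handling the real helper needs for empty segments):

import java.util.Arrays;

public class SubIndexSketch {
  static int subIndex(int docID, int[] starts) {
    int idx = Arrays.binarySearch(starts, docID);
    // a negative result encodes the insertion point; the owning slice is the one before it
    return idx >= 0 ? idx : -idx - 2;
  }

  public static void main(String[] args) {
    int[] starts = {0, 100, 250}; // three slices starting at global docs 0, 100 and 250
    int docID = 120;
    int slice = subIndex(docID, starts);    // 1
    int localDoc = docID - starts[slice];   // 20
    System.out.println(slice + " " + localDoc);
  }
}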
@@ -0,0 +1,101 @@
package org.apache.lucene.index.values;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.util.Comparator;

import org.apache.lucene.document.IndexDocValuesField;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.codecs.DocValuesConsumer;
import org.apache.lucene.util.BytesRef;

/**
 * Per document and field values consumed by {@link DocValuesConsumer}.
 * @see IndexDocValuesField
 * @see Fieldable#setDocValues(PerDocFieldValues)
 *
 * @lucene.experimental
 */
public interface PerDocFieldValues {

  /**
   * Sets the given <code>long</code> value.
   */
  public void setInt(long value);

  /**
   * Sets the given <code>float</code> value.
   */
  public void setFloat(float value);

  /**
   * Sets the given <code>double</code> value.
   */
  public void setFloat(double value);

  /**
   * Sets the given {@link BytesRef} value and the field's {@link ValueType}. The
   * comparator for this field is set to <code>null</code>. If a
   * <code>null</code> comparator is set the default comparator for the given
   * {@link ValueType} is used.
   */
  public void setBytes(BytesRef value, ValueType type);

  /**
   * Sets the given {@link BytesRef} value, the field's {@link ValueType} and the
   * field's comparator. If the {@link Comparator} is set to <code>null</code>
   * the default for the given {@link ValueType} is used instead.
   */
  public void setBytes(BytesRef value, ValueType type, Comparator<BytesRef> comp);

  /**
   * Returns the set {@link BytesRef} or <code>null</code> if not set.
   */
  public BytesRef getBytes();

  /**
   * Returns the set {@link BytesRef} comparator or <code>null</code> if not set.
   */
  public Comparator<BytesRef> bytesComparator();

  /**
   * Returns the set floating point value or <code>0.0d</code> if not set.
   */
  public double getFloat();

  /**
   * Returns the set <code>long</code> value or <code>0</code> if not set.
   */
  public long getInt();

  /**
   * Sets the {@link BytesRef} comparator for this field. If the field has a
   * numeric {@link ValueType} the comparator will be ignored.
   */
  public void setBytesComparator(Comparator<BytesRef> comp);

  /**
   * Sets the {@link ValueType}
   */
  public void setType(ValueType type);

  /**
   * Returns the {@link ValueType}
   */
  public ValueType type();

}
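A consumer typically dispatches on the declared {@link ValueType} to pick the matching accessor, the way IntsWriter.add(int, PerDocFieldValues) calls getInt(). A hypothetical helper (not part of the patch; assumes it lives in the same package) makes the pattern explicit:

// Illustrative only: routes a PerDocFieldValues to the accessor that is
// guaranteed non-default for its declared type.
static String describe(PerDocFieldValues values) {
  switch (values.type()) {
    case INTS:
      return "long: " + values.getInt();
    case FLOAT_32:
    case FLOAT_64:
      return "floating point: " + values.getFloat();
    default: // all BYTES_* variants
      return "bytes: " + values.getBytes();
  }
}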
@@ -0,0 +1,120 @@
package org.apache.lucene.index.values;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.io.IOException;
import java.util.Comparator;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.values.IndexDocValues.SortedSource;
import org.apache.lucene.index.values.IndexDocValues.Source;
import org.apache.lucene.util.BytesRef;

/**
 * Abstract base class for {@link IndexDocValues} {@link Source} /
 * {@link SortedSource} cache.
 * <p>
 * {@link Source} and {@link SortedSource} instances loaded via
 * {@link IndexDocValues#load()} and {@link IndexDocValues#loadSorted(Comparator)} are
 * entirely memory resident and need to be maintained by the caller. Each call
 * to {@link IndexDocValues#load()} or {@link IndexDocValues#loadSorted(Comparator)} will
 * cause an entire reload of the underlying data. {@link Source} and
 * {@link SortedSource} instances obtained from {@link IndexDocValues#getSource()}
 * and {@link IndexDocValues#getSortedSorted(Comparator)} respectively are maintained by a
 * {@link SourceCache} that is closed ({@link #close(IndexDocValues)}) once the
 * {@link IndexReader} that created the {@link IndexDocValues} instance is closed.
 * <p>
 * Unless {@link Source} and {@link SortedSource} instances are managed by
 * another entity it is recommended to use the cached variants to obtain a
 * source instance.
 * <p>
 * Implementations of this API must be thread-safe.
 *
 * @see IndexDocValues#setCache(SourceCache)
 * @see IndexDocValues#getSource()
 * @see IndexDocValues#getSortedSorted(Comparator)
 *
 * @lucene.experimental
 */
public abstract class SourceCache {

  /**
   * Atomically loads a {@link Source} into the cache from the given
   * {@link IndexDocValues} and returns it iff no other {@link Source} has already
   * been cached. Otherwise the cached source is returned.
   * <p>
   * This method will not return <code>null</code>
   */
  public abstract Source load(IndexDocValues values) throws IOException;

  /**
   * Atomically loads a {@link SortedSource} into the cache from the given
   * {@link IndexDocValues} and returns it iff no other {@link SortedSource} has
   * already been cached. Otherwise the cached source is returned.
   * <p>
   * This method will not return <code>null</code>
   */
  public abstract SortedSource loadSorted(IndexDocValues values,
      Comparator<BytesRef> comp) throws IOException;

  /**
   * Atomically invalidates the cached {@link Source} and {@link SortedSource}
   * instances if any and empties the cache.
   */
  public abstract void invalidate(IndexDocValues values);

  /**
   * Atomically closes the cache and frees all resources.
   */
  public synchronized void close(IndexDocValues values) {
    invalidate(values);
  }

  /**
   * Simple per {@link IndexDocValues} instance cache implementation that holds a
   * {@link Source} and {@link SortedSource} reference as a member variable.
   * <p>
   * If a {@link DirectSourceCache} instance is closed or invalidated the cached
   * references are simply set to <code>null</code>
   */
  public static final class DirectSourceCache extends SourceCache {
    private Source ref;
    private SortedSource sortedRef;

    public synchronized Source load(IndexDocValues values) throws IOException {
      if (ref == null) {
        ref = values.load();
      }
      return ref;
    }

    public synchronized SortedSource loadSorted(IndexDocValues values,
        Comparator<BytesRef> comp) throws IOException {
      if (sortedRef == null) {
        sortedRef = values.loadSorted(comp);
      }
      return sortedRef;
    }

    public synchronized void invalidate(IndexDocValues values) {
      ref = null;
      sortedRef = null;
    }
  }

}
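In practice the cache is wired through the setCache and getSource entry points named in the javadoc above. A short sketch, assuming 'values' was obtained from an IndexReader:

// Installs the default DirectSourceCache and reads through the cached Source;
// repeated getSource() calls then share one RAM-resident Source until the
// cache is invalidated or the owning reader is closed.
static long firstValue(IndexDocValues values) throws IOException {
  values.setCache(new SourceCache.DirectSourceCache());
  Source source = values.getSource(); // loaded once, then served from the cache
  return source.getInt(0);
}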
@@ -0,0 +1,184 @@
package org.apache.lucene.index.values;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.PerDocConsumer;
import org.apache.lucene.index.values.IndexDocValues.SortedSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.packed.PackedInts;

/**
 * <code>ValueType</code> specifies the {@link IndexDocValues} type for a
 * certain field. A <code>ValueType</code> only defines the data type for a field
 * while the actual implementation used to encode and decode the values depends
 * on the {@link Codec#docsConsumer} and {@link Codec#docsProducer} methods.
 *
 * @lucene.experimental
 */
public enum ValueType {
  /*
   * TODO: Add INT_32 INT_64 INT_16 & INT_8?!
   */
  /**
   * A 64 bit integer value. By default this type uses
   * {@link PackedInts} to compress the values, as an offset
   * from the minimum value, as long as the value range
   * fits into 2<sup>63</sup>-1. Otherwise,
   * the default implementation falls back to fixed size 64bit
   * integers.
   * <p>
   * NOTE: this type uses <tt>0</tt> as the default value without any
   * distinction between provided <tt>0</tt> values during indexing. All
   * documents without an explicit value will use <tt>0</tt> instead. In turn,
   * {@link ValuesEnum} instances will not skip documents without an explicit
   * value assigned. Custom default values must be assigned explicitly.
   * </p>
   */
  INTS,

  /**
   * A 32 bit floating point value. By default there is no compression
   * applied. To fit custom float values into less than 32bit either a custom
   * implementation is needed or values must be encoded into a
   * {@link #BYTES_FIXED_STRAIGHT} type.
   * <p>
   * NOTE: this type uses <tt>0.0f</tt> as the default value without any
   * distinction between provided <tt>0.0f</tt> values during indexing. All
   * documents without an explicit value will use <tt>0.0f</tt> instead. In
   * turn, {@link ValuesEnum} instances will not skip documents without an
   * explicit value assigned. Custom default values must be assigned explicitly.
   * </p>
   */
  FLOAT_32,
  /**
   * A 64 bit floating point value. By default there is no compression
   * applied. To fit custom float values into less than 64bit either a custom
   * implementation is needed or values must be encoded into a
   * {@link #BYTES_FIXED_STRAIGHT} type.
   * <p>
   * NOTE: this type uses <tt>0.0d</tt> as the default value without any
   * distinction between provided <tt>0.0d</tt> values during indexing. All
   * documents without an explicit value will use <tt>0.0d</tt> instead. In
   * turn, {@link ValuesEnum} instances will not skip documents without an
   * explicit value assigned. Custom default values must be assigned explicitly.
   * </p>
   */
  FLOAT_64,

  // TODO(simonw): -- shouldn't lucene decide/detect straight vs
  // deref, as well fixed vs var?
  /**
   * A fixed length straight byte[]. All values added to
   * such a field must be of the same length. All bytes are stored sequentially
   * for fast offset access.
   * <p>
   * NOTE: this type uses a <tt>0 byte</tt> filled byte[], based on the length of the first seen
   * value, as the default value without any distinction between explicitly
   * provided values during indexing. All documents without an explicit value
   * will use the default instead. In turn, {@link ValuesEnum} instances will
   * not skip documents without an explicit value assigned. Custom default
   * values must be assigned explicitly.
   * </p>
   */
  BYTES_FIXED_STRAIGHT,

  /**
   * A fixed length dereferenced byte[] variant. Fields with
   * this type only store distinct byte values and store an additional offset
   * pointer per document to dereference the shared byte[].
   * Use this type if your documents may share the same byte[].
   * <p>
   * NOTE: Fields of this type will not store values for documents without an
   * explicitly provided value. If a document's value is accessed while no
   * explicit value is stored the returned {@link BytesRef} will be a 0-length
   * reference. In turn, {@link ValuesEnum} instances will skip over documents
   * without an explicit value assigned. Custom default values must be assigned
   * explicitly.
   * </p>
   */
  BYTES_FIXED_DEREF,

  /**
   * A fixed length pre-sorted byte[] variant. Fields with this type only
   * store distinct byte values and store an additional offset pointer per
   * document to dereference the shared byte[]. The stored
   * byte[] is presorted, by default by unsigned byte order,
   * and allows access via document id, ordinal and by-value.
   * Use this type if your documents may share the same byte[].
   * <p>
   * NOTE: Fields of this type will not store values for documents without an
   * explicitly provided value. If a document's value is accessed while no
   * explicit value is stored the returned {@link BytesRef} will be a 0-length
   * reference. In turn, {@link ValuesEnum} instances will skip over documents
   * without an explicit value assigned. Custom default values must be assigned
   * explicitly.
   * </p>
   *
   * @see SortedSource
   */
  BYTES_FIXED_SORTED,

  /**
   * Variable length straight stored byte[] variant. All bytes are
   * stored sequentially for compactness. Usage of this type via the
   * disk-resident API might yield performance degradation since no additional
   * index is used to advance by more than one document value at a time.
   * <p>
   * NOTE: Fields of this type will not store values for documents without an
   * explicitly provided value. If a document's value is accessed while no
   * explicit value is stored the returned {@link BytesRef} will be a 0-length
   * byte[] reference. In contrast to dereferenced variants, {@link ValuesEnum}
   * instances will <b>not</b> skip over documents without an explicit value
   * assigned. Custom default values must be assigned explicitly.
   * </p>
   */
  BYTES_VAR_STRAIGHT,

  /**
   * A variable length dereferenced byte[]. Just like
   * {@link #BYTES_FIXED_DEREF}, but allowing each
   * document's value to be a different length.
   * <p>
   * NOTE: Fields of this type will not store values for documents without an
   * explicitly provided value. If a document's value is accessed while no
   * explicit value is stored the returned {@link BytesRef} will be a 0-length
   * reference. In turn, {@link ValuesEnum} instances will skip over documents
   * without an explicit value assigned. Custom default values must be assigned
   * explicitly.
   * </p>
   */
  BYTES_VAR_DEREF,

  /**
   * A variable length pre-sorted byte[] variant. Just like
   * {@link #BYTES_FIXED_SORTED}, but allowing each
   * document's value to be a different length.
   * <p>
   * NOTE: Fields of this type will not store values for documents without an
   * explicitly provided value. If a document's value is accessed while no
   * explicit value is stored the returned {@link BytesRef} will be a 0-length
   * reference. In turn, {@link ValuesEnum} instances will skip over documents
   * without an explicit value assigned. Custom default values must be assigned
   * explicitly.
   * </p>
   *
   * @see SortedSource
   */
  BYTES_VAR_SORTED
}
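The INTS javadoc above describes the offset-from-minimum compression; the per-document bit width that scheme buys is exactly what PackedInts.bitsRequired reports, as used by IntsImpl. A quick sketch (illustrative class name):

import org.apache.lucene.util.packed.PackedInts;

public class BitsRequiredSketch {
  public static void main(String[] args) {
    long minValue = 1000000L, maxValue = 1000255L;
    // offsets 0..255 need only 8 bits per document instead of 64
    System.out.println(PackedInts.bitsRequired(maxValue - minValue));
  }
}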
@@ -0,0 +1,173 @@
package org.apache.lucene.index.values;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.io.IOException;

import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FloatsRef;
import org.apache.lucene.util.LongsRef;

/**
 * {@link ValuesEnum} is a {@link DocIdSetIterator} iterating <tt>byte[]</tt>,
 * <tt>long</tt> and <tt>double</tt> values stored per document. Depending on the
 * enum's {@link ValueType} ({@link #type()}) the enum might skip over documents that
 * have no value stored. Types like {@link ValueType#BYTES_VAR_STRAIGHT} might not
 * skip over documents even if there is no value associated with a document. The
 * value for documents without values again depends on the type's implementation,
 * although a reference for a {@link ValueType} returned from an accessor method
 * {@link #getFloat()}, {@link #getInt()} or {@link #bytes()} will never be
 * <code>null</code> even if a document has no value.
 * <p>
 * Note: Only the reference for the enum's type is initialized to non
 * <code>null</code>, i.e. {@link #getInt()} will always return <code>null</code>
 * if the enum's type is {@link ValueType#FLOAT_32}.
 *
 * @lucene.experimental
 */
public abstract class ValuesEnum extends DocIdSetIterator {
  private AttributeSource source;
  private final ValueType enumType;
  protected BytesRef bytesRef;
  protected FloatsRef floatsRef;
  protected LongsRef intsRef;

  /**
   * Creates a new {@link ValuesEnum} for the given type. The
   * {@link AttributeSource} for this enum is set to <code>null</code>
   */
  protected ValuesEnum(ValueType enumType) {
    this(null, enumType);
  }

  /**
   * Creates a new {@link ValuesEnum} for the given type.
   */
  protected ValuesEnum(AttributeSource source, ValueType enumType) {
    this.source = source;
    this.enumType = enumType;
    switch (enumType) {
    case BYTES_FIXED_DEREF:
    case BYTES_FIXED_SORTED:
    case BYTES_FIXED_STRAIGHT:
    case BYTES_VAR_DEREF:
    case BYTES_VAR_SORTED:
    case BYTES_VAR_STRAIGHT:
      bytesRef = new BytesRef();
      break;
    case INTS:
      intsRef = new LongsRef(1);
      break;
    case FLOAT_32:
    case FLOAT_64:
      floatsRef = new FloatsRef(1);
      break;
    }
  }

  /**
   * Returns the type of this enum
   */
  public ValueType type() {
    return enumType;
  }

  /**
   * Returns a {@link BytesRef} or <code>null</code> if this enum doesn't
   * enumerate byte[] values
   */
  public BytesRef bytes() {
    return bytesRef;
  }

  /**
   * Returns a {@link FloatsRef} or <code>null</code> if this enum doesn't
   * enumerate floating point values
   */
  public FloatsRef getFloat() {
    return floatsRef;
  }

  /**
   * Returns a {@link LongsRef} or <code>null</code> if this enum doesn't
   * enumerate integer values.
   */
  public LongsRef getInt() {
    return intsRef;
  }

  /**
   * Copies the internal state from the given enum
   */
  protected void copyFrom(ValuesEnum valuesEnum) {
    intsRef = valuesEnum.intsRef;
    floatsRef = valuesEnum.floatsRef;
    bytesRef = valuesEnum.bytesRef;
    source = valuesEnum.source;
  }

  /**
   * Returns the {@link AttributeSource} associated with this enum.
   * <p>
   * Note: this method might create a new AttributeSource if no
   * {@link AttributeSource} has been provided during enum creation.
   */
  public AttributeSource attributes() {
    if (source == null) {
      source = new AttributeSource();
    }
    return source;
  }

  /**
   * Closes the enum
   *
   * @throws IOException
   *           if an {@link IOException} occurs
   */
  public abstract void close() throws IOException;

  /**
   * Returns an empty {@link ValuesEnum} for the given {@link ValueType}.
   */
  public static ValuesEnum emptyEnum(ValueType type) {
    return new ValuesEnum(type) {
      @Override
      public int nextDoc() throws IOException {
        return NO_MORE_DOCS;
      }

      @Override
      public int docID() {
        return NO_MORE_DOCS;
      }

      @Override
      public int advance(int target) throws IOException {
        return NO_MORE_DOCS;
      }

      @Override
      public void close() throws IOException {

      }
    };
  }

}
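The consumption pattern for an integer-typed enum follows the usual DocIdSetIterator loop. A sketch, assuming 'docValues' is any {@link ValueType#INTS} IndexDocValues instance (the acquisition call mirrors IntsReader.getEnum):

static void dumpInts(IndexDocValues docValues) throws IOException {
  ValuesEnum valuesEnum = docValues.getEnum(new AttributeSource());
  try {
    int doc;
    while ((doc = valuesEnum.nextDoc()) != ValuesEnum.NO_MORE_DOCS) {
      LongsRef ref = valuesEnum.getInt(); // non-null because type() == INTS
      System.out.println(doc + " -> " + ref.get());
    }
  } finally {
    valuesEnum.close();
  }
}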
@ -0,0 +1,287 @@
|
||||||
|
package org.apache.lucene.index.values;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.concurrent.atomic.AtomicLong;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.values.Bytes.BytesBaseSource;
|
||||||
|
import org.apache.lucene.index.values.Bytes.BytesReaderBase;
|
||||||
|
import org.apache.lucene.index.values.Bytes.BytesWriterBase;
|
||||||
|
import org.apache.lucene.index.values.FixedDerefBytesImpl.Reader.DerefBytesEnum;
|
||||||
|
import org.apache.lucene.store.DataOutput;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.store.IndexInput;
|
||||||
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
|
import org.apache.lucene.util.AttributeSource;
|
||||||
|
import org.apache.lucene.util.ByteBlockPool;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.BytesRefHash;
|
||||||
|
import org.apache.lucene.util.CodecUtil;
|
||||||
|
import org.apache.lucene.util.PagedBytes;
|
||||||
|
import org.apache.lucene.util.RamUsageEstimator;
|
||||||
|
import org.apache.lucene.util.ByteBlockPool.Allocator;
|
||||||
|
import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator;
|
||||||
|
import org.apache.lucene.util.BytesRefHash.TrackingDirectBytesStartArray;
|
||||||
|
import org.apache.lucene.util.packed.PackedInts;
|
||||||
|
|
||||||
|
// Stores variable-length byte[] by deref, ie when two docs
|
||||||
|
// have the same value, they store only 1 byte[] and both
|
||||||
|
// docs reference that single source
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @lucene.experimental
|
||||||
|
*/
|
||||||
|
class VarDerefBytesImpl {
|
||||||
|
|
||||||
|
static final String CODEC_NAME = "VarDerefBytes";
|
||||||
|
static final int VERSION_START = 0;
|
||||||
|
static final int VERSION_CURRENT = VERSION_START;
|
||||||
|
|
||||||
|
private static final class AddressByteStartArray extends
|
||||||
|
TrackingDirectBytesStartArray {
|
||||||
|
int[] address;
|
||||||
|
|
||||||
|
AddressByteStartArray(int size, AtomicLong bytesUsed) {
|
||||||
|
super(size, bytesUsed);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public AtomicLong bytesUsed() {
|
||||||
|
return bytesUsed;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int[] clear() {
|
||||||
|
if (address != null) {
|
||||||
|
bytesUsed.addAndGet(-address.length * RamUsageEstimator.NUM_BYTES_INT);
|
||||||
|
address = null;
|
||||||
|
}
|
||||||
|
return super.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int[] grow() {
|
||||||
|
assert address != null;
|
||||||
|
final int oldSize = address.length;
|
||||||
|
final int[] retVal = super.grow();
|
||||||
|
address = ArrayUtil.grow(address, retVal.length);
|
||||||
|
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT
|
||||||
|
* (address.length - oldSize));
|
||||||
|
return retVal;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int[] init() {
|
||||||
|
if (address == null) {
|
||||||
|
address = new int[ArrayUtil.oversize(initSize,
|
||||||
|
RamUsageEstimator.NUM_BYTES_INT)];
|
||||||
|
bytesUsed.addAndGet((address.length) * RamUsageEstimator.NUM_BYTES_INT);
|
||||||
|
}
|
||||||
|
return super.init();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* TODO: if impls like this are merged we are bound to the amount of memory we
|
||||||
|
* can store into a BytesRefHash and therefore how much memory a ByteBlockPool
|
||||||
|
* can address. This is currently limited to 2GB. While we could extend that
|
||||||
|
* and use 64bit for addressing this still limits us to the existing main
|
||||||
|
* memory as all distinct bytes will be loaded up into main memory. We could
|
||||||
|
* move the byte[] writing to #finish(int) and store the bytes in sorted
|
||||||
|
* order and merge them in a streamed fashion.
|
||||||
|
*/
|
||||||
|
static class Writer extends BytesWriterBase {
|
||||||
|
private int[] docToAddress;
|
||||||
|
private int address = 1;
|
||||||
|
|
||||||
|
private final AddressByteStartArray array = new AddressByteStartArray(1,
|
||||||
|
bytesUsed);
|
||||||
|
private final BytesRefHash hash = new BytesRefHash(pool, 16, array);
|
||||||
|
|
||||||
|
public Writer(Directory dir, String id, AtomicLong bytesUsed)
|
||||||
|
throws IOException {
|
||||||
|
this(dir, id, new DirectTrackingAllocator(ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed),
|
||||||
|
bytesUsed);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Writer(Directory dir, String id, Allocator allocator,
|
||||||
|
AtomicLong bytesUsed) throws IOException {
|
||||||
|
super(dir, id, CODEC_NAME, VERSION_CURRENT, true,
|
||||||
|
new ByteBlockPool(allocator), bytesUsed);
|
||||||
|
docToAddress = new int[1];
|
||||||
|
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT);
|
||||||
|
}
|
    @Override
    public void add(int docID, BytesRef bytes) throws IOException {
      if (bytes.length == 0)
        return; // default
      final int e = hash.add(bytes);

      if (docID >= docToAddress.length) {
        final int oldSize = docToAddress.length;
        docToAddress = ArrayUtil.grow(docToAddress, 1 + docID);
        bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT
            * (docToAddress.length - oldSize));
      }
      final int docAddress;
      if (e >= 0) {
        docAddress = array.address[e] = address;
        address += writePrefixLength(datOut, bytes);
        datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
        address += bytes.length;
      } else {
        docAddress = array.address[(-e) - 1];
      }
      docToAddress[docID] = docAddress;
    }

    private static int writePrefixLength(DataOutput datOut, BytesRef bytes)
        throws IOException {
      if (bytes.length < 128) {
        datOut.writeByte((byte) bytes.length);
        return 1;
      } else {
        datOut.writeByte((byte) (0x80 | (bytes.length >> 8)));
        datOut.writeByte((byte) (bytes.length & 0xff));
        return 2;
      }
    }

    // Important that we get docCount, in case there were
    // some last docs that we didn't see
    @Override
    public void finish(int docCount) throws IOException {
      try {
        idxOut.writeInt(address - 1);
        // write index
        // TODO(simonw): -- allow forcing fixed array (not -1)
        // TODO(simonw): check the address calculation / make it more intuitive
        final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
            PackedInts.bitsRequired(address - 1));
        final int limit;
        if (docCount > docToAddress.length) {
          limit = docToAddress.length;
        } else {
          limit = docCount;
        }
        for (int i = 0; i < limit; i++) {
          w.add(docToAddress[i]);
        }
        for (int i = limit; i < docCount; i++) {
          w.add(0);
        }
        w.finish();
      } finally {
        hash.close();
        super.finish(docCount);
        bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT
            * (-docToAddress.length));
        docToAddress = null;
      }
    }
  }

  public static class Reader extends BytesReaderBase {

    Reader(Directory dir, String id, int maxDoc) throws IOException {
      super(dir, id, CODEC_NAME, VERSION_START, true);
    }

    @Override
    public Source load() throws IOException {
      final IndexInput data = cloneData();
      final IndexInput index = cloneIndex();
      data.seek(CodecUtil.headerLength(CODEC_NAME));
      index.seek(CodecUtil.headerLength(CODEC_NAME));
      final long totalBytes = index.readInt(); // should be long
      return new Source(data, index, totalBytes);
    }

    private static class Source extends BytesBaseSource {
      private final PackedInts.Reader index;

      public Source(IndexInput datIn, IndexInput idxIn, long totalBytes)
          throws IOException {
        super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), totalBytes);
        index = PackedInts.getReader(idxIn);
      }

      @Override
      public BytesRef getBytes(int docID, BytesRef bytesRef) {
        long address = index.get(docID);
        bytesRef.length = 0;
        return address == 0 ? bytesRef : data.fillSliceWithPrefix(bytesRef,
            --address);
      }

      @Override
      public int getValueCount() {
        throw new UnsupportedOperationException();
      }

      @Override
      public ValueType type() {
        return ValueType.BYTES_VAR_DEREF;
      }

      @Override
      protected int maxDoc() {
        return index.size();
      }
    }

    @Override
    public ValuesEnum getEnum(AttributeSource source) throws IOException {
      return new VarDerefBytesEnum(source, cloneData(), cloneIndex());
    }

    static class VarDerefBytesEnum extends DerefBytesEnum {

      public VarDerefBytesEnum(AttributeSource source, IndexInput datIn,
          IndexInput idxIn) throws IOException {
        super(source, datIn, idxIn, -1, ValueType.BYTES_VAR_DEREF);
      }

      @Override
      protected void fill(long address, BytesRef ref) throws IOException {
        datIn.seek(fp + --address);
        final byte sizeByte = datIn.readByte();
        final int size;
        if ((sizeByte & 128) == 0) {
          // length is 1 byte
          size = sizeByte;
        } else {
          size = ((sizeByte & 0x7f) << 8) | ((datIn.readByte() & 0xff));
        }
        if (ref.bytes.length < size)
          ref.grow(size);
        ref.length = size;
        ref.offset = 0;
        datIn.readBytes(ref.bytes, 0, size);
      }
    }

    @Override
    public ValueType type() {
      return ValueType.BYTES_VAR_DEREF;
    }
  }
}
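The writePrefixLength/fill pair above round-trips a 1-or-2-byte length prefix: lengths under 128 fit in a single byte, anything larger sets the high bit and spills into a second byte, so the largest encodable length is 0x7fff (32767). A minimal standalone sketch of the same scheme, with hypothetical class and method names that are not part of this patch:

// Standalone sketch of the prefix-length scheme used by writePrefixLength()
// and fill() above (illustrative names only, not Lucene API).
public class PrefixLengthDemo {
  static int encode(byte[] out, int length) {
    if (length < 128) {
      out[0] = (byte) length;          // one byte, high bit clear
      return 1;
    } else {
      out[0] = (byte) (0x80 | (length >> 8)); // high bit marks 2-byte form
      out[1] = (byte) (length & 0xff);
      return 2;
    }
  }

  static int decode(byte[] in) {
    final byte first = in[0];
    if ((first & 0x80) == 0) {
      return first;                    // single-byte length
    }
    return ((first & 0x7f) << 8) | (in[1] & 0xff);
  }

  public static void main(String[] args) {
    byte[] buf = new byte[2];
    for (int len : new int[] {0, 5, 127, 128, 300, 32767}) {
      encode(buf, len);
      assert decode(buf) == len;
    }
    System.out.println("round-trip ok");
  }
}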
@@ -0,0 +1,315 @@
package org.apache.lucene.index.values;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.lucene.index.values.Bytes.BytesBaseSortedSource;
import org.apache.lucene.index.values.Bytes.BytesReaderBase;
import org.apache.lucene.index.values.Bytes.BytesWriterBase;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.ByteBlockPool.Allocator;
import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator;
import org.apache.lucene.util.BytesRefHash.TrackingDirectBytesStartArray;
import org.apache.lucene.util.packed.PackedInts;

// Stores variable-length byte[] by deref, ie when two docs
// have the same value, they store only 1 byte[] and both
// docs reference that single source

/**
 * @lucene.experimental
 */
class VarSortedBytesImpl {

  static final String CODEC_NAME = "VarDerefBytes";
  static final int VERSION_START = 0;
  static final int VERSION_CURRENT = VERSION_START;

  static class Writer extends BytesWriterBase {
    private int[] docToEntry;
    private final Comparator<BytesRef> comp;

    private final BytesRefHash hash = new BytesRefHash(pool,
        BytesRefHash.DEFAULT_CAPACITY, new TrackingDirectBytesStartArray(
            BytesRefHash.DEFAULT_CAPACITY, bytesUsed));

    public Writer(Directory dir, String id, Comparator<BytesRef> comp,
        AtomicLong bytesUsed) throws IOException {
      this(dir, id, comp,
          new DirectTrackingAllocator(ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed),
          bytesUsed);
    }

    public Writer(Directory dir, String id, Comparator<BytesRef> comp,
        Allocator allocator, AtomicLong bytesUsed) throws IOException {
      super(dir, id, CODEC_NAME, VERSION_CURRENT, true,
          new ByteBlockPool(allocator), bytesUsed);
      this.comp = comp;
      docToEntry = new int[1];
      docToEntry[0] = -1;
      bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT);
    }

    @Override
    public void add(int docID, BytesRef bytes) throws IOException {
      if (bytes.length == 0)
        return; // default
      if (docID >= docToEntry.length) {
        int[] newArray = new int[ArrayUtil.oversize(1 + docID,
            RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
        System.arraycopy(docToEntry, 0, newArray, 0, docToEntry.length);
        Arrays.fill(newArray, docToEntry.length, newArray.length, -1);
        bytesUsed.addAndGet((newArray.length - docToEntry.length)
            * RamUsageEstimator.NUM_BYTES_INT);
        docToEntry = newArray;
      }
      final int e = hash.add(bytes);
      docToEntry[docID] = e < 0 ? (-e) - 1 : e;
    }

    // Important that we get docCount, in case there were
    // some last docs that we didn't see
    @Override
    public void finish(int docCount) throws IOException {
      final int count = hash.size();
      try {
        final int[] sortedEntries = hash.sort(comp);
        // first dump bytes data, recording index & offset as
        // we go
        long offset = 0;
        long lastOffset = 0;
        final int[] index = new int[count];
        final long[] offsets = new long[count];
        for (int i = 0; i < count; i++) {
          final int e = sortedEntries[i];
          offsets[i] = offset;
          index[e] = 1 + i;

          final BytesRef bytes = hash.get(e, new BytesRef());
          // TODO: we could prefix code...
          datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
          lastOffset = offset;
          offset += bytes.length;
        }

        // total bytes of data
        idxOut.writeLong(offset);

        // write index -- first doc -> 1+ord
        // TODO(simonw): allow not -1:
        final PackedInts.Writer indexWriter = PackedInts.getWriter(idxOut,
            docCount, PackedInts.bitsRequired(count));
        final int limit = docCount > docToEntry.length ? docToEntry.length
            : docCount;
        for (int i = 0; i < limit; i++) {
          final int e = docToEntry[i];
          indexWriter.add(e == -1 ? 0 : index[e]);
        }
        for (int i = limit; i < docCount; i++) {
          indexWriter.add(0);
        }
        indexWriter.finish();

        // next ord (0-based) -> offset
        // TODO(simonw): -- allow not -1:
        PackedInts.Writer offsetWriter = PackedInts.getWriter(idxOut, count,
            PackedInts.bitsRequired(lastOffset));
        for (int i = 0; i < count; i++) {
          offsetWriter.add(offsets[i]);
        }
        offsetWriter.finish();
      } finally {
        super.finish(docCount);
        bytesUsed.addAndGet((-docToEntry.length)
            * RamUsageEstimator.NUM_BYTES_INT);
        hash.close();
      }
    }
  }

  public static class Reader extends BytesReaderBase {

    Reader(Directory dir, String id, int maxDoc) throws IOException {
      super(dir, id, CODEC_NAME, VERSION_START, true);
    }

    @Override
    public org.apache.lucene.index.values.IndexDocValues.Source load()
        throws IOException {
      return loadSorted(null);
    }

    @Override
    public SortedSource loadSorted(Comparator<BytesRef> comp)
        throws IOException {
      IndexInput indexIn = cloneIndex();
      return new Source(cloneData(), indexIn, comp, indexIn.readLong());
    }

    private static class Source extends BytesBaseSortedSource {
      private final PackedInts.Reader docToOrdIndex;
      private final PackedInts.Reader ordToOffsetIndex; // 0-based
      private final long totBytes;
      private final int valueCount;

      public Source(IndexInput datIn, IndexInput idxIn,
          Comparator<BytesRef> comp, long dataLength) throws IOException {
        super(datIn, idxIn, comp, new PagedBytes(PAGED_BYTES_BITS), dataLength);
        totBytes = dataLength;
        docToOrdIndex = PackedInts.getReader(idxIn);
        ordToOffsetIndex = PackedInts.getReader(idxIn);
        valueCount = ordToOffsetIndex.size();
        closeIndexInput();
      }

      @Override
      public int ord(int docID) {
        return (int) docToOrdIndex.get(docID) - 1;
      }

      @Override
      public int getByValue(BytesRef bytes, BytesRef tmpRef) {
        return binarySearch(bytes, tmpRef, 0, valueCount - 1);
      }

      @Override
      public int getValueCount() {
        return valueCount;
      }

      // ord is 0-based
      @Override
      protected BytesRef deref(int ord, BytesRef bytesRef) {
        final long nextOffset;
        if (ord == valueCount - 1) {
          nextOffset = totBytes;
        } else {
          nextOffset = ordToOffsetIndex.get(1 + ord);
        }
        final long offset = ordToOffsetIndex.get(ord);
        data.fillSlice(bytesRef, offset, (int) (nextOffset - offset));
        return bytesRef;
      }

      @Override
      public ValueType type() {
        return ValueType.BYTES_VAR_SORTED;
      }

      @Override
      protected int maxDoc() {
        return docToOrdIndex.size();
      }
    }

    @Override
    public ValuesEnum getEnum(AttributeSource source) throws IOException {
      return new VarSortedBytesEnum(source, cloneData(), cloneIndex());
    }

    private static class VarSortedBytesEnum extends ValuesEnum {
      private PackedInts.Reader docToOrdIndex;
      private PackedInts.Reader ordToOffsetIndex;
      private IndexInput idxIn;
      private IndexInput datIn;
      private int valueCount;
      private long totBytes;
      private int docCount;
      private int pos = -1;
      private final long fp;

      protected VarSortedBytesEnum(AttributeSource source, IndexInput datIn,
          IndexInput idxIn) throws IOException {
        super(source, ValueType.BYTES_VAR_SORTED);
        totBytes = idxIn.readLong();
        // keep that in memory to prevent lots of disk seeks
        docToOrdIndex = PackedInts.getReader(idxIn);
        ordToOffsetIndex = PackedInts.getReader(idxIn);
        valueCount = ordToOffsetIndex.size();
        docCount = docToOrdIndex.size();
        fp = datIn.getFilePointer();
        this.idxIn = idxIn;
        this.datIn = datIn;
      }

      @Override
      public void close() throws IOException {
        idxIn.close();
        datIn.close();
      }

      @Override
      public int advance(int target) throws IOException {
        if (target >= docCount) {
          return pos = NO_MORE_DOCS;
        }
        int ord;
        while ((ord = (int) docToOrdIndex.get(target)) == 0) {
          if (++target >= docCount) {
            return pos = NO_MORE_DOCS;
          }
        }
        final long offset = ordToOffsetIndex.get(--ord);
        final long nextOffset;
        if (ord == valueCount - 1) {
          nextOffset = totBytes;
        } else {
          nextOffset = ordToOffsetIndex.get(1 + ord);
        }
        final int length = (int) (nextOffset - offset);
        datIn.seek(fp + offset);
        if (bytesRef.bytes.length < length)
          bytesRef.grow(length);
        datIn.readBytes(bytesRef.bytes, 0, length);
        bytesRef.length = length;
        bytesRef.offset = 0;
        return pos = target;
      }

      @Override
      public int docID() {
        return pos;
      }

      @Override
      public int nextDoc() throws IOException {
        if (pos >= docCount) {
          return pos = NO_MORE_DOCS;
        }
        return advance(pos + 1);
      }
    }

    @Override
    public ValueType type() {
      return ValueType.BYTES_VAR_SORTED;
    }
  }
}
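The sorted variant resolves a document's value through two packed arrays: docToOrd maps a document to 1+ord (0 meaning "no value"), and ordToOffset maps an ord to its start offset in the concatenated data, with the next ord's offset (or the total byte count) providing the end. A small sketch of that lookup under those assumptions, using plain arrays in place of PackedInts readers (illustrative names, not Lucene API):

// Sketch of the two-level doc -> ord -> offset lookup used by
// VarSortedBytesImpl (plain arrays stand in for PackedInts readers).
public class SortedLookupDemo {
  static byte[] lookup(int docID, long[] docToOrd, long[] ordToOffset,
      byte[] data, long totBytes) {
    int ord = (int) docToOrd[docID] - 1; // stored as 1+ord; 0 means "missing"
    if (ord < 0) {
      return new byte[0];               // document has no value
    }
    long start = ordToOffset[ord];
    long end = ord == ordToOffset.length - 1 ? totBytes : ordToOffset[ord + 1];
    byte[] result = new byte[(int) (end - start)];
    System.arraycopy(data, (int) start, result, 0, result.length);
    return result;
  }

  public static void main(String[] args) {
    byte[] data = "abxyz".getBytes();   // two sorted values: "ab", "xyz"
    long[] ordToOffset = {0, 2};        // ord 0 -> "ab", ord 1 -> "xyz"
    long[] docToOrd = {2, 0, 1};        // doc0 -> "xyz", doc1 missing, doc2 -> "ab"
    System.out.println(new String(lookup(0, docToOrd, ordToOffset, data, data.length))); // xyz
    System.out.println(new String(lookup(2, docToOrd, ordToOffset, data, data.length))); // ab
  }
}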
@@ -0,0 +1,247 @@
package org.apache.lucene.index.values;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.lucene.index.values.Bytes.BytesBaseSource;
import org.apache.lucene.index.values.Bytes.BytesReaderBase;
import org.apache.lucene.index.values.Bytes.BytesWriterBase;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.PackedInts;

// Variable length byte[] per document, no sharing

/**
 * @lucene.experimental
 */
class VarStraightBytesImpl {

  static final String CODEC_NAME = "VarStraightBytes";
  static final int VERSION_START = 0;
  static final int VERSION_CURRENT = VERSION_START;

  static class Writer extends BytesWriterBase {
    private long address;
    // start at -1 if the first added value is > 0
    private int lastDocID = -1;
    private long[] docToAddress;

    public Writer(Directory dir, String id, AtomicLong bytesUsed)
        throws IOException {
      super(dir, id, CODEC_NAME, VERSION_CURRENT, true, null, bytesUsed);
      docToAddress = new long[1];
      bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT);
    }

    public Writer(Directory dir, String id) throws IOException {
      this(dir, id, new AtomicLong());
    }

    // Fills up to but not including this docID
    private void fill(final int docID) {
      if (docID >= docToAddress.length) {
        int oldSize = docToAddress.length;
        docToAddress = ArrayUtil.grow(docToAddress, 1 + docID);
        bytesUsed.addAndGet((docToAddress.length - oldSize)
            * RamUsageEstimator.NUM_BYTES_INT);
      }
      for (int i = lastDocID + 1; i < docID; i++) {
        docToAddress[i] = address;
      }
      lastDocID = docID;
    }

    @Override
    public void add(int docID, BytesRef bytes) throws IOException {
      if (bytes.length == 0)
        return; // default
      fill(docID);
      docToAddress[docID] = address;
      datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
      address += bytes.length;
    }

    @Override
    public void finish(int docCount) throws IOException {
      try {
        if (lastDocID == -1) {
          idxOut.writeVLong(0);
          final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
              PackedInts.bitsRequired(0));
          for (int i = 0; i < docCount; i++) {
            w.add(0);
          }
          w.finish();
        } else {
          fill(docCount);
          idxOut.writeVLong(address);
          final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
              PackedInts.bitsRequired(address));
          for (int i = 0; i < docCount; i++) {
            w.add(docToAddress[i]);
          }
          w.finish();
        }
      } finally {
        bytesUsed.addAndGet(-(docToAddress.length)
            * RamUsageEstimator.NUM_BYTES_INT);
        docToAddress = null;
        super.finish(docCount);
      }
    }

    public long ramBytesUsed() {
      return bytesUsed.get();
    }
  }

  public static class Reader extends BytesReaderBase {
    private final int maxDoc;

    Reader(Directory dir, String id, int maxDoc) throws IOException {
      super(dir, id, CODEC_NAME, VERSION_START, true);
      this.maxDoc = maxDoc;
    }

    @Override
    public Source load() throws IOException {
      return new Source(cloneData(), cloneIndex());
    }

    private class Source extends BytesBaseSource {
      private final PackedInts.Reader addresses;

      public Source(IndexInput datIn, IndexInput idxIn) throws IOException {
        super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), idxIn.readVLong());
        addresses = PackedInts.getReader(idxIn);
      }

      @Override
      public BytesRef getBytes(int docID, BytesRef bytesRef) {
        final long address = addresses.get(docID);
        final int length = docID == maxDoc - 1 ? (int) (totalLengthInBytes - address)
            : (int) (addresses.get(1 + docID) - address);
        return data.fillSlice(bytesRef, address, length);
      }

      @Override
      public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
        return new SourceEnum(attrSource, type(), this, maxDoc()) {
          @Override
          public int advance(int target) throws IOException {
            if (target >= numDocs) {
              return pos = NO_MORE_DOCS;
            }
            source.getBytes(target, bytesRef);
            return pos = target;
          }
        };
      }

      @Override
      public int getValueCount() {
        throw new UnsupportedOperationException();
      }

      @Override
      public ValueType type() {
        return ValueType.BYTES_VAR_STRAIGHT;
      }

      @Override
      protected int maxDoc() {
        return addresses.size();
      }
    }

    @Override
    public ValuesEnum getEnum(AttributeSource source) throws IOException {
      return new VarStraightBytesEnum(source, cloneData(), cloneIndex());
    }

    private class VarStraightBytesEnum extends ValuesEnum {
      private final PackedInts.Reader addresses;
      private final IndexInput datIn;
      private final IndexInput idxIn;
      private final long fp;
      private final long totBytes;
      private int pos = -1;

      protected VarStraightBytesEnum(AttributeSource source, IndexInput datIn,
          IndexInput idxIn) throws IOException {
        super(source, ValueType.BYTES_VAR_STRAIGHT);
        totBytes = idxIn.readVLong();
        fp = datIn.getFilePointer();
        addresses = PackedInts.getReader(idxIn);
        this.datIn = datIn;
        this.idxIn = idxIn;
      }

      @Override
      public void close() throws IOException {
        datIn.close();
        idxIn.close();
      }

      @Override
      public int advance(final int target) throws IOException {
        if (target >= maxDoc) {
          return pos = NO_MORE_DOCS;
        }
        final long addr = addresses.get(target);
        if (addr == totBytes) { // empty values at the end
          bytesRef.length = 0;
          bytesRef.offset = 0;
          return pos = target;
        }
        datIn.seek(fp + addr);
        final int size = (int) (target == maxDoc - 1 ? totBytes - addr
            : addresses.get(target + 1) - addr);
        if (bytesRef.bytes.length < size) {
          bytesRef.grow(size);
        }
        bytesRef.length = size;
        datIn.readBytes(bytesRef.bytes, 0, size);
        return pos = target;
      }

      @Override
      public int docID() {
        return pos;
      }

      @Override
      public int nextDoc() throws IOException {
        return advance(pos + 1);
      }
    }

    @Override
    public ValueType type() {
      return ValueType.BYTES_VAR_STRAIGHT;
    }
  }
}
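The straight variant stores every document's bytes contiguously and derives lengths from adjacent addresses: doc i's value spans address[i] to address[i+1], with the last document bounded by the total byte count. A short sketch of that layout over plain arrays (illustrative names, not Lucene API):

// Sketch of the address-delta layout used by VarStraightBytesImpl.
// An empty value shows up as two equal adjacent addresses.
public class StraightLayoutDemo {
  static String sliceFor(int docID, long[] addresses, byte[] data, long totBytes) {
    long start = addresses[docID];
    long end = docID == addresses.length - 1 ? totBytes : addresses[docID + 1];
    return new String(data, (int) start, (int) (end - start));
  }

  public static void main(String[] args) {
    byte[] data = "foobarbaz!".getBytes();
    long[] addresses = {0, 3, 3, 6};  // doc1 is empty: same start as doc2
    for (int doc = 0; doc < addresses.length; doc++) {
      System.out.println(doc + " -> \"" + sliceFor(doc, addresses, data, data.length) + "\"");
    }
  }
}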
@@ -0,0 +1,228 @@
package org.apache.lucene.index.values;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Comparator;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.lucene.index.codecs.DocValuesConsumer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;

/**
 * Abstract API for per-document stored primitive values of type <tt>byte[]</tt>,
 * <tt>long</tt> or <tt>double</tt>. The API accepts a single value for each
 * document. The underlying storage mechanism, file formats, data-structures and
 * representations depend on the actual implementation.
 * <p>
 * Document IDs passed to this API must always be increasing unless stated
 * otherwise.
 * </p>
 *
 * @lucene.experimental
 */
public abstract class Writer extends DocValuesConsumer {

  /**
   * Creates a new {@link Writer}.
   *
   * @param bytesUsed
   *          bytes-usage tracking reference used by the implementation to track
   *          internally allocated memory. All tracked bytes must be released
   *          once {@link #finish(int)} has been called.
   */
  protected Writer(AtomicLong bytesUsed) {
    super(bytesUsed);
  }

  /**
   * Filename extension for index files.
   */
  public static final String INDEX_EXTENSION = "idx";

  /**
   * Filename extension for data files.
   */
  public static final String DATA_EXTENSION = "dat";

  /**
   * Records the specified <tt>long</tt> value for the docID or throws an
   * {@link UnsupportedOperationException} if this {@link Writer} doesn't record
   * <tt>long</tt> values.
   *
   * @throws UnsupportedOperationException
   *           if this writer doesn't record <tt>long</tt> values
   */
  public void add(int docID, long value) throws IOException {
    throw new UnsupportedOperationException();
  }

  /**
   * Records the specified <tt>double</tt> value for the docID or throws an
   * {@link UnsupportedOperationException} if this {@link Writer} doesn't record
   * <tt>double</tt> values.
   *
   * @throws UnsupportedOperationException
   *           if this writer doesn't record <tt>double</tt> values
   */
  public void add(int docID, double value) throws IOException {
    throw new UnsupportedOperationException();
  }

  /**
   * Records the specified {@link BytesRef} value for the docID or throws an
   * {@link UnsupportedOperationException} if this {@link Writer} doesn't record
   * {@link BytesRef} values.
   *
   * @throws UnsupportedOperationException
   *           if this writer doesn't record {@link BytesRef} values
   */
  public void add(int docID, BytesRef value) throws IOException {
    throw new UnsupportedOperationException();
  }

  /**
   * Records a value from the given document id. The method's implementation
   * obtains the value for the document id from the last {@link ValuesEnum}
   * set via {@link #setNextEnum(ValuesEnum)}.
   * <p>
   * This method is used during merging to provide an implementation-agnostic
   * default merge implementation.
   * </p>
   * <p>
   * The given document id must be the same document id returned from
   * {@link ValuesEnum#docID()} when this method is called. All document IDs
   * between the given ID and the previously given ID, or <tt>0</tt> if the
   * method is called for the first time, are filled with default values
   * depending on the {@link Writer} implementation. The given document ID
   * must always be greater than the previous ID, or <tt>0</tt> if called
   * the first time.
   */
  protected abstract void add(int docID) throws IOException;

  /**
   * Sets the next {@link ValuesEnum} to consume values from on calls to
   * {@link #add(int)}.
   *
   * @param valuesEnum
   *          the next {@link ValuesEnum}; this must not be null
   */
  protected abstract void setNextEnum(ValuesEnum valuesEnum);

  /**
   * Finishes writing and closes any files and resources used by this Writer.
   *
   * @param docCount
   *          the total number of documents for this writer. This must be
   *          greater than or equal to the largest document id passed to one
   *          of the add methods after the {@link Writer} was created.
   */
  public abstract void finish(int docCount) throws IOException;

  @Override
  protected void merge(MergeState state) throws IOException {
    // This enables bulk copies in subclasses per MergeState; subclasses can
    // simply override this and decide if they want to merge
    // segments using this generic implementation or if a bulk merge is
    // possible / feasible.
    final ValuesEnum valEnum = state.reader.getEnum();
    assert valEnum != null;
    try {
      setNextEnum(valEnum); // set the current enum we are working on - the
      // impl. will get the correct reference for the type
      // it supports
      int docID = state.docBase;
      final Bits bits = state.bits;
      final int docCount = state.docCount;
      int currentDocId;
      if ((currentDocId = valEnum.advance(0)) != ValuesEnum.NO_MORE_DOCS) {
        for (int i = 0; i < docCount; i++) {
          if (bits == null || !bits.get(i)) {
            if (currentDocId < i) {
              if ((currentDocId = valEnum.advance(i)) == ValuesEnum.NO_MORE_DOCS) {
                break; // advance can jump over default values
              }
            }
            if (currentDocId == i) { // we are on the doc to merge
              add(docID);
            }
            ++docID;
          }
        }
      }
    } finally {
      valEnum.close();
    }
  }

  /**
   * Factory method to create a {@link Writer} instance for a given type. This
   * method returns default implementations for each of the different types
   * defined in the {@link ValueType} enumeration.
   *
   * @param type
   *          the {@link ValueType} to create the {@link Writer} for
   * @param id
   *          the file name id used to create files within the writer.
   * @param directory
   *          the {@link Directory} to create the files from.
   * @param comp
   *          a {@link BytesRef} comparator used for {@link Bytes} variants. If
   *          <code>null</code>,
   *          {@link BytesRef#getUTF8SortedAsUnicodeComparator()} is used as the
   *          default.
   * @param bytesUsed
   *          a byte-usage tracking reference
   * @return a new {@link Writer} instance for the given {@link ValueType}
   * @throws IOException
   */
  public static Writer create(ValueType type, String id, Directory directory,
      Comparator<BytesRef> comp, AtomicLong bytesUsed) throws IOException {
    if (comp == null) {
      comp = BytesRef.getUTF8SortedAsUnicodeComparator();
    }
    switch (type) {
    case INTS:
      return Ints.getWriter(directory, id, true, bytesUsed);
    case FLOAT_32:
      return Floats.getWriter(directory, id, 4, bytesUsed);
    case FLOAT_64:
      return Floats.getWriter(directory, id, 8, bytesUsed);
    case BYTES_FIXED_STRAIGHT:
      return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, comp, true,
          bytesUsed);
    case BYTES_FIXED_DEREF:
      return Bytes.getWriter(directory, id, Bytes.Mode.DEREF, comp, true,
          bytesUsed);
    case BYTES_FIXED_SORTED:
      return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, comp, true,
          bytesUsed);
    case BYTES_VAR_STRAIGHT:
      return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, comp, false,
          bytesUsed);
    case BYTES_VAR_DEREF:
      return Bytes.getWriter(directory, id, Bytes.Mode.DEREF, comp, false,
          bytesUsed);
    case BYTES_VAR_SORTED:
      return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, comp, false,
          bytesUsed);
    default:
      throw new IllegalArgumentException("Unknown Values: " + type);
    }
  }
}
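The default merge() above walks a segment's documents once, skips deleted ones (those whose bit is set in state.bits), and renumbers the survivors onto a contiguous range starting at state.docBase. A stripped-down model of that remapping over plain arrays (illustrative only, not the Lucene API):

import java.util.ArrayList;
import java.util.List;

// Stripped-down model of Writer.merge(): deleted docs are skipped and the
// surviving docs are renumbered contiguously from docBase.
public class MergeRemapDemo {
  public static void main(String[] args) {
    boolean[] deleted = {false, true, false, false, true, false};
    int docBase = 100; // first target docID for this segment
    List<String> mapping = new ArrayList<>();
    int target = docBase;
    for (int i = 0; i < deleted.length; i++) {
      if (!deleted[i]) {
        mapping.add(i + " -> " + target++);
      }
    }
    System.out.println(mapping); // [0 -> 100, 2 -> 101, 3 -> 102, 5 -> 103]
  }
}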
@@ -32,7 +32,6 @@ import org.apache.lucene.util.Version;
 /**
  * A QueryParser which constructs queries to search multiple fields.
  *
- * @version $Revision$
  */
 public class MultiFieldQueryParser extends QueryParser
 {
@@ -20,8 +20,10 @@ package org.apache.lucene.search;
 import java.io.IOException;
 
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
-import org.apache.lucene.search.FieldCache.DocTermsIndex;
+import org.apache.lucene.index.values.IndexDocValues;
+import org.apache.lucene.index.values.IndexDocValues.Source;
 import org.apache.lucene.search.FieldCache.DocTerms;
+import org.apache.lucene.search.FieldCache.DocTermsIndex;
 import org.apache.lucene.search.cache.ByteValuesCreator;
 import org.apache.lucene.search.cache.CachedArray;
 import org.apache.lucene.search.cache.CachedArrayCreator;
@@ -38,9 +40,9 @@ import org.apache.lucene.search.cache.CachedArray.LongValues;
 import org.apache.lucene.search.cache.CachedArray.ShortValues;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.packed.Direct8;
 import org.apache.lucene.util.packed.Direct16;
 import org.apache.lucene.util.packed.Direct32;
+import org.apache.lucene.util.packed.Direct8;
 import org.apache.lucene.util.packed.PackedInts;
 
 /**
@@ -157,7 +159,6 @@ public abstract class FieldComparator {
    * comparators can just return "this" to reuse the same
    * comparator across segments
    * @throws IOException
-   * @throws IOException
    */
   public abstract FieldComparator setNextReader(AtomicReaderContext context) throws IOException;
@@ -328,6 +329,68 @@ public abstract class FieldComparator {
     }
   }
 
+  /** Uses float index values to sort by ascending value */
+  public static final class FloatDocValuesComparator extends FieldComparator {
+    private final double[] values;
+    private Source currentReaderValues;
+    private final String field;
+    private double bottom;
+
+    FloatDocValuesComparator(int numHits, String field) {
+      values = new double[numHits];
+      this.field = field;
+    }
+
+    @Override
+    public int compare(int slot1, int slot2) {
+      final double v1 = values[slot1];
+      final double v2 = values[slot2];
+      if (v1 > v2) {
+        return 1;
+      } else if (v1 < v2) {
+        return -1;
+      } else {
+        return 0;
+      }
+    }
+
+    @Override
+    public int compareBottom(int doc) {
+      final double v2 = currentReaderValues.getFloat(doc);
+      if (bottom > v2) {
+        return 1;
+      } else if (bottom < v2) {
+        return -1;
+      } else {
+        return 0;
+      }
+    }
+
+    @Override
+    public void copy(int slot, int doc) {
+      values[slot] = currentReaderValues.getFloat(doc);
+    }
+
+    @Override
+    public FieldComparator setNextReader(AtomicReaderContext context) throws IOException {
+      final IndexDocValues docValues = context.reader.docValues(field);
+      if (docValues != null) {
+        currentReaderValues = docValues.getSource();
+      }
+      return this;
+    }
+
+    @Override
+    public void setBottom(final int bottom) {
+      this.bottom = values[bottom];
+    }
+
+    @Override
+    public Comparable<Double> value(int slot) {
+      return Double.valueOf(values[slot]);
+    }
+  }
+
   /** Parses field's values as float (using {@link
    *  FieldCache#getFloats} and sorts by ascending value */
   public static final class FloatComparator extends NumericComparator<FloatValues> {
@@ -536,6 +599,72 @@ public abstract class FieldComparator {
     }
   }
 
+  /** Loads int index values and sorts by ascending value. */
+  public static final class IntDocValuesComparator extends FieldComparator {
+    private final long[] values;
+    private Source currentReaderValues;
+    private final String field;
+    private long bottom;
+
+    IntDocValuesComparator(int numHits, String field) {
+      values = new long[numHits];
+      this.field = field;
+    }
+
+    @Override
+    public int compare(int slot1, int slot2) {
+      // TODO: there are sneaky non-branch ways to compute
+      // -1/+1/0 sign
+      final long v1 = values[slot1];
+      final long v2 = values[slot2];
+      if (v1 > v2) {
+        return 1;
+      } else if (v1 < v2) {
+        return -1;
+      } else {
+        return 0;
+      }
+    }
+
+    @Override
+    public int compareBottom(int doc) {
+      // TODO: there are sneaky non-branch ways to compute
+      // -1/+1/0 sign
+      final long v2 = currentReaderValues.getInt(doc);
+      if (bottom > v2) {
+        return 1;
+      } else if (bottom < v2) {
+        return -1;
+      } else {
+        return 0;
+      }
+    }
+
+    @Override
+    public void copy(int slot, int doc) {
+      values[slot] = currentReaderValues.getInt(doc);
+    }
+
+    @Override
+    public FieldComparator setNextReader(AtomicReaderContext context) throws IOException {
+      IndexDocValues docValues = context.reader.docValues(field);
+      if (docValues != null) {
+        currentReaderValues = docValues.getSource();
+      }
+      return this;
+    }
+
+    @Override
+    public void setBottom(final int bottom) {
+      this.bottom = values[bottom];
+    }
+
+    @Override
+    public Comparable<Long> value(int slot) {
+      return Long.valueOf(values[slot]);
+    }
+  }
+
   /** Parses field's values as long (using {@link
    *  FieldCache#getLongs} and sorts by ascending value */
   public static final class LongComparator extends NumericComparator<LongValues> {
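Both DocValues comparators follow FieldComparator's slot protocol: copy() stages a hit's value into a slot of the priority queue, setBottom() marks the weakest queued slot, and compareBottom() tests each new doc against it. A toy model of that flow over plain arrays (illustrative only, not the Lucene API):

// Toy model of the FieldComparator slot protocol used by the
// Int/FloatDocValuesComparator classes above.
public class SlotProtocolDemo {
  public static void main(String[] args) {
    long[] docValues = {42, 7, 99, 13};   // per-doc values, ascending sort
    long[] slots = new long[2];           // queue holding the 2 best hits
    slots[0] = docValues[0];              // copy(0, doc0)
    slots[1] = docValues[1];              // copy(1, doc1)
    long bottom = Math.max(slots[0], slots[1]); // setBottom(weakest slot)
    for (int doc = 2; doc < docValues.length; doc++) {
      // compareBottom(doc) > 0 means the queued bottom sorts after this doc,
      // so the doc is competitive and would replace the bottom slot
      boolean competitive = bottom > docValues[doc];
      System.out.println("doc " + doc + " competitive: " + competitive);
    }
  }
}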
@@ -40,7 +40,6 @@ import org.apache.lucene.util.Bits;
  * prefix using IndexReader.terms(Term), and use MultiPhraseQuery.add(Term[]
  * terms) to add them to the query.
  *
- * @version 1.0
  */
 public class MultiPhraseQuery extends Query {
   private String field;
@@ -23,7 +23,7 @@ import java.io.IOException;
 /** A Scorer for queries with a required subscorer
  * and an excluding (prohibited) sub DocIdSetIterator.
  * <br>
- * This <code>Scorer</code> implements {@link Scorer#skipTo(int)},
+ * This <code>Scorer</code> implements {@link Scorer#advance(int)},
  * and it uses the skipTo() on the given scorers.
  */
 class ReqExclScorer extends Scorer {
@@ -21,7 +21,7 @@ import java.io.IOException;
 /** A Scorer for queries with a required part and an optional part.
  * Delays skipTo() on the optional part until a score() is needed.
  * <br>
- * This <code>Scorer</code> implements {@link Scorer#skipTo(int)}.
+ * This <code>Scorer</code> implements {@link Scorer#advance(int)}.
 */
 class ReqOptSumScorer extends Scorer {
   /** The scorers passed from the constructor.
@@ -18,9 +18,15 @@ package org.apache.lucene.search;
  */
 
 import java.io.IOException;
+import java.util.Comparator;
 
 import org.apache.lucene.search.cache.*;
 import org.apache.lucene.util.StringHelper;
+import org.apache.lucene.util.BytesRef;
+
+// TODO(simonw) -- for cleaner transition, maybe we should make
+// a new SortField that subclasses this one and always uses
+// index values?
 
 /**
  * Stores information about how to sort documents by terms in an individual
@@ -81,6 +87,9 @@ public class SortField {
    *  uses ordinals to do the sorting. */
   public static final int STRING_VAL = 11;
 
+  /** Sort using byte[] index values. */
+  public static final int BYTES = 12;
+
   /** Represents sorting by document score (relevance). */
   public static final SortField FIELD_SCORE = new SortField (null, SCORE);
@@ -390,6 +399,26 @@ public class SortField {
     return hash;
   }
 
+  private boolean useIndexValues;
+
+  public void setUseIndexValues(boolean b) {
+    useIndexValues = b;
+  }
+
+  public boolean getUseIndexValues() {
+    return useIndexValues;
+  }
+
+  private Comparator<BytesRef> bytesComparator = BytesRef.getUTF8SortedAsUnicodeComparator();
+
+  public void setBytesComparator(Comparator<BytesRef> b) {
+    bytesComparator = b;
+  }
+
+  public Comparator<BytesRef> getBytesComparator() {
+    return bytesComparator;
+  }
+
   /** Returns the {@link FieldComparator} to use for
    *  sorting.
    *
@@ -412,10 +441,18 @@ public class SortField {
       return new FieldComparator.DocComparator(numHits);
 
     case SortField.INT:
-      return new FieldComparator.IntComparator(numHits, (IntValuesCreator)creator, (Integer)missingValue );
+      if (useIndexValues) {
+        return new FieldComparator.IntDocValuesComparator(numHits, field);
+      } else {
+        return new FieldComparator.IntComparator(numHits, (IntValuesCreator)creator, (Integer) missingValue);
+      }
 
     case SortField.FLOAT:
-      return new FieldComparator.FloatComparator(numHits, (FloatValuesCreator)creator, (Float)missingValue );
+      if (useIndexValues) {
+        return new FieldComparator.FloatDocValuesComparator(numHits, field);
+      } else {
+        return new FieldComparator.FloatComparator(numHits, (FloatValuesCreator) creator, (Float) missingValue);
+      }
 
     case SortField.LONG:
       return new FieldComparator.LongComparator(numHits, (LongValuesCreator)creator, (Long)missingValue );
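Putting the SortField changes together, a caller could opt a sort into the docvalues-backed comparators roughly as below; this is a sketch, and the "price" field plus the caller-supplied searcher and query are assumptions:

import java.io.IOException;

import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;

// Hedged sketch of opting a sort into index values via the accessor added
// above; setUseIndexValues(true) makes getComparator() pick
// IntDocValuesComparator instead of the FieldCache-based IntComparator.
public class DocValuesSortExample {
  static TopDocs sortByPrice(IndexSearcher searcher, Query query) throws IOException {
    SortField sf = new SortField("price", SortField.INT);
    sf.setUseIndexValues(true);
    return searcher.search(query, 10, new Sort(sf));
  }
}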
@@ -0,0 +1,114 @@
package org.apache.lucene.search.function;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.values.IndexDocValues;
import org.apache.lucene.index.values.ValueType;

/**
 * Expert: obtains numeric field values from a {@link IndexDocValues} field.
 * This {@link ValueSource} is compatible with all numerical
 * {@link IndexDocValues}.
 *
 * @lucene.experimental
 */
public class NumericIndexDocValueSource extends ValueSource {

  private final String field;

  public NumericIndexDocValueSource(String field) {
    this.field = field;
  }

  @Override
  public DocValues getValues(AtomicReaderContext context) throws IOException {
    final IndexDocValues.Source source = context.reader.docValues(field)
        .getSource();
    ValueType type = source.type();
    switch (type) {
    case FLOAT_32:
    case FLOAT_64:
      return new DocValues() {

        @Override
        public String toString(int doc) {
          return "float: [" + floatVal(doc) + "]";
        }

        @Override
        public float floatVal(int doc) {
          return (float) source.getFloat(doc);
        }
      };

    case INTS:
      return new DocValues() {
        @Override
        public String toString(int doc) {
          return "float: [" + floatVal(doc) + "]";
        }

        @Override
        public float floatVal(int doc) {
          return (float) source.getInt(doc);
        }
      };
    default:
      throw new IOException("Type: " + type + " is not numeric");
    }
  }

  @Override
  public String description() {
    return toString();
  }

  @Override
  public int hashCode() {
    final int prime = 31;
    int result = 1;
    result = prime * result + ((field == null) ? 0 : field.hashCode());
    return result;
  }

  @Override
  public boolean equals(Object obj) {
    if (this == obj)
      return true;
    if (obj == null)
      return false;
    if (getClass() != obj.getClass())
      return false;
    NumericIndexDocValueSource other = (NumericIndexDocValueSource) obj;
    if (field == null) {
      if (other.field != null)
        return false;
    } else if (!field.equals(other.field))
      return false;
    return true;
  }

  @Override
  public String toString() {
    return "DocValues float(" + field + ')';
  }
}
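A query could then score directly by the docvalues field by wrapping this ValueSource; the sketch below assumes the "price" field name and uses ValueSourceQuery, the wrapper in this era's org.apache.lucene.search.function package:

import org.apache.lucene.search.Query;
import org.apache.lucene.search.function.ValueSourceQuery;

// Hedged sketch: a query whose score is the numeric docvalues field itself.
// The "price" field is an assumption.
public class DocValuesFunctionExample {
  static Query scoreByPrice() {
    return new ValueSourceQuery(new NumericIndexDocValueSource("price"));
  }
}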
@@ -255,6 +255,19 @@ public final class ArrayUtil {
     return grow(array, 1 + array.length);
   }
 
+  public static double[] grow(double[] array, int minSize) {
+    if (array.length < minSize) {
+      double[] newArray = new double[oversize(minSize, RamUsageEstimator.NUM_BYTES_DOUBLE)];
+      System.arraycopy(array, 0, newArray, 0, array.length);
+      return newArray;
+    } else
+      return array;
+  }
+
+  public static double[] grow(double[] array) {
+    return grow(array, 1 + array.length);
+  }
+
   public static short[] shrink(short[] array, int targetSize) {
     final int newSize = getShrinkSize(array.length, targetSize, RamUsageEstimator.NUM_BYTES_SHORT);
     if (newSize != array.length) {
@@ -18,6 +18,8 @@ package org.apache.lucene.util;
  */
 import java.util.Arrays;
 import java.util.List;
+import java.util.concurrent.atomic.AtomicLong;
+
 import static org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_OBJECT_REF;
 
 /**
@@ -78,6 +80,33 @@ public final class ByteBlockPool {
     }
 
   }
 
+  public static class DirectTrackingAllocator extends Allocator {
+    private final AtomicLong bytesUsed;
+
+    public DirectTrackingAllocator(AtomicLong bytesUsed) {
+      this(BYTE_BLOCK_SIZE, bytesUsed);
+    }
+
+    public DirectTrackingAllocator(int blockSize, AtomicLong bytesUsed) {
+      super(blockSize);
+      this.bytesUsed = bytesUsed;
+    }
+
+    public byte[] getByteBlock() {
+      bytesUsed.addAndGet(blockSize);
+      return new byte[blockSize];
+    }
+
+    @Override
+    public void recycleByteBlocks(byte[][] blocks, int start, int end) {
+      bytesUsed.addAndGet(-((end - start) * blockSize));
+      for (int i = start; i < end; i++) {
+        blocks[i] = null;
+      }
+    }
+  };
+
 
   public byte[][] buffers = new byte[10][];
@@ -92,6 +121,20 @@ public final class ByteBlockPool {
   public ByteBlockPool(Allocator allocator) {
     this.allocator = allocator;
   }
 
+  public void dropBuffersAndReset() {
+    if (bufferUpto != -1) {
+      // Recycle all but the first buffer
+      allocator.recycleByteBlocks(buffers, 0, 1+bufferUpto);
+
+      // Re-use the first buffer
+      bufferUpto = -1;
+      byteUpto = BYTE_BLOCK_SIZE;
+      byteOffset = -BYTE_BLOCK_SIZE;
+      buffers = new byte[10][];
+      buffer = null;
+    }
+  }
+
   public void reset() {
     if (bufferUpto != -1) {
@@ -115,7 +158,7 @@ public final class ByteBlockPool {
       buffer = buffers[0];
     }
   }
 
   public void nextBuffer() {
     if (1+bufferUpto == buffers.length) {
       byte[][] newBuffers = new byte[ArrayUtil.oversize(buffers.length+1,
@@ -227,8 +227,9 @@ public final class BytesRefHash {
   public void clear(boolean resetPool) {
     lastCount = count;
     count = 0;
-    if (resetPool)
-      pool.reset();
+    if (resetPool) {
+      pool.dropBuffersAndReset();
+    }
     bytesStart = bytesStartArray.clear();
     if (lastCount != -1 && shrink(lastCount)) {
       // shrink clears the hash entries
@@ -240,6 +241,16 @@ public final class BytesRefHash {
   public void clear() {
     clear(true);
   }
 
+  /**
+   * Closes the BytesRefHash and releases all internally used memory
+   */
+  public void close() {
+    clear(true);
+    ords = null;
+    bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT
+        * -hashSize);
+  }
+
   /**
    * Adds a new {@link BytesRef}
@@ -332,6 +343,7 @@ public final class BytesRefHash {
       // 1 byte to store length
       buffer[bufferUpto] = (byte) length;
       pool.byteUpto += length + 1;
+      assert length >= 0: "Length must be positive: " + length;
       System.arraycopy(bytes.bytes, bytes.offset, buffer, bufferUpto + 1,
           length);
     } else {
@@ -452,8 +464,14 @@ public final class BytesRefHash {
    * effect.
    */
   public void reinit() {
-    if (bytesStart == null)
-      bytesStart = bytesStartArray.init();
+    if (bytesStart == null) {
+      bytesStart = bytesStartArray.init();
+    }
+
+    if (ords == null) {
+      ords = new int[hashSize];
+      bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT * hashSize);
+    }
   }
 
   /**
@@ -514,17 +532,62 @@ public final class BytesRefHash {
    */
   public abstract AtomicLong bytesUsed();
 }
 
-  public static class DirectBytesStartArray extends BytesStartArray {
+  /**
+   * A direct {@link BytesStartArray} that tracks all memory allocation using an {@link AtomicLong} instance.
+   */
+  public static class TrackingDirectBytesStartArray extends BytesStartArray {
     protected final int initSize;
     private int[] bytesStart;
-    private final AtomicLong bytesUsed = new AtomicLong(0);
+    protected final AtomicLong bytesUsed;
+
+    public TrackingDirectBytesStartArray(int initSize, AtomicLong bytesUsed) {
+      this.initSize = initSize;
+      this.bytesUsed = bytesUsed;
+    }
+
+    @Override
+    public int[] clear() {
+      if (bytesStart != null) {
+        bytesUsed.addAndGet(-bytesStart.length * RamUsageEstimator.NUM_BYTES_INT);
+      }
+      return bytesStart = null;
+    }
+
+    @Override
+    public int[] grow() {
+      assert bytesStart != null;
+      final int oldSize = bytesStart.length;
+      bytesStart = ArrayUtil.grow(bytesStart, bytesStart.length + 1);
+      bytesUsed.addAndGet((bytesStart.length - oldSize) * RamUsageEstimator.NUM_BYTES_INT);
+      return bytesStart;
+    }
+
+    @Override
+    public int[] init() {
+      bytesStart = new int[ArrayUtil.oversize(initSize,
+          RamUsageEstimator.NUM_BYTES_INT)];
+      bytesUsed.addAndGet((bytesStart.length) * RamUsageEstimator.NUM_BYTES_INT);
+      return bytesStart;
+    }
+
+    @Override
+    public AtomicLong bytesUsed() {
+      return bytesUsed;
+    }
+  }
+
+  public static class DirectBytesStartArray extends BytesStartArray {
+    protected final int initSize;
+    private int[] bytesStart;
+    private final AtomicLong bytesUsed;
 
     public DirectBytesStartArray(int initSize) {
+      this.bytesUsed = new AtomicLong(0);
       this.initSize = initSize;
     }
 
     @Override
     public int[] clear() {
       return bytesStart = null;
@@ -546,6 +609,5 @@ public final class BytesRefHash {
     public AtomicLong bytesUsed() {
       return bytesUsed;
     }
-
   }
 }
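As a rough sketch of how the new TrackingDirectBytesStartArray can feed the same counter as the tracking allocator (hypothetical wiring; it assumes a three-argument BytesRefHash constructor taking a pool, a power-of-two capacity, and a BytesStartArray, which this diff does not show):

    import java.util.concurrent.atomic.AtomicLong;

    import org.apache.lucene.util.ByteBlockPool;
    import org.apache.lucene.util.BytesRef;
    import org.apache.lucene.util.BytesRefHash;

    // Hypothetical demo, not part of this commit.
    public class TrackingHashDemo {
      public static void main(String[] args) {
        final AtomicLong bytesUsed = new AtomicLong(0);

        // Both the term bytes (pool) and the start offsets (start array)
        // report into the same counter.
        final BytesRefHash hash = new BytesRefHash(
            new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(
                ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed)),
            16,
            new BytesRefHash.TrackingDirectBytesStartArray(16, bytesUsed));

        hash.add(new BytesRef("term"));
        System.out.println("tracked RAM: " + bytesUsed.get() + " bytes");

        hash.close();  // releases the tracked memory again
      }
    }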
@@ -0,0 +1,109 @@
+package org.apache.lucene.util;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Represents double[], as a slice (offset + length) into an existing double[].
+ *
+ * @lucene.internal
+ */
+public final class FloatsRef implements Cloneable {
+  public double[] floats;
+  public int offset;
+  public int length;
+
+  public FloatsRef() {
+  }
+
+  public FloatsRef(int capacity) {
+    floats = new double[capacity];
+  }
+
+  public void set(double value) {
+    floats[offset] = value;
+  }
+
+  public double get() {
+    return floats[offset];
+  }
+
+  public FloatsRef(double[] floats, int offset, int length) {
+    this.floats = floats;
+    this.offset = offset;
+    this.length = length;
+  }
+
+  public FloatsRef(FloatsRef other) {
+    copy(other);
+  }
+
+  @Override
+  public Object clone() {
+    return new FloatsRef(this);
+  }
+
+  @Override
+  public int hashCode() {
+    final int prime = 31;
+    int result = 0;
+    final int end = offset + length;
+    for (int i = offset; i < end; i++) {
+      long value = Double.doubleToLongBits(floats[i]);
+      result = prime * result + (int) (value ^ (value >>> 32));
+    }
+    return result;
+  }
+
+  @Override
+  public boolean equals(Object other) {
+    return other instanceof FloatsRef && this.floatsEquals((FloatsRef) other);
+  }
+
+  public boolean floatsEquals(FloatsRef other) {
+    if (length == other.length) {
+      int otherUpto = other.offset;
+      final double[] otherFloats = other.floats;
+      final int end = offset + length;
+      for (int upto = offset; upto < end; upto++, otherUpto++) {
+        if (floats[upto] != otherFloats[otherUpto]) {
+          return false;
+        }
+      }
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  public void copy(FloatsRef other) {
+    if (floats == null) {
+      floats = new double[other.length];
+    } else {
+      floats = ArrayUtil.grow(floats, other.length);
+    }
+    System.arraycopy(other.floats, other.offset, floats, 0, other.length);
+    length = other.length;
+    offset = 0;
+  }
+
+  public void grow(int newLength) {
+    if (floats.length < newLength) {
+      floats = ArrayUtil.grow(floats, newLength);
+    }
+  }
+}
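The new FloatsRef (which, despite its name, wraps a double[] per its own javadoc) behaves like the other *Ref slice types. A small, hypothetical example of slicing, in-place mutation, and copying, using only the API shown in the file above:

    import org.apache.lucene.util.FloatsRef;

    // Hypothetical demo, not part of this commit.
    public class FloatsRefDemo {
      public static void main(String[] args) {
        // View elements 1..2 of the backing array: the slice {2.0, 3.0}.
        FloatsRef ref = new FloatsRef(new double[] {1.0, 2.0, 3.0, 4.0}, 1, 2);
        System.out.println(ref.get());  // 2.0 -- reads floats[offset]

        ref.offset++;                   // slide the window to {3.0, 4.0}
        ref.set(5.0);                   // writes floats[2]: slice is now {5.0, 4.0}

        // The copy constructor deep-copies the slice and rebases offset to 0.
        FloatsRef copy = new FloatsRef(ref);
        System.out.println(copy.floatsEquals(ref));  // true
      }
    }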