mirror of https://github.com/apache/lucene.git

Merged /lucene/dev/trunk:r1432062-1433030

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1433035 13f79535-47bb-0310-9956-ffa450edef68

commit b6c9791358
@ -197,6 +197,9 @@ def checkAll(dirName):
elif link.find('lucene.apache.org/java/docs/discussion.html') != -1:
# OK
pass
elif link.find('lucene.apache.org/core/discussion.html') != -1:
# OK
pass
elif link.find('lucene.apache.org/solr/mirrors-solr-latest-redir.html') != -1:
# OK
pass

@ -308,7 +308,7 @@ def checkSigs(project, urlString, version, tmpDir, isSigned):
artifact = text
artifactURL = subURL
if project == 'solr':
expected = 'apache-solr-%s' % version
expected = 'solr-%s' % version
else:
expected = 'lucene-%s' % version
if not artifact.startswith(expected):

@ -334,9 +334,9 @@ def checkSigs(project, urlString, version, tmpDir, isSigned):
'lucene-%s.tgz' % version,
'lucene-%s.zip' % version]
else:
expected = ['apache-solr-%s-src.tgz' % version,
'apache-solr-%s.tgz' % version,
'apache-solr-%s.zip' % version]
expected = ['solr-%s-src.tgz' % version,
'solr-%s.tgz' % version,
'solr-%s.zip' % version]

actual = [x[0] for x in artifacts]
if expected != actual:

@ -556,10 +556,7 @@ def unpackAndVerify(project, tmpDir, artifact, version):

# make sure it unpacks to proper subdir
l = os.listdir(destDir)
if project == 'solr':
expected = 'apache-%s-%s' % (project, version)
else:
expected = '%s-%s' % (project, version)
expected = '%s-%s' % (project, version)
if l != [expected]:
raise RuntimeError('unpack produced entries %s; expected only %s' % (l, expected))

@ -956,7 +953,6 @@ def getDistributionsForMavenChecks(tmpDir, version, baseURL):
distributionFiles = defaultdict()
for project in ('lucene', 'solr'):
distribution = '%s-%s.tgz' % (project, version)
if project == 'solr': distribution = 'apache-' + distribution
if not os.path.exists('%s/%s' % (tmpDir, distribution)):
distURL = '%s/%s/%s' % (baseURL, project, distribution)
print(' download %s...' % distribution, end=' ')

@ -1010,8 +1006,6 @@ def checkIdenticalMavenArtifacts(distributionFiles, nonMavenizedDeps, artifacts,
distFilenames = dict()
for file in distributionFiles[project]:
baseName = os.path.basename(file)
if project == 'solr': # Remove 'apache-' prefix to allow comparison to Maven artifacts
baseName = baseName.replace('apache-', '')
distFilenames[baseName] = file
for artifact in artifacts[project]:
if reJarWar.search(artifact):

@ -1348,9 +1342,9 @@ def smokeTest(baseURL, version, tmpDir, isSigned):
print()
print('Test Solr...')
checkSigs('solr', solrPath, version, tmpDir, isSigned)
for artifact in ('apache-solr-%s.tgz' % version, 'apache-solr-%s.zip' % version):
for artifact in ('solr-%s.tgz' % version, 'solr-%s.zip' % version):
unpackAndVerify('solr', tmpDir, artifact, version)
unpackAndVerify('solr', tmpDir, 'apache-solr-%s-src.tgz' % version, version)
unpackAndVerify('solr', tmpDir, 'solr-%s-src.tgz' % version, version)

print()
print('Test Maven artifacts for Lucene and Solr...')
|
|
@ -19,6 +19,16 @@ Changes in backwards compatibility policy
  (Nikola Tanković, Uwe Schindler, Chris Male, Mike McCandless,
  Robert Muir)

* LUCENE-4677, LUCENE-4682: unpacked FSTs now use vInt to encode the node target,
  to reduce their size (Mike McCandless)

* LUCENE-4678: FST now uses a paged byte[] structure instead of a
  single byte[] internally, to avoid large memory spikes during
  building (James Dyer, Mike McCandless)

* LUCENE-3298: FST can now be larger than 2.1 GB / 2.1 B nodes.
  (James Dyer, Mike McCandless)

======================= Lucene 4.1.0 =======================

Changes in backwards compatibility policy
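The LUCENE-4677/4682, LUCENE-4678 and LUCENE-3298 entries above change how an FST is laid out internally (vInt-encoded node targets, a paged byte[] store, long node addresses) without changing the public build API. As a rough illustration only — this snippet is not part of the patch and simply reuses the 4.x classes that appear elsewhere in this diff (Builder, PositiveIntOutputs, Util) — building and querying a small FST looks roughly like this:

// Hedged sketch; statements belong inside a method, IOException handling omitted.
// Imports: org.apache.lucene.util.BytesRef, org.apache.lucene.util.IntsRef, org.apache.lucene.util.fst.*
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true); // same factory this patch uses
Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs); // no-pruning shortcut constructor
IntsRef scratch = new IntsRef();
builder.add(Util.toIntsRef(new BytesRef("cat"), scratch), 5L); // inputs must be added in sorted order
builder.add(Util.toIntsRef(new BytesRef("dog"), scratch), 7L);
FST<Long> fst = builder.finish(); // the structure whose internals the entries above change
Long value = Util.get(fst, new BytesRef("dog")); // 7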
@ -45,7 +55,7 @@ Changes in backwards compatibility policy
|
|||
Instead of calling refresh(), you should write similar code to how you reopen
|
||||
a regular DirectoryReader.
|
||||
- TaxonomyReader.openIfChanged (previously refresh()) no longer throws
|
||||
IncosistentTaxonomyException, and supports recreate. InconsistentTaxoEx
|
||||
InconsistentTaxonomyException, and supports recreate. InconsistentTaxoEx
|
||||
was removed.
|
||||
- ChildrenArrays was pulled out of TaxonomyReader into a top-level class.
|
||||
- TaxonomyReader was made an abstract class (instead of an interface), with
|
||||
|
@ -94,7 +104,7 @@ Changes in backwards compatibility policy
|
|||
Also, the entire IndexingParams chain is now immutable. If you need to override
|
||||
a setting, you should extend the relevant class.
|
||||
Additionally, FacetSearchParams is now immutable, and requires all FacetRequests
|
||||
to speified at initialization time. (Shai Erera)
|
||||
to specified at initialization time. (Shai Erera)
|
||||
|
||||
* LUCENE-4647: CategoryDocumentBuilder and EnhancementsDocumentBuilder are replaced
|
||||
by FacetFields and AssociationsFacetFields respectively. CategoryEnhancement and
|
||||
|
@ -115,6 +125,10 @@ Changes in backwards compatibility policy
|
|||
result, few other classes such as Aggregator and CategoryListIterator were
|
||||
changed to handle bulk category ordinals. (Shai Erera)
|
||||
|
||||
* LUCENE-4683: CategoryListIterator and Aggregator are now per-segment. As such
|
||||
their implementations no longer take a top-level IndexReader in the constructor
|
||||
but rather implement a setNextReader. (Shai Erera)
|
||||
|
||||
New Features
|
||||
|
||||
* LUCENE-4226: New experimental StoredFieldsFormat that compresses chunks of
|
||||
|
@ -152,11 +166,6 @@ New Features
|
|||
* LUCENE-4515: MemoryIndex now supports adding the same field multiple
|
||||
times. (Simon Willnauer)
|
||||
|
||||
* LUCENE-4540: Added an experimental Norm.setPackedLong, which allows
|
||||
the use of VAR_INTS-encoded norms. This can be useful for cases where
|
||||
you only need a few bits per-document, or where you might want exact
|
||||
document length, and so on. (Robert Muir)
|
||||
|
||||
* LUCENE-4489: Added consumeAllTokens option to LimitTokenCountFilter
|
||||
(hossman, Robert Muir)
|
||||
|
||||
|
@ -267,7 +276,7 @@ Bug Fixes
|
|||
allow 1+maxMergeCount merges threads to be created, instead of just
|
||||
maxMergeCount (Radim Kolar, Mike McCandless)
|
||||
|
||||
* LUCENE-4567: Fixed NullPointerException in analzying, fuzzy, and
|
||||
* LUCENE-4567: Fixed NullPointerException in analyzing, fuzzy, and
|
||||
WFST suggesters when no suggestions were added (selckin via Mike
|
||||
McCandless)
|
||||
|
||||
|
@ -527,7 +536,7 @@ API Changes
|
|||
StoredFieldVisitor API. (Mike McCandless)
|
||||
|
||||
* LUCENE-4343: Made Tokenizer.setReader final. This is a setter that should
|
||||
not be overriden by subclasses: per-stream initialization should happen
|
||||
not be overridden by subclasses: per-stream initialization should happen
|
||||
in reset(). (Robert Muir)
|
||||
|
||||
* LUCENE-4377: Remove IndexInput.copyBytes(IndexOutput, long).
|
||||
|
@ -753,7 +762,7 @@ API Changes
|
|||
|
||||
* LUCENE-4273: When pulling a DocsEnum, you can pass an int flags
|
||||
instead of the previous boolean needsFlags; consistent with the changes
|
||||
for DocsAndPositionsEnum in LUCENE-4230. Currently othe only flag
|
||||
for DocsAndPositionsEnum in LUCENE-4230. Currently the only flag
|
||||
is DocsEnum.FLAG_FREQS. (Robert Muir, Mike McCandless)
|
||||
|
||||
* LUCENE-3616: TextField(String, Reader, Store) was reduced to TextField(String, Reader),
|
||||
|
@ -825,7 +834,7 @@ Bug Fixes
|
|||
instance are already checked out and queued up but not yet flushed.
|
||||
(Simon Willnauer)
|
||||
|
||||
* LUCENE-4282: Automaton FuzzyQuery didnt always deliver all results.
|
||||
* LUCENE-4282: Automaton FuzzyQuery didn't always deliver all results.
|
||||
(Johannes Christen, Uwe Schindler, Robert Muir)
|
||||
|
||||
* LUCENE-4289: Fix minor idf inconsistencies/inefficiencies in highlighter.
|
||||
|
@ -1055,7 +1064,7 @@ Changes in backwards compatibility policy
|
|||
Query/Weight/Scorer. If you extended Similarity directly before, you should
|
||||
extend TFIDFSimilarity instead. Similarity is now a lower-level API to
|
||||
implement other scoring algorithms. See MIGRATE.txt for more details.
|
||||
(David Nemeskey, Simon Willnauer, Mike Mccandless, Robert Muir)
|
||||
(David Nemeskey, Simon Willnauer, Mike McCandless, Robert Muir)
|
||||
|
||||
* LUCENE-3330: The expert visitor API in Scorer has been simplified and
|
||||
extended to support arbitrary relationships. To navigate to a scorer's
|
||||
|
@ -1163,12 +1172,12 @@ Changes in Runtime Behavior
|
|||
omitNorms(true) for field "a" for 1000 documents, but then add a document with
|
||||
omitNorms(false) for field "a", all documents for field "a" will have no
|
||||
norms. Previously, Lucene would fill the first 1000 documents with
|
||||
"fake norms" from Similarity.getDefault(). (Robert Muir, Mike Mccandless)
|
||||
"fake norms" from Similarity.getDefault(). (Robert Muir, Mike McCandless)
|
||||
|
||||
* LUCENE-2846: When some documents contain field "a", and others do not, the
|
||||
documents that don't have the field get a norm byte value of 0. Previously,
|
||||
Lucene would populate "fake norms" with Similarity.getDefault() for these
|
||||
documents. (Robert Muir, Mike Mccandless)
|
||||
documents. (Robert Muir, Mike McCandless)
|
||||
|
||||
* LUCENE-2720: IndexWriter throws IndexFormatTooOldException on open, rather
|
||||
than later when e.g. a merge starts.
|
||||
|
@ -1201,13 +1210,13 @@ Changes in Runtime Behavior
|
|||
update or delete on IndexWriter. By default DWPTs are flushed either on
|
||||
maxBufferedDocs per DWPT or the global active used memory. Once the active
|
||||
memory exceeds ramBufferSizeMB only the largest DWPT is selected for
|
||||
flushing and the memory used by this DWPT is substracted from the active
|
||||
flushing and the memory used by this DWPT is subtracted from the active
|
||||
memory and added to a flushing memory pool, which can lead to temporarily
|
||||
higher memory usage due to ongoing indexing.
|
||||
|
||||
- IndexWriter now can utilize ramBufferSize > 2048 MB. Each DWPT can address
|
||||
up to 2048 MB memory such that the ramBufferSize is now bounded by the max
|
||||
number of DWPT avaliable in the used DocumentsWriterPerThreadPool.
|
||||
number of DWPT available in the used DocumentsWriterPerThreadPool.
|
||||
IndexWriters net memory consumption can grow far beyond the 2048 MB limit if
|
||||
the application can use all available DWPTs. To prevent a DWPT from
|
||||
exhausting its address space IndexWriter will forcefully flush a DWPT if its
|
||||
|
@ -1215,7 +1224,7 @@ Changes in Runtime Behavior
|
|||
via IndexWriterConfig and defaults to 1945 MB.
|
||||
Since IndexWriter flushes DWPT concurrently not all memory is released
|
||||
immediately. Applications should still use a ramBufferSize significantly
|
||||
lower than the JVMs avaliable heap memory since under high load multiple
|
||||
lower than the JVMs available heap memory since under high load multiple
|
||||
flushing DWPT can consume substantial transient memory when IO performance
|
||||
is slow relative to indexing rate.
|
||||
|
||||
|
@ -1223,7 +1232,7 @@ Changes in Runtime Behavior
|
|||
'currently' RAM resident documents to disk. Yet, flushes that occur while a
|
||||
a full flush is running are queued and will happen after all DWPT involved
|
||||
in the full flush are done flushing. Applications using multiple threads
|
||||
during indexing and trigger a full flush (eg call commmit() or open a new
|
||||
during indexing and trigger a full flush (eg call commit() or open a new
|
||||
NRT reader) can use significantly more transient memory.
|
||||
|
||||
- IndexWriter#addDocument and IndexWriter.updateDocument can block indexing
|
||||
|
@ -1266,7 +1275,7 @@ Changes in Runtime Behavior
|
|||
|
||||
* LUCENE-3455: QueryParserBase.newFieldQuery() will throw a ParseException if
|
||||
any of the calls to the Analyzer throw an IOException. QueryParseBase.analyzeRangePart()
|
||||
will throw a RuntimException if an IOException is thrown by the Analyzer.
|
||||
will throw a RuntimeException if an IOException is thrown by the Analyzer.
|
||||
|
||||
* LUCENE-4127: IndexWriter will now throw IllegalArgumentException if
|
||||
the first token of an indexed field has 0 positionIncrement
|
||||
|
@ -1356,7 +1365,7 @@ API Changes
|
|||
customized on a per-field basis. (Robert Muir)
|
||||
|
||||
* LUCENE-3308: DuplicateFilter keepMode and processingMode have been converted to
|
||||
enums DuplicateFilter.KeepMode and DuplicateFilter.ProcessingMode repsectively.
|
||||
enums DuplicateFilter.KeepMode and DuplicateFilter.ProcessingMode respectively.
|
||||
|
||||
* LUCENE-3483: Move Function grouping collectors from Solr to grouping module.
|
||||
(Martijn van Groningen)
|
||||
|
@ -1514,7 +1523,7 @@ New features
|
|||
|
||||
* LUCENE-2742: Add native per-field postings format support. Codec lets you now
|
||||
register a postings format for each field and which is in turn recorded
|
||||
into the index. Postings formtas are maintained on a per-segment basis and be
|
||||
into the index. Postings formats are maintained on a per-segment basis and be
|
||||
resolved without knowing the actual postings format used for writing the segment.
|
||||
(Simon Willnauer)
|
||||
|
||||
|
@ -1722,7 +1731,7 @@ New features
|
|||
- o.a.l.analysis.miscellaneous.CapitalizationFilter: A TokenFilter that applies
|
||||
capitalization rules to tokens.
|
||||
- o.a.l.analysis.pattern: Package for pattern-based analysis, containing a
|
||||
CharFilter, Tokenizer, and Tokenfilter for transforming text with regexes.
|
||||
CharFilter, Tokenizer, and TokenFilter for transforming text with regexes.
|
||||
- o.a.l.analysis.synonym.SynonymFilter: A synonym filter that supports multi-word
|
||||
synonyms.
|
||||
- o.a.l.analysis.phonetic: Package for phonetic search, containing various
|
||||
|
@ -1894,7 +1903,7 @@ Bug fixes
|
|||
DocsAndPositionsEnum while merging (Marc Sturlese, Erick Erickson,
|
||||
Robert Muir, Simon Willnauer, Mike McCandless)
|
||||
|
||||
* LUCENE-3589: BytesRef copy(short) didnt set length.
|
||||
* LUCENE-3589: BytesRef copy(short) didn't set length.
|
||||
(Peter Chang via Robert Muir)
|
||||
|
||||
* LUCENE-3045: fixed QueryNodeImpl.containsTag(String key) that was
|
||||
|
@ -1997,6 +2006,51 @@ Build
|
|||
XSL. (Greg Bowyer, Uwe Schindler)
|
||||
|
||||
|
||||
======================= Lucene 3.6.2 =======================
|
||||
|
||||
Bug Fixes
|
||||
|
||||
* LUCENE-4234: Exception when FacetsCollector is used with ScoreFacetRequest,
|
||||
and the number of matching documents is too large. (Gilad Barkai via Shai Erera)
|
||||
|
||||
* LUCENE-2686, LUCENE-3505, LUCENE-4401: Fix BooleanQuery scorers to
|
||||
return correct freq().
|
||||
(Koji Sekiguchi, Mike McCandless, Liu Chao, Robert Muir)
|
||||
|
||||
* LUCENE-2501: Fixed rare thread-safety issue that could cause
|
||||
ArrayIndexOutOfBoundsException inside ByteBlockPool (Robert Muir,
|
||||
Mike McCandless)
|
||||
|
||||
* LUCENE-4297: BooleanScorer2 would multiply the coord() factor
|
||||
twice for conjunctions: for most users this is no problem, but
|
||||
if you had a customized Similarity that returned something other
|
||||
than 1 when overlap == maxOverlap (always the case for conjunctions),
|
||||
then the score would be incorrect. (Pascal Chollet, Robert Muir)
|
||||
|
||||
* LUCENE-4300: BooleanQuery's rewrite was not always safe: if you
|
||||
had a custom Similarity where coord(1,1) != 1F, then the rewritten
|
||||
query would be scored differently. (Robert Muir)
|
||||
|
||||
* LUCENE-4398: If you index many different field names in your
|
||||
documents then due to a bug in how it measures its RAM
|
||||
usage, IndexWriter would flush each segment too early eventually
|
||||
reaching the point where it flushes after every doc. (Tim Smith via
|
||||
Mike McCandless)
|
||||
|
||||
* LUCENE-4411: when sampling is enabled for a FacetRequest, its depth
|
||||
parameter is reset to the default (1), even if set otherwise.
|
||||
(Gilad Barkai via Shai Erera)
|
||||
|
||||
* LUCENE-4635: Fixed ArrayIndexOutOfBoundsException when in-memory
|
||||
terms index requires more than 2.1 GB RAM (indices with billions of
|
||||
terms). (Tom Burton-West via Mike McCandless)
|
||||
|
||||
Documentation
|
||||
|
||||
* LUCENE-4302: Fix facet userguide to have HTML loose doctype like
|
||||
all other javadocs. (Karl Nicholas via Uwe Schindler)
|
||||
|
||||
|
||||
======================= Lucene 3.6.1 =======================
|
||||
More information about this release, including any errata related to the
|
||||
release notes, upgrade instructions, or other changes may be found online at:
|
||||
|
@ -2043,7 +2097,7 @@ Tests
|
|||
random graph tokens. (Mike McCandless)
|
||||
|
||||
* LUCENE-3968: factor out LookaheadTokenFilter from
|
||||
MockGraphTokenFilter (Mike Mccandless)
|
||||
MockGraphTokenFilter (Mike McCandless)
|
||||
|
||||
|
||||
======================= Lucene 3.6.0 =======================
|
||||
|
@ -2323,7 +2377,7 @@ Bug fixes
|
|||
|
||||
* LUCENE-3876: Fix bug where positions for a document exceeding
|
||||
Integer.MAX_VALUE/2 would produce a corrupt index.
|
||||
(Simon Willnauer, Mike Mccandless, Robert Muir)
|
||||
(Simon Willnauer, Mike McCandless, Robert Muir)
|
||||
|
||||
* LUCENE-3880: UAX29URLEmailTokenizer now recognizes emails when the mailto:
|
||||
scheme is prepended. (Kai Gülzau, Steve Rowe)
|
||||
|
|
|
@ -19,8 +19,8 @@ package org.apache.lucene.analysis.ja.dict;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.util.fst.FST;
|
||||
import org.apache.lucene.util.fst.FST.Arc;
|
||||
import org.apache.lucene.util.fst.FST;
|
||||
|
||||
/**
|
||||
* Thin wrapper around an FST with root-arc caching for Japanese.
|
||||
|
@ -48,7 +48,7 @@ public final class TokenInfoFST {
|
|||
rootCache = cacheRootArcs();
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
@SuppressWarnings({"rawtypes","unchecked"})
|
||||
private FST.Arc<Long>[] cacheRootArcs() throws IOException {
|
||||
FST.Arc<Long> rootCache[] = new FST.Arc[1+(cacheCeiling-0x3040)];
|
||||
FST.Arc<Long> firstArc = new FST.Arc<Long>();
|
||||
|
|
Binary file not shown.
|
@ -132,7 +132,7 @@ public class TokenInfoDictionaryBuilder {
|
|||
System.out.println(" encode...");
|
||||
|
||||
PositiveIntOutputs fstOutput = PositiveIntOutputs.getSingleton(true);
|
||||
Builder<Long> fstBuilder = new Builder<Long>(FST.INPUT_TYPE.BYTE2, 0, 0, true, true, Integer.MAX_VALUE, fstOutput, null, true, true);
|
||||
Builder<Long> fstBuilder = new Builder<Long>(FST.INPUT_TYPE.BYTE2, 0, 0, true, true, Integer.MAX_VALUE, fstOutput, null, true, PackedInts.DEFAULT, true, 15);
|
||||
IntsRef scratch = new IntsRef();
|
||||
long ord = -1; // first ord will be 0
|
||||
String lastValue = null;
|
||||
|
|
|
@ -113,7 +113,7 @@ public final class MemoryPostingsFormat extends PostingsFormat {
|
|||
this.field = field;
|
||||
this.doPackFST = doPackFST;
|
||||
this.acceptableOverheadRatio = acceptableOverheadRatio;
|
||||
builder = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, doPackFST, acceptableOverheadRatio, true);
|
||||
builder = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, doPackFST, acceptableOverheadRatio, true, 15);
|
||||
}
|
||||
|
||||
private class PostingsWriter extends PostingsConsumer {
|
||||
|
|
|
@ -230,7 +230,7 @@ and proximity searches (though sentence identification is not provided by Lucene
create, or a combination of existing and newly created components. Before
pursuing this approach, you may find it worthwhile to explore the
<a href="{@docRoot}/../analyzers-common/overview-summary.html">analyzers-common</a> library and/or ask on the
<a href="http://lucene.apache.org/java/docs/mailinglists.html"
<a href="http://lucene.apache.org/core/discussion.html"
>java-user@lucene.apache.org mailing list</a> first to see if what you
need already exists. If you are still committed to creating your own
Analyzer, have a look at the source code of any one of the many samples
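The paragraph above points readers at the analyzers-common module and the mailing list before they write their own Analyzer. As a hedged sketch only (not part of this commit; it assumes the 4.x Analyzer API in which createComponents still receives a Reader, and uses Version.LUCENE_40 purely as an example constant), a minimal custom Analyzer chains a Tokenizer and a TokenFilter like this:

// Sketch under the assumptions above. Imports: org.apache.lucene.analysis.Analyzer,
// org.apache.lucene.analysis.TokenStream, org.apache.lucene.analysis.Tokenizer,
// org.apache.lucene.analysis.core.LowerCaseFilter,
// org.apache.lucene.analysis.standard.StandardTokenizer, org.apache.lucene.util.Version
Analyzer analyzer = new Analyzer() {
  @Override
  protected TokenStreamComponents createComponents(String fieldName, java.io.Reader reader) {
    Tokenizer source = new StandardTokenizer(Version.LUCENE_40, reader);   // standard word-boundary tokenizer
    TokenStream filtered = new LowerCaseFilter(Version.LUCENE_40, source); // then lowercase each token
    return new TokenStreamComponents(source, filtered);
  }
};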
|
|
@ -276,13 +276,13 @@ public class BlockTreeTermsReader extends FieldsProducer {
|
|||
*/
|
||||
public static class Stats {
|
||||
/** How many nodes in the index FST. */
|
||||
public int indexNodeCount;
|
||||
public long indexNodeCount;
|
||||
|
||||
/** How many arcs in the index FST. */
|
||||
public int indexArcCount;
|
||||
public long indexArcCount;
|
||||
|
||||
/** Byte size of the index. */
|
||||
public int indexNumBytes;
|
||||
public long indexNumBytes;
|
||||
|
||||
/** Total number of terms in the field. */
|
||||
public long totalTermCount;
|
||||
|
|
|
@ -23,7 +23,6 @@ import java.util.Comparator;
|
|||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.DocsEnum;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
|
@ -41,6 +40,7 @@ import org.apache.lucene.util.fst.BytesRefFSTEnum;
|
|||
import org.apache.lucene.util.fst.FST;
|
||||
import org.apache.lucene.util.fst.NoOutputs;
|
||||
import org.apache.lucene.util.fst.Util;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
||||
/*
|
||||
TODO:
|
||||
|
@ -187,7 +187,7 @@ public class BlockTreeTermsWriter extends FieldsConsumer {
|
|||
public final static int DEFAULT_MAX_BLOCK_SIZE = 48;
|
||||
|
||||
//public final static boolean DEBUG = false;
|
||||
private final static boolean SAVE_DOT_FILES = false;
|
||||
//private final static boolean SAVE_DOT_FILES = false;
|
||||
|
||||
static final int OUTPUT_FLAGS_NUM_BITS = 2;
|
||||
static final int OUTPUT_FLAGS_MASK = 0x3;
|
||||
|
@ -419,7 +419,8 @@ public class BlockTreeTermsWriter extends FieldsConsumer {
|
|||
final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
|
||||
final Builder<BytesRef> indexBuilder = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1,
|
||||
0, 0, true, false, Integer.MAX_VALUE,
|
||||
outputs, null, false, true);
|
||||
outputs, null, false,
|
||||
PackedInts.COMPACT, true, 15);
|
||||
//if (DEBUG) {
|
||||
// System.out.println(" compile index for prefix=" + prefix);
|
||||
//}
|
||||
|
@ -962,7 +963,9 @@ public class BlockTreeTermsWriter extends FieldsConsumer {
|
|||
0, 0, true,
|
||||
true, Integer.MAX_VALUE,
|
||||
noOutputs,
|
||||
new FindBlocks(), false, true);
|
||||
new FindBlocks(), false,
|
||||
PackedInts.COMPACT,
|
||||
true, 15);
|
||||
|
||||
postingsWriter.setField(fieldInfo);
|
||||
}
|
||||
|
|
|
@ -22,6 +22,7 @@ import java.io.IOException;
|
|||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
|
@ -3475,6 +3476,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
|
|||
diagnostics.put("os.version", Constants.OS_VERSION);
|
||||
diagnostics.put("java.version", Constants.JAVA_VERSION);
|
||||
diagnostics.put("java.vendor", Constants.JAVA_VENDOR);
|
||||
diagnostics.put("timestamp", Long.toString(new Date().getTime()));
|
||||
if (details != null) {
|
||||
diagnostics.putAll(details);
|
||||
}
|
||||
|
|
|
@ -115,15 +115,6 @@ public final class Norm {
|
|||
setType(Type.FIXED_INTS_64);
|
||||
this.field.setLongValue(norm);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets a packed long norm value.
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public void setPackedLong(long norm) {
|
||||
setType(Type.VAR_INTS);
|
||||
this.field.setLongValue(norm);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets a byte norm value
|
||||
|
|
|
@ -38,7 +38,7 @@ import org.apache.lucene.search.DocIdSetIterator;
|
|||
|
||||
public final class FixedBitSet extends DocIdSet implements Bits {
|
||||
private final long[] bits;
|
||||
private int numBits;
|
||||
private final int numBits;
|
||||
|
||||
/** returns the number of 64 bit words it would take to hold numBits */
|
||||
public static int bits2words(int numBits) {
|
||||
|
|
|
@ -36,9 +36,13 @@ import org.apache.lucene.util.packed.PackedInts;
* <p>NOTE: The algorithm is described at
* http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.24.3698</p>
*
* The parameterized type T is the output type. See the
* <p>The parameterized type T is the output type. See the
* subclasses of {@link Outputs}.
*
* <p>FSTs larger than 2.1GB are now possible (as of Lucene
* 4.2). FSTs containing more than 2.1B nodes are also now
* possible, however they cannot be packed.
*
* @lucene.experimental
*/

@ -84,22 +88,11 @@ public class Builder<T> {
/**
* Instantiates an FST/FSA builder without any pruning. A shortcut
* to {@link #Builder(FST.INPUT_TYPE, int, int, boolean,
* boolean, int, Outputs, FreezeTail, boolean, boolean)} with
* pruning options turned off.
* boolean, int, Outputs, FreezeTail, boolean, float,
* boolean, int)} with pruning options turned off.
*/
public Builder(FST.INPUT_TYPE inputType, Outputs<T> outputs) {
this(inputType, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, false, PackedInts.COMPACT, true);
}

/**
* Instantiates an FST/FSA builder with {@link PackedInts#DEFAULT}
* <code>acceptableOverheadRatio</code>.
*/
public Builder(FST.INPUT_TYPE inputType, int minSuffixCount1, int minSuffixCount2, boolean doShareSuffix,
boolean doShareNonSingletonNodes, int shareMaxTailLength, Outputs<T> outputs,
FreezeTail<T> freezeTail, boolean willPackFST, boolean allowArrayArcs) {
this(inputType, minSuffixCount1, minSuffixCount2, doShareSuffix, doShareNonSingletonNodes,
shareMaxTailLength, outputs, freezeTail, willPackFST, PackedInts.DEFAULT, allowArrayArcs);
this(inputType, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, false, PackedInts.COMPACT, true, 15);
}

/**
@ -147,10 +140,16 @@ public class Builder<T> {
* @param allowArrayArcs Pass false to disable the array arc optimization
* while building the FST; this will make the resulting
* FST smaller but slower to traverse.
*
* @param bytesPageBits How many bits wide to make each
* byte[] block in the BytesStore; if you know the FST
* will be large then make this larger. For example 15
* bits = 32768 byte pages.
*/
public Builder(FST.INPUT_TYPE inputType, int minSuffixCount1, int minSuffixCount2, boolean doShareSuffix,
boolean doShareNonSingletonNodes, int shareMaxTailLength, Outputs<T> outputs,
FreezeTail<T> freezeTail, boolean doPackFST, float acceptableOverheadRatio, boolean allowArrayArcs) {
FreezeTail<T> freezeTail, boolean doPackFST, float acceptableOverheadRatio, boolean allowArrayArcs,
int bytesPageBits) {
this.minSuffixCount1 = minSuffixCount1;
this.minSuffixCount2 = minSuffixCount2;
this.freezeTail = freezeTail;
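// Illustrative only (not part of this patch): with the bytesPageBits parameter documented above,
// a caller using the expanded constructor might pass the same defaults this commit uses elsewhere
// (PackedInts.COMPACT overhead, 15-bit pages). Hedged sketch:
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
Builder<Long> builder = new Builder<Long>(
    FST.INPUT_TYPE.BYTE1,
    0, 0,                  // minSuffixCount1, minSuffixCount2: no pruning
    true, true,            // doShareSuffix, doShareNonSingletonNodes
    Integer.MAX_VALUE,     // shareMaxTailLength
    outputs,
    null,                  // freezeTail
    false,                 // doPackFST
    PackedInts.COMPACT,    // acceptableOverheadRatio for packing
    true,                  // allowArrayArcs
    15);                   // bytesPageBits: 1 << 15 = 32768-byte byte[] blocks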
@ -158,9 +157,9 @@ public class Builder<T> {
|
|||
this.shareMaxTailLength = shareMaxTailLength;
|
||||
this.doPackFST = doPackFST;
|
||||
this.acceptableOverheadRatio = acceptableOverheadRatio;
|
||||
fst = new FST<T>(inputType, outputs, doPackFST, acceptableOverheadRatio, allowArrayArcs);
|
||||
fst = new FST<T>(inputType, outputs, doPackFST, acceptableOverheadRatio, allowArrayArcs, bytesPageBits);
|
||||
if (doShareSuffix) {
|
||||
dedupHash = new NodeHash<T>(fst);
|
||||
dedupHash = new NodeHash<T>(fst, fst.bytes.getReverseReader(false));
|
||||
} else {
|
||||
dedupHash = null;
|
||||
}
|
||||
|
@ -174,7 +173,7 @@ public class Builder<T> {
|
|||
}
|
||||
}
|
||||
|
||||
public int getTotStateCount() {
|
||||
public long getTotStateCount() {
|
||||
return fst.nodeCount;
|
||||
}
|
||||
|
||||
|
@ -182,12 +181,12 @@ public class Builder<T> {
|
|||
return frontier[0].inputCount;
|
||||
}
|
||||
|
||||
public int getMappedStateCount() {
|
||||
public long getMappedStateCount() {
|
||||
return dedupHash == null ? 0 : fst.nodeCount;
|
||||
}
|
||||
|
||||
private CompiledNode compileNode(UnCompiledNode<T> nodeIn, int tailLength) throws IOException {
|
||||
final int node;
|
||||
final long node;
|
||||
if (dedupHash != null && (doShareNonSingletonNodes || nodeIn.numArcs <= 1) && tailLength <= shareMaxTailLength) {
|
||||
if (nodeIn.numArcs == 0) {
|
||||
node = fst.addNode(nodeIn);
|
||||
|
@ -475,7 +474,7 @@ public class Builder<T> {
|
|||
fst.finish(compileNode(root, lastInput.length).node);
|
||||
|
||||
if (doPackFST) {
|
||||
return fst.pack(3, Math.max(10, fst.getNodeCount()/4), acceptableOverheadRatio);
|
||||
return fst.pack(3, Math.max(10, (int) (fst.getNodeCount()/4)), acceptableOverheadRatio);
|
||||
} else {
|
||||
return fst;
|
||||
}
|
||||
|
@ -513,8 +512,12 @@ public class Builder<T> {
|
|||
boolean isCompiled();
|
||||
}
|
||||
|
||||
public long fstSizeInBytes() {
|
||||
return fst.sizeInBytes();
|
||||
}
|
||||
|
||||
static final class CompiledNode implements Node {
|
||||
int node;
|
||||
long node;
|
||||
@Override
|
||||
public boolean isCompiled() {
|
||||
return true;
|
||||
|
|
|
@ -0,0 +1,468 @@
|
|||
package org.apache.lucene.util.fst;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.store.DataInput;
|
||||
import org.apache.lucene.store.DataOutput;
|
||||
|
||||
// TODO: merge with PagedBytes, except PagedBytes doesn't
// let you read while writing which FST needs

class BytesStore extends DataOutput {

private final List<byte[]> blocks = new ArrayList<byte[]>();

private final int blockSize;
private final int blockBits;
private final int blockMask;

private byte[] current;
private int nextWrite;

public BytesStore(int blockBits) {
this.blockBits = blockBits;
blockSize = 1 << blockBits;
blockMask = blockSize-1;
nextWrite = blockSize;
}
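// Not part of the patch: a hedged sketch of the paged addressing this constructor sets up.
// An absolute position splits into a block index (high bits) and an offset within the block
// (low bits); the names below are illustrative, not fields of this class:
int sketchBlockBits = 15;
int sketchBlockSize = 1 << sketchBlockBits;                   // 32768 bytes per byte[] block
int sketchBlockMask = sketchBlockSize - 1;                    // 0x7fff
long sketchPos = 100000L;
int sketchBlockIndex = (int) (sketchPos >> sketchBlockBits);  // 3  -> blocks.get(3)
int sketchOffset = (int) (sketchPos & sketchBlockMask);       // 1696 -> index within that block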
|
||||
|
||||
/** Pulls bytes from the provided IndexInput. */
|
||||
public BytesStore(DataInput in, int numBytes, int maxBlockSize) throws IOException {
|
||||
int blockSize = 2;
|
||||
int blockBits = 1;
|
||||
while(blockSize < numBytes && blockSize < maxBlockSize) {
|
||||
blockSize *= 2;
|
||||
blockBits++;
|
||||
}
|
||||
this.blockBits = blockBits;
|
||||
this.blockSize = blockSize;
|
||||
this.blockMask = blockSize-1;
|
||||
int left = numBytes;
|
||||
while(left > 0) {
|
||||
final int chunk = Math.min(blockSize, left);
|
||||
byte[] block = new byte[chunk];
|
||||
in.readBytes(block, 0, block.length);
|
||||
blocks.add(block);
|
||||
left -= chunk;
|
||||
}
|
||||
|
||||
// So .getPosition still works
|
||||
nextWrite = blocks.get(blocks.size()-1).length;
|
||||
}
|
||||
|
||||
/** Absolute write byte; you must ensure dest is < max
|
||||
* position written so far. */
|
||||
public void writeByte(int dest, byte b) {
|
||||
int blockIndex = dest >> blockBits;
|
||||
byte[] block = blocks.get(blockIndex);
|
||||
block[dest & blockMask] = b;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeByte(byte b) {
|
||||
if (nextWrite == blockSize) {
|
||||
current = new byte[blockSize];
|
||||
blocks.add(current);
|
||||
nextWrite = 0;
|
||||
}
|
||||
current[nextWrite++] = b;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeBytes(byte[] b, int offset, int len) {
|
||||
while (len > 0) {
|
||||
int chunk = blockSize - nextWrite;
|
||||
if (len <= chunk) {
|
||||
System.arraycopy(b, offset, current, nextWrite, len);
|
||||
nextWrite += len;
|
||||
break;
|
||||
} else {
|
||||
if (chunk > 0) {
|
||||
System.arraycopy(b, offset, current, nextWrite, chunk);
|
||||
offset += chunk;
|
||||
len -= chunk;
|
||||
}
|
||||
current = new byte[blockSize];
|
||||
blocks.add(current);
|
||||
nextWrite = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int getBlockBits() {
|
||||
return blockBits;
|
||||
}
|
||||
|
||||
/** Absolute writeBytes without changing the current
|
||||
* position. Note: this cannot "grow" the bytes, so you
|
||||
* must only call it on already written parts. */
|
||||
void writeBytes(long dest, byte[] b, int offset, int len) {
|
||||
//System.out.println(" BS.writeBytes dest=" + dest + " offset=" + offset + " len=" + len);
|
||||
assert dest + len <= getPosition(): "dest=" + dest + " pos=" + getPosition() + " len=" + len;
|
||||
|
||||
// Note: weird: must go "backwards" because copyBytes
|
||||
// calls us with overlapping src/dest. If we
|
||||
// go forwards then we overwrite bytes before we can
|
||||
// copy them:
|
||||
|
||||
/*
|
||||
int blockIndex = dest >> blockBits;
|
||||
int upto = dest & blockMask;
|
||||
byte[] block = blocks.get(blockIndex);
|
||||
while (len > 0) {
|
||||
int chunk = blockSize - upto;
|
||||
System.out.println(" cycle chunk=" + chunk + " len=" + len);
|
||||
if (len <= chunk) {
|
||||
System.arraycopy(b, offset, block, upto, len);
|
||||
break;
|
||||
} else {
|
||||
System.arraycopy(b, offset, block, upto, chunk);
|
||||
offset += chunk;
|
||||
len -= chunk;
|
||||
blockIndex++;
|
||||
block = blocks.get(blockIndex);
|
||||
upto = 0;
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
final long end = dest + len;
|
||||
int blockIndex = (int) (end >> blockBits);
|
||||
int downTo = (int) (end & blockMask);
|
||||
if (downTo == 0) {
|
||||
blockIndex--;
|
||||
downTo = blockSize;
|
||||
}
|
||||
byte[] block = blocks.get(blockIndex);
|
||||
|
||||
while (len > 0) {
|
||||
//System.out.println(" cycle downTo=" + downTo + " len=" + len);
|
||||
if (len <= downTo) {
|
||||
//System.out.println(" final: offset=" + offset + " len=" + len + " dest=" + (downTo-len));
|
||||
System.arraycopy(b, offset, block, downTo-len, len);
|
||||
break;
|
||||
} else {
|
||||
len -= downTo;
|
||||
//System.out.println(" partial: offset=" + (offset + len) + " len=" + downTo + " dest=0");
|
||||
System.arraycopy(b, offset + len, block, 0, downTo);
|
||||
blockIndex--;
|
||||
block = blocks.get(blockIndex);
|
||||
downTo = blockSize;
|
||||
}
|
||||
}
|
||||
}
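// Not part of the patch: the "go backwards" note above is the same rule memmove follows for
// overlapping ranges. Hedged, self-contained illustration with a plain byte[] (helper name is made up):
private static void sketchOverlappingCopyBackwards() {
  byte[] buf = {1, 2, 3, 4, 5, 0, 0};
  // copy 5 bytes from offset 0 to offset 2; a forward copy would re-read bytes it had
  // already overwritten, so walk from the last byte toward the first:
  for (int i = 4; i >= 0; i--) {
    buf[2 + i] = buf[i];
  }
  // buf is now {1, 2, 1, 2, 3, 4, 5}
}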
|
||||
|
||||
/** Absolute copy bytes self to self, without changing the
|
||||
* position. Note: this cannot "grow" the bytes, so must
|
||||
* only call it on already written parts. */
|
||||
public void copyBytes(long src, long dest, int len) {
|
||||
//System.out.println("BS.copyBytes src=" + src + " dest=" + dest + " len=" + len);
|
||||
assert src < dest;
|
||||
|
||||
// Note: weird: must go "backwards" because copyBytes
|
||||
// calls us with overlapping src/dest. If we
|
||||
// go forwards then we overwrite bytes before we can
|
||||
// copy them:
|
||||
|
||||
/*
|
||||
int blockIndex = src >> blockBits;
|
||||
int upto = src & blockMask;
|
||||
byte[] block = blocks.get(blockIndex);
|
||||
while (len > 0) {
|
||||
int chunk = blockSize - upto;
|
||||
System.out.println(" cycle: chunk=" + chunk + " len=" + len);
|
||||
if (len <= chunk) {
|
||||
writeBytes(dest, block, upto, len);
|
||||
break;
|
||||
} else {
|
||||
writeBytes(dest, block, upto, chunk);
|
||||
blockIndex++;
|
||||
block = blocks.get(blockIndex);
|
||||
upto = 0;
|
||||
len -= chunk;
|
||||
dest += chunk;
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
long end = src + len;
|
||||
|
||||
int blockIndex = (int) (end >> blockBits);
|
||||
int downTo = (int) (end & blockMask);
|
||||
if (downTo == 0) {
|
||||
blockIndex--;
|
||||
downTo = blockSize;
|
||||
}
|
||||
byte[] block = blocks.get(blockIndex);
|
||||
|
||||
while (len > 0) {
|
||||
//System.out.println(" cycle downTo=" + downTo);
|
||||
if (len <= downTo) {
|
||||
//System.out.println(" finish");
|
||||
writeBytes(dest, block, downTo-len, len);
|
||||
break;
|
||||
} else {
|
||||
//System.out.println(" partial");
|
||||
len -= downTo;
|
||||
writeBytes(dest + len, block, 0, downTo);
|
||||
blockIndex--;
|
||||
block = blocks.get(blockIndex);
|
||||
downTo = blockSize;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Writes an int at the absolute position without
|
||||
* changing the current pointer. */
|
||||
public void writeInt(long pos, int value) {
|
||||
int blockIndex = (int) (pos >> blockBits);
|
||||
int upto = (int) (pos & blockMask);
|
||||
byte[] block = blocks.get(blockIndex);
|
||||
int shift = 24;
|
||||
for(int i=0;i<4;i++) {
|
||||
block[upto++] = (byte) (value >> shift);
|
||||
shift -= 8;
|
||||
if (upto == blockSize) {
|
||||
upto = 0;
|
||||
blockIndex++;
|
||||
block = blocks.get(blockIndex);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Reverse from srcPos, inclusive, to destPos, inclusive. */
|
||||
public void reverse(long srcPos, long destPos) {
|
||||
assert srcPos < destPos;
|
||||
assert destPos < getPosition();
|
||||
//System.out.println("reverse src=" + srcPos + " dest=" + destPos);
|
||||
|
||||
int srcBlockIndex = (int) (srcPos >> blockBits);
|
||||
int src = (int) (srcPos & blockMask);
|
||||
byte[] srcBlock = blocks.get(srcBlockIndex);
|
||||
|
||||
int destBlockIndex = (int) (destPos >> blockBits);
|
||||
int dest = (int) (destPos & blockMask);
|
||||
byte[] destBlock = blocks.get(destBlockIndex);
|
||||
//System.out.println(" srcBlock=" + srcBlockIndex + " destBlock=" + destBlockIndex);
|
||||
|
||||
int limit = (int) (destPos - srcPos + 1)/2;
|
||||
for(int i=0;i<limit;i++) {
|
||||
//System.out.println(" cycle src=" + src + " dest=" + dest);
|
||||
byte b = srcBlock[src];
|
||||
srcBlock[src] = destBlock[dest];
|
||||
destBlock[dest] = b;
|
||||
src++;
|
||||
if (src == blockSize) {
|
||||
srcBlockIndex++;
|
||||
srcBlock = blocks.get(srcBlockIndex);
|
||||
//System.out.println(" set destBlock=" + destBlock + " srcBlock=" + srcBlock);
|
||||
src = 0;
|
||||
}
|
||||
|
||||
dest--;
|
||||
if (dest == -1) {
|
||||
destBlockIndex--;
|
||||
destBlock = blocks.get(destBlockIndex);
|
||||
//System.out.println(" set destBlock=" + destBlock + " srcBlock=" + srcBlock);
|
||||
dest = blockSize-1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void skipBytes(int len) {
|
||||
while (len > 0) {
|
||||
int chunk = blockSize - nextWrite;
|
||||
if (len <= chunk) {
|
||||
nextWrite += len;
|
||||
break;
|
||||
} else {
|
||||
len -= chunk;
|
||||
current = new byte[blockSize];
|
||||
blocks.add(current);
|
||||
nextWrite = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public long getPosition() {
|
||||
return ((long) blocks.size()-1) * blockSize + nextWrite;
|
||||
}
|
||||
|
||||
/** Pos must be less than the max position written so far!
|
||||
* Ie, you cannot "grow" the file with this! */
|
||||
public void truncate(long newLen) {
|
||||
assert newLen <= getPosition();
|
||||
assert newLen >= 0;
|
||||
int blockIndex = (int) (newLen >> blockBits);
|
||||
nextWrite = (int) (newLen & blockMask);
|
||||
if (nextWrite == 0) {
|
||||
blockIndex--;
|
||||
nextWrite = blockSize;
|
||||
}
|
||||
blocks.subList(blockIndex+1, blocks.size()).clear();
|
||||
if (newLen == 0) {
|
||||
current = null;
|
||||
} else {
|
||||
current = blocks.get(blockIndex);
|
||||
}
|
||||
assert newLen == getPosition();
|
||||
}
|
||||
|
||||
public void finish() {
|
||||
if (current != null) {
|
||||
byte[] lastBuffer = new byte[nextWrite];
|
||||
System.arraycopy(current, 0, lastBuffer, 0, nextWrite);
|
||||
blocks.set(blocks.size()-1, lastBuffer);
|
||||
current = null;
|
||||
}
|
||||
}
|
||||
|
||||
/** Writes all of our bytes to the target {@link DataOutput}. */
|
||||
public void writeTo(DataOutput out) throws IOException {
|
||||
for(byte[] block : blocks) {
|
||||
out.writeBytes(block, 0, block.length);
|
||||
}
|
||||
}
|
||||
|
||||
public FST.BytesReader getForwardReader() {
|
||||
if (blocks.size() == 1) {
|
||||
return new ForwardBytesReader(blocks.get(0));
|
||||
}
|
||||
return new FST.BytesReader() {
|
||||
private byte[] current;
|
||||
private int nextBuffer;
|
||||
private int nextRead = blockSize;
|
||||
|
||||
@Override
|
||||
public byte readByte() {
|
||||
if (nextRead == blockSize) {
|
||||
current = blocks.get(nextBuffer++);
|
||||
nextRead = 0;
|
||||
}
|
||||
return current[nextRead++];
|
||||
}
|
||||
|
||||
@Override
|
||||
public void skipBytes(int count) {
|
||||
setPosition(getPosition() + count);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void readBytes(byte[] b, int offset, int len) {
|
||||
while(len > 0) {
|
||||
int chunkLeft = blockSize - nextRead;
|
||||
if (len <= chunkLeft) {
|
||||
System.arraycopy(current, nextRead, b, offset, len);
|
||||
nextRead += len;
|
||||
break;
|
||||
} else {
|
||||
if (chunkLeft > 0) {
|
||||
System.arraycopy(current, nextRead, b, offset, chunkLeft);
|
||||
offset += chunkLeft;
|
||||
len -= chunkLeft;
|
||||
}
|
||||
current = blocks.get(nextBuffer++);
|
||||
nextRead = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getPosition() {
|
||||
return ((long) nextBuffer-1)*blockSize + nextRead;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setPosition(long pos) {
|
||||
int bufferIndex = (int) (pos >> blockBits);
|
||||
nextBuffer = bufferIndex+1;
|
||||
current = blocks.get(bufferIndex);
|
||||
nextRead = (int) (pos & blockMask);
|
||||
assert getPosition() == pos;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean reversed() {
|
||||
return false;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
public FST.BytesReader getReverseReader() {
|
||||
return getReverseReader(true);
|
||||
}
|
||||
|
||||
FST.BytesReader getReverseReader(boolean allowSingle) {
|
||||
if (allowSingle && blocks.size() == 1) {
|
||||
return new ReverseBytesReader(blocks.get(0));
|
||||
}
|
||||
return new FST.BytesReader() {
|
||||
private byte[] current = blocks.size() == 0 ? null : blocks.get(0);
|
||||
private int nextBuffer = -1;
|
||||
private int nextRead = 0;
|
||||
|
||||
@Override
|
||||
public byte readByte() {
|
||||
if (nextRead == -1) {
|
||||
current = blocks.get(nextBuffer--);
|
||||
nextRead = blockSize-1;
|
||||
}
|
||||
return current[nextRead--];
|
||||
}
|
||||
|
||||
@Override
|
||||
public void skipBytes(int count) {
|
||||
setPosition(getPosition() - count);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void readBytes(byte[] b, int offset, int len) {
|
||||
for(int i=0;i<len;i++) {
|
||||
b[offset+i] = readByte();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getPosition() {
|
||||
return ((long) nextBuffer+1)*blockSize + nextRead;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setPosition(long pos) {
|
||||
// NOTE: a little weird because if you
|
||||
// setPosition(0), the next byte you read is
|
||||
// bytes[0] ... but I would expect bytes[-1] (ie,
|
||||
// EOF)...?
|
||||
int bufferIndex = (int) (pos >> blockBits);
|
||||
nextBuffer = bufferIndex-1;
|
||||
current = blocks.get(bufferIndex);
|
||||
nextRead = (int) (pos & blockMask);
|
||||
assert getPosition() == pos: "pos=" + pos + " getPos()=" + getPosition();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean reversed() {
|
||||
return true;
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
|
@ -17,11 +17,11 @@ package org.apache.lucene.util.fst;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/** Can next() and advance() through the terms in an FST
|
||||
*
|
||||
* @lucene.experimental
|
||||
|
@ -153,8 +153,8 @@ abstract class FSTEnum<T> {
|
|||
boolean found = false;
|
||||
while (low <= high) {
|
||||
mid = (low + high) >>> 1;
|
||||
in.pos = arc.posArcsStart;
|
||||
in.skip(arc.bytesPerArc*mid+1);
|
||||
in.setPosition(arc.posArcsStart);
|
||||
in.skipBytes(arc.bytesPerArc*mid+1);
|
||||
final int midLabel = fst.readLabel(in);
|
||||
final int cmp = midLabel - targetLabel;
|
||||
//System.out.println(" cycle low=" + low + " high=" + high + " mid=" + mid + " midLabel=" + midLabel + " cmp=" + cmp);
|
||||
|
@ -292,8 +292,8 @@ abstract class FSTEnum<T> {
|
|||
boolean found = false;
|
||||
while (low <= high) {
|
||||
mid = (low + high) >>> 1;
|
||||
in.pos = arc.posArcsStart;
|
||||
in.skip(arc.bytesPerArc*mid+1);
|
||||
in.setPosition(arc.posArcsStart);
|
||||
in.skipBytes(arc.bytesPerArc*mid+1);
|
||||
final int midLabel = fst.readLabel(in);
|
||||
final int cmp = midLabel - targetLabel;
|
||||
//System.out.println(" cycle low=" + low + " high=" + high + " mid=" + mid + " midLabel=" + midLabel + " cmp=" + cmp);
|
||||
|
|
|
@ -0,0 +1,62 @@
|
|||
package org.apache.lucene.util.fst;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// TODO: can we use just ByteArrayDataInput...? need to
|
||||
// add a .skipBytes to DataInput.. hmm and .setPosition
|
||||
|
||||
/** Reads from a single byte[]. */
|
||||
final class ForwardBytesReader extends FST.BytesReader {
|
||||
private final byte[] bytes;
|
||||
private int pos;
|
||||
|
||||
public ForwardBytesReader(byte[] bytes) {
|
||||
this.bytes = bytes;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte readByte() {
|
||||
return bytes[pos++];
|
||||
}
|
||||
|
||||
@Override
|
||||
public void readBytes(byte[] b, int offset, int len) {
|
||||
System.arraycopy(bytes, pos, b, offset, len);
|
||||
pos += len;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void skipBytes(int count) {
|
||||
pos += count;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getPosition() {
|
||||
return pos;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setPosition(long pos) {
|
||||
this.pos = (int) pos;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean reversed() {
|
||||
return false;
|
||||
}
|
||||
}
|
|
@ -19,22 +19,27 @@ package org.apache.lucene.util.fst;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.util.packed.GrowableWriter;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
||||
// Used to dedup states (lookup already-frozen states)
|
||||
final class NodeHash<T> {
|
||||
|
||||
private int[] table;
|
||||
private GrowableWriter table;
|
||||
private int count;
|
||||
private int mask;
|
||||
private final FST<T> fst;
|
||||
private final FST.Arc<T> scratchArc = new FST.Arc<T>();
|
||||
private final FST.BytesReader in;
|
||||
|
||||
public NodeHash(FST<T> fst) {
|
||||
table = new int[16];
|
||||
public NodeHash(FST<T> fst, FST.BytesReader in) {
|
||||
table = new GrowableWriter(8, 16, PackedInts.COMPACT);
|
||||
mask = 15;
|
||||
this.fst = fst;
|
||||
this.in = in;
|
||||
}
|
||||
|
||||
private boolean nodesEqual(Builder.UnCompiledNode<T> node, int address, FST.BytesReader in) throws IOException {
|
||||
private boolean nodesEqual(Builder.UnCompiledNode<T> node, long address) throws IOException {
|
||||
fst.readFirstRealTargetArc(address, scratchArc, in);
|
||||
if (scratchArc.bytesPerArc != 0 && node.numArcs != scratchArc.numArcs) {
|
||||
return false;
|
||||
|
@ -73,7 +78,8 @@ final class NodeHash<T> {
|
|||
final Builder.Arc<T> arc = node.arcs[arcIdx];
|
||||
//System.out.println(" label=" + arc.label + " target=" + ((Builder.CompiledNode) arc.target).node + " h=" + h + " output=" + fst.outputs.outputToString(arc.output) + " isFinal?=" + arc.isFinal);
|
||||
h = PRIME * h + arc.label;
|
||||
h = PRIME * h + ((Builder.CompiledNode) arc.target).node;
|
||||
long n = ((Builder.CompiledNode) arc.target).node;
|
||||
h = PRIME * h + (int) (n^(n>>32));
|
||||
h = PRIME * h + arc.output.hashCode();
|
||||
h = PRIME * h + arc.nextFinalOutput.hashCode();
|
||||
if (arc.isFinal) {
|
||||
|
@ -85,16 +91,15 @@ final class NodeHash<T> {
|
|||
}
|
||||
|
||||
// hash code for a frozen node
|
||||
private int hash(int node) throws IOException {
|
||||
private int hash(long node) throws IOException {
|
||||
final int PRIME = 31;
|
||||
final FST.BytesReader in = fst.getBytesReader(0);
|
||||
//System.out.println("hash frozen node=" + node);
|
||||
int h = 0;
|
||||
fst.readFirstRealTargetArc(node, scratchArc, in);
|
||||
while(true) {
|
||||
//System.out.println(" label=" + scratchArc.label + " target=" + scratchArc.target + " h=" + h + " output=" + fst.outputs.outputToString(scratchArc.output) + " next?=" + scratchArc.flag(4) + " final?=" + scratchArc.isFinal());
|
||||
//System.out.println(" label=" + scratchArc.label + " target=" + scratchArc.target + " h=" + h + " output=" + fst.outputs.outputToString(scratchArc.output) + " next?=" + scratchArc.flag(4) + " final?=" + scratchArc.isFinal() + " pos=" + in.getPosition());
|
||||
h = PRIME * h + scratchArc.label;
|
||||
h = PRIME * h + scratchArc.target;
|
||||
h = PRIME * h + (int) (scratchArc.target^(scratchArc.target>>32));
|
||||
h = PRIME * h + scratchArc.output.hashCode();
|
||||
h = PRIME * h + scratchArc.nextFinalOutput.hashCode();
|
||||
if (scratchArc.isFinal()) {
|
||||
|
@ -109,26 +114,25 @@ final class NodeHash<T> {
|
|||
return h & Integer.MAX_VALUE;
|
||||
}
|
||||
|
||||
public int add(Builder.UnCompiledNode<T> nodeIn) throws IOException {
|
||||
// System.out.println("hash: add count=" + count + " vs " + table.length);
|
||||
final FST.BytesReader in = fst.getBytesReader(0);
|
||||
public long add(Builder.UnCompiledNode<T> nodeIn) throws IOException {
|
||||
// System.out.println("hash: add count=" + count + " vs " + table.size());
|
||||
final int h = hash(nodeIn);
|
||||
int pos = h & mask;
|
||||
int c = 0;
|
||||
while(true) {
|
||||
final int v = table[pos];
|
||||
final long v = table.get(pos);
|
||||
if (v == 0) {
|
||||
// freeze & add
|
||||
final int node = fst.addNode(nodeIn);
|
||||
final long node = fst.addNode(nodeIn);
|
||||
//System.out.println(" now freeze node=" + node);
|
||||
assert hash(node) == h : "frozenHash=" + hash(node) + " vs h=" + h;
|
||||
count++;
|
||||
table[pos] = node;
|
||||
if (table.length < 2*count) {
|
||||
table.set(pos, node);
|
||||
if (table.size() < 2*count) {
|
||||
rehash();
|
||||
}
|
||||
return node;
|
||||
} else if (nodesEqual(nodeIn, v, in)) {
|
||||
} else if (nodesEqual(nodeIn, v)) {
|
||||
// same node is already here
|
||||
return v;
|
||||
}
|
||||
|
@ -139,12 +143,12 @@ final class NodeHash<T> {
|
|||
}
|
||||
|
||||
// called only by rehash
|
||||
private void addNew(int address) throws IOException {
|
||||
private void addNew(long address) throws IOException {
|
||||
int pos = hash(address) & mask;
|
||||
int c = 0;
|
||||
while(true) {
|
||||
if (table[pos] == 0) {
|
||||
table[pos] = address;
|
||||
if (table.get(pos) == 0) {
|
||||
table.set(pos, address);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -154,16 +158,16 @@ final class NodeHash<T> {
}

private void rehash() throws IOException {
final int[] oldTable = table;
final GrowableWriter oldTable = table;

if (oldTable.length >= Integer.MAX_VALUE/2) {
if (oldTable.size() >= Integer.MAX_VALUE/2) {
throw new IllegalStateException("FST too large (> 2.1 GB)");
}

table = new int[2*table.length];
mask = table.length-1;
for(int idx=0;idx<oldTable.length;idx++) {
final int address = oldTable[idx];
table = new GrowableWriter(oldTable.getBitsPerValue(), 2*oldTable.size(), PackedInts.COMPACT);
mask = table.size()-1;
for(int idx=0;idx<oldTable.size();idx++) {
final long address = oldTable.get(idx);
if (address != 0) {
addNew(address);
}
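// Not part of the patch: a hedged sketch of the GrowableWriter usage the hunks above switch to.
// GrowableWriter (org.apache.lucene.util.packed) is a packed array of non-negative longs that is
// meant to widen its bits-per-value as larger values arrive, which is why it can replace the old
// int[] table while holding long node addresses. Names are illustrative:
GrowableWriter sketchTable = new GrowableWriter(8, 16, PackedInts.COMPACT); // start at 8 bits/value, 16 slots
sketchTable.set(3, 5000000000L);         // a node address that no longer fits in an int
long sketchAddress = sketchTable.get(3); // 5000000000
int sketchSlots = sketchTable.size();    // 16; rehash() above doubles this and re-adds entries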
|
|
@ -0,0 +1,61 @@
|
|||
package org.apache.lucene.util.fst;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/** Reads in reverse from a single byte[]. */
|
||||
final class ReverseBytesReader extends FST.BytesReader {
|
||||
private final byte[] bytes;
|
||||
private int pos;
|
||||
|
||||
public ReverseBytesReader(byte[] bytes) {
|
||||
this.bytes = bytes;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte readByte() {
|
||||
return bytes[pos--];
|
||||
}
|
||||
|
||||
@Override
|
||||
public void readBytes(byte[] b, int offset, int len) {
|
||||
for(int i=0;i<len;i++) {
|
||||
b[offset+i] = bytes[pos--];
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void skipBytes(int count) {
|
||||
pos -= count;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getPosition() {
|
||||
return pos;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setPosition(long pos) {
|
||||
this.pos = (int) pos;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean reversed() {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
@ -39,7 +39,7 @@ public final class Util {
|
|||
// TODO: would be nice not to alloc this on every lookup
|
||||
final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>());
|
||||
|
||||
final FST.BytesReader fstReader = fst.getBytesReader(0);
|
||||
final BytesReader fstReader = fst.getBytesReader(0);
|
||||
|
||||
// Accumulate output as we go
|
||||
T output = fst.outputs.getNoOutput();
|
||||
|
@ -64,7 +64,7 @@ public final class Util {
|
|||
public static<T> T get(FST<T> fst, BytesRef input) throws IOException {
|
||||
assert fst.inputType == FST.INPUT_TYPE.BYTE1;
|
||||
|
||||
final FST.BytesReader fstReader = fst.getBytesReader(0);
|
||||
final BytesReader fstReader = fst.getBytesReader(0);
|
||||
|
||||
// TODO: would be nice not to alloc this on every lookup
|
||||
final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>());
|
||||
|
@ -101,7 +101,7 @@ public final class Util {
|
|||
* fit this. */
|
||||
public static IntsRef getByOutput(FST<Long> fst, long targetOutput) throws IOException {
|
||||
|
||||
final FST.BytesReader in = fst.getBytesReader(0);
|
||||
final BytesReader in = fst.getBytesReader(0);
|
||||
|
||||
// TODO: would be nice not to alloc this on every lookup
|
||||
FST.Arc<Long> arc = fst.getFirstArc(new FST.Arc<Long>());
|
||||
|
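getByOutput walks the FST by output rather than by input, so it is only meaningful when the Long outputs grow monotonically with the sorted inputs, for example when they are ordinals. A minimal sketch using only calls that appear elsewhere in this commit; the class name is hypothetical:

import java.io.IOException;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;

public class GetByOutputDemo {
  public static void main(String[] args) throws IOException {
    PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
    Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);

    // Inputs must be added in sorted order; the outputs grow with that order.
    builder.add(Util.toIntsRef(new BytesRef("cat"), new IntsRef()), 1L);
    builder.add(Util.toIntsRef(new BytesRef("dog"), new IntsRef()), 2L);
    FST<Long> fst = builder.finish();

    long ord = Util.get(fst, new BytesRef("dog"));  // forward lookup: 2
    IntsRef key = Util.getByOutput(fst, ord);       // reverse lookup: the byte values of "dog"
    System.out.println("ord=" + ord + " key=" + key);
  }
}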
@@ -147,8 +147,8 @@ public final class Util {
|
|||
boolean exact = false;
|
||||
while (low <= high) {
|
||||
mid = (low + high) >>> 1;
|
||||
in.pos = arc.posArcsStart;
|
||||
in.skip(arc.bytesPerArc*mid);
|
||||
in.setPosition(arc.posArcsStart);
|
||||
in.skipBytes(arc.bytesPerArc*mid);
|
||||
final byte flags = in.readByte();
|
||||
fst.readLabel(in);
|
||||
final long minArcOutput;
|
||||
|
@@ -273,7 +273,7 @@ public final class Util {
|
|||
public static class TopNSearcher<T> {
|
||||
|
||||
private final FST<T> fst;
|
||||
private final FST.BytesReader bytesReader;
|
||||
private final BytesReader bytesReader;
|
||||
private final int topN;
|
||||
private final int maxQueueDepth;
|
||||
|
||||
|
@@ -374,7 +374,7 @@ public final class Util {
|
|||
|
||||
//System.out.println("search topN=" + topN);
|
||||
|
||||
final FST.BytesReader fstReader = fst.getBytesReader(0);
|
||||
final BytesReader fstReader = fst.getBytesReader(0);
|
||||
final T NO_OUTPUT = fst.outputs.getNoOutput();
|
||||
|
||||
// TODO: we could enable FST to sort arcs by weight
|
||||
|
@@ -544,7 +544,9 @@ public final class Util {
|
|||
* </pre>
|
||||
*
|
||||
* <p>
|
||||
* Note: larger FSTs (a few thousand nodes) won't even render, don't bother.
|
||||
* Note: larger FSTs (a few thousand nodes) won't even
|
||||
* render, don't bother. If the FST is > 2.1 GB in size
|
||||
* then this method will throw strange exceptions.
|
||||
*
|
||||
* @param sameRank
|
||||
* If <code>true</code>, the resulting <code>dot</code> file will try
|
||||
|
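A hedged sketch of driving this method, assuming the Util.toDot(FST, Writer, boolean sameRank, boolean labelStates) overload that the sameRank parameter documented above belongs to; per the note, only try this on small FSTs:

import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;

import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Util;

// Hypothetical helper: dump a small FST in GraphViz format (pipe the output into
// something like "dot -Tpng -o fst.png" to render it).
public class DotDump {
  public static <T> void dump(FST<T> fst) throws IOException {
    Writer out = new OutputStreamWriter(System.out, "UTF-8");
    Util.toDot(fst, out, true, true);   // sameRank=true, labelStates=true
    out.flush();
  }
}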
@@ -578,7 +580,7 @@ public final class Util {
|
|||
|
||||
// A bitset of already seen states (target offset).
|
||||
final BitSet seen = new BitSet();
|
||||
seen.set(startArc.target);
|
||||
seen.set((int) startArc.target);
|
||||
|
||||
// Shape for states.
|
||||
final String stateShape = "circle";
|
||||
|
@@ -595,7 +597,7 @@ public final class Util {
|
|||
emitDotState(out, "initial", "point", "white", "");
|
||||
|
||||
final T NO_OUTPUT = fst.outputs.getNoOutput();
|
||||
final FST.BytesReader r = fst.getBytesReader(0);
|
||||
final BytesReader r = fst.getBytesReader(0);
|
||||
|
||||
// final FST.Arc<T> scratchArc = new FST.Arc<T>();
|
||||
|
||||
|
@@ -617,7 +619,7 @@ public final class Util {
|
|||
finalOutput = null;
|
||||
}
|
||||
|
||||
emitDotState(out, Integer.toString(startArc.target), isFinal ? finalStateShape : stateShape, stateColor, finalOutput == null ? "" : fst.outputs.outputToString(finalOutput));
|
||||
emitDotState(out, Long.toString(startArc.target), isFinal ? finalStateShape : stateShape, stateColor, finalOutput == null ? "" : fst.outputs.outputToString(finalOutput));
|
||||
}
|
||||
|
||||
out.write(" initial -> " + startArc.target + "\n");
|
||||
|
@@ -638,7 +640,8 @@ public final class Util {
|
|||
if (FST.targetHasArcs(arc)) {
|
||||
// scan all target arcs
|
||||
//System.out.println(" readFirstTarget...");
|
||||
final int node = arc.target;
|
||||
|
||||
final long node = arc.target;
|
||||
|
||||
fst.readFirstRealTargetArc(arc.target, arc, r);
|
||||
|
||||
|
@@ -648,7 +651,7 @@ public final class Util {
|
|||
|
||||
//System.out.println(" cycle arc=" + arc);
|
||||
// Emit the unseen state and add it to the queue for the next level.
|
||||
if (arc.target >= 0 && !seen.get(arc.target)) {
|
||||
if (arc.target >= 0 && !seen.get((int) arc.target)) {
|
||||
|
||||
/*
|
||||
boolean isFinal = false;
|
||||
|
@@ -675,12 +678,12 @@ public final class Util {
|
|||
finalOutput = "";
|
||||
}
|
||||
|
||||
emitDotState(out, Integer.toString(arc.target), stateShape, stateColor, finalOutput);
|
||||
emitDotState(out, Long.toString(arc.target), stateShape, stateColor, finalOutput);
|
||||
// To see the node address, use this instead:
|
||||
//emitDotState(out, Integer.toString(arc.target), stateShape, stateColor, String.valueOf(arc.target));
|
||||
seen.set(arc.target);
|
||||
seen.set((int) arc.target);
|
||||
nextLevelQueue.add(new FST.Arc<T>().copyFrom(arc));
|
||||
sameLevelStates.add(arc.target);
|
||||
sameLevelStates.add((int) arc.target);
|
||||
}
|
||||
|
||||
String outs;
|
||||
|
@@ -893,8 +896,8 @@ public final class Util {
|
|||
// " targetLabel=" + targetLabel);
|
||||
while (low <= high) {
|
||||
mid = (low + high) >>> 1;
|
||||
in.pos = arc.posArcsStart;
|
||||
in.skip(arc.bytesPerArc * mid + 1);
|
||||
in.setPosition(arc.posArcsStart);
|
||||
in.skipBytes(arc.bytesPerArc * mid + 1);
|
||||
final int midLabel = fst.readLabel(in);
|
||||
final int cmp = midLabel - label;
|
||||
// System.out.println(" cycle low=" + low + " high=" + high + " mid=" +
|
||||
|
|
|
@@ -99,7 +99,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
|
|||
createIndex("index.nocfs", false, false);
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
// These are only needed for the special upgrade test to verify
|
||||
// that single-segment indexes are also correctly upgraded by IndexUpgrader.
|
||||
|
@@ -115,8 +115,40 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
|
|||
}
|
||||
|
||||
*/
|
||||
|
||||
/*
|
||||
public void testCreateMoreTermsIndex() throws Exception {
|
||||
// we use a real directory name that is not cleaned up,
|
||||
// because this method is only used to create backwards
|
||||
// indexes:
|
||||
File indexDir = new File("moreterms");
|
||||
_TestUtil.rmDir(indexDir);
|
||||
Directory dir = newFSDirectory(indexDir);
|
||||
|
||||
LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
|
||||
mp.setUseCompoundFile(false);
|
||||
mp.setNoCFSRatio(1.0);
|
||||
mp.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);
|
||||
// TODO: remove randomness
|
||||
IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
|
||||
.setMergePolicy(mp);
|
||||
conf.setCodec(Codec.forName("Lucene40"));
|
||||
IndexWriter writer = new IndexWriter(dir, conf);
|
||||
LineFileDocs docs = new LineFileDocs(null, true);
|
||||
for(int i=0;i<50;i++) {
|
||||
writer.addDocument(docs.nextDoc());
|
||||
}
|
||||
writer.close();
|
||||
dir.close();
|
||||
|
||||
// Gives you time to copy the index out!: (there is also
|
||||
// a test option to not remove temp dir...):
|
||||
Thread.sleep(100000);
|
||||
}
|
||||
*/
|
||||
|
||||
final static String[] oldNames = {"40.cfs",
|
||||
"40.nocfs",
|
||||
"40.nocfs",
|
||||
};
|
||||
|
||||
final String[] unsupportedNames = {"19.cfs",
|
||||
|
@@ -144,7 +176,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
|
|||
};
|
||||
|
||||
final static String[] oldSingleSegmentNames = {"40.optimized.cfs",
|
||||
"40.optimized.nocfs",
|
||||
"40.optimized.nocfs",
|
||||
};
|
||||
|
||||
static Map<String,Directory> oldIndexDirs;
|
||||
|
@@ -916,4 +948,15 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
|
|||
dir.close();
|
||||
}
|
||||
}
|
||||
|
||||
public static final String moreTermsIndex = "moreterms.40.zip";
|
||||
|
||||
public void testMoreTerms() throws Exception {
|
||||
File oldIndexDir = _TestUtil.getTempDir("moreterms");
|
||||
_TestUtil.unzip(getDataFile(moreTermsIndex), oldIndexDir);
|
||||
Directory dir = newFSDirectory(oldIndexDir);
|
||||
// TODO: more tests
|
||||
_TestUtil.checkIndex(dir);
|
||||
dir.close();
|
||||
}
|
||||
}
|
||||
|
|
|
@@ -22,7 +22,6 @@ import java.util.Random;
|
|||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.DocValues.Source;
|
||||
import org.apache.lucene.index.DocValues.Type;
|
||||
|
@@ -31,12 +30,14 @@ import org.apache.lucene.search.TermStatistics;
|
|||
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
||||
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.search.similarities.Similarity.ExactSimScorer;
|
||||
import org.apache.lucene.search.similarities.Similarity.SimWeight;
|
||||
import org.apache.lucene.search.similarities.Similarity.SloppySimScorer;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LineFileDocs;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
||||
/**
|
||||
*
|
||||
|
@@ -86,39 +87,6 @@ public class TestCustomNorms extends LuceneTestCase {
|
|||
dir.close();
|
||||
docs.close();
|
||||
}
|
||||
|
||||
public void testPackedNorms() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
|
||||
config.setSimilarity(new PackedNormSimilarity());
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);
|
||||
int num = _TestUtil.nextInt(random(), 1, 1000);
|
||||
for (int i = 0; i < num; i++) {
|
||||
Document doc = new Document();
|
||||
doc.add(new StringField("len", Integer.toString(i), Field.Store.YES));
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (int j = 0; j < i; j++) {
|
||||
sb.append(" token");
|
||||
}
|
||||
doc.add(new TextField("content", sb.toString(), Field.Store.NO));
|
||||
writer.addDocument(doc);
|
||||
}
|
||||
|
||||
DirectoryReader ir = writer.getReader();
|
||||
writer.close();
|
||||
for (AtomicReaderContext context : ir.leaves()) {
|
||||
AtomicReader reader = context.reader();
|
||||
DocValues norms = reader.normValues("content");
|
||||
assertNotNull(norms);
|
||||
Source source = norms.getSource();
|
||||
assertEquals(Type.VAR_INTS, source.getType());
|
||||
for (int i = 0; i < reader.maxDoc(); i++) {
|
||||
assertEquals(source.getInt(i), Long.parseLong(reader.document(i).get("len")));
|
||||
}
|
||||
}
|
||||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testExceptionOnRandomType() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
|
@@ -334,28 +302,5 @@ public class TestCustomNorms extends LuceneTestCase {
|
|||
throw new UnsupportedOperationException();
|
||||
}
|
||||
}
|
||||
|
||||
class PackedNormSimilarity extends Similarity {
|
||||
|
||||
@Override
|
||||
public void computeNorm(FieldInvertState state, Norm norm) {
|
||||
norm.setPackedLong(state.getLength());
|
||||
}
|
||||
|
||||
@Override
|
||||
public SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public ExactSimScorer exactSimScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public SloppySimScorer sloppySimScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Binary file not shown.
|
@@ -0,0 +1,261 @@
|
|||
package org.apache.lucene.util.fst;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TimeUnits;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
import org.junit.Ignore;
|
||||
import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
|
||||
|
||||
@Ignore("Requires tons of heap to run (10G works)")
|
||||
@TimeoutSuite(millis = 100 * TimeUnits.HOUR)
|
||||
public class Test2BFST extends LuceneTestCase {
|
||||
|
||||
private static long LIMIT = 3L*1024*1024*1024;
|
||||
|
||||
public void test() throws Exception {
|
||||
int[] ints = new int[7];
|
||||
IntsRef input = new IntsRef(ints, 0, ints.length);
|
||||
long seed = random().nextLong();
|
||||
|
||||
for(int doPackIter=0;doPackIter<2;doPackIter++) {
|
||||
boolean doPack = doPackIter == 1;
|
||||
|
||||
// Build FST w/ NoOutputs and stop when nodeCount > 3B
|
||||
if (!doPack) {
|
||||
System.out.println("\nTEST: 3B nodes; doPack=false output=NO_OUTPUTS");
|
||||
Outputs<Object> outputs = NoOutputs.getSingleton();
|
||||
Object NO_OUTPUT = outputs.getNoOutput();
|
||||
final Builder<Object> b = new Builder<Object>(FST.INPUT_TYPE.BYTE1, 0, 0, false, false, Integer.MAX_VALUE, outputs,
|
||||
null, doPack, PackedInts.COMPACT, true, 15);
|
||||
|
||||
int count = 0;
|
||||
Random r = new Random(seed);
|
||||
int[] ints2 = new int[200];
|
||||
IntsRef input2 = new IntsRef(ints2, 0, ints2.length);
|
||||
while(true) {
|
||||
//System.out.println("add: " + input + " -> " + output);
|
||||
for(int i=10;i<ints2.length;i++) {
|
||||
ints2[i] = r.nextInt(256);
|
||||
}
|
||||
b.add(input2, NO_OUTPUT);
|
||||
count++;
|
||||
if (count % 100000 == 0) {
|
||||
System.out.println(count + ": " + b.fstSizeInBytes() + " bytes; " + b.getTotStateCount() + " nodes");
|
||||
}
|
||||
if (b.getTotStateCount() > LIMIT) {
|
||||
break;
|
||||
}
|
||||
nextInput(r, ints2);
|
||||
}
|
||||
|
||||
FST<Object> fst = b.finish();
|
||||
|
||||
System.out.println("\nTEST: now verify [fst size=" + fst.sizeInBytes() + "; nodeCount=" + fst.getNodeCount() + "; arcCount=" + fst.getArcCount() + "]");
|
||||
|
||||
Arrays.fill(ints2, 0);
|
||||
r = new Random(seed);
|
||||
|
||||
for(int i=0;i<count;i++) {
|
||||
if (i % 1000000 == 0) {
|
||||
System.out.println(i + "...: ");
|
||||
}
|
||||
for(int j=10;j<ints2.length;j++) {
|
||||
ints2[j] = r.nextInt(256);
|
||||
}
|
||||
assertEquals(NO_OUTPUT, Util.get(fst, input2));
|
||||
nextInput(r, ints2);
|
||||
}
|
||||
|
||||
System.out.println("\nTEST: enum all input/outputs");
|
||||
IntsRefFSTEnum<Object> fstEnum = new IntsRefFSTEnum<Object>(fst);
|
||||
|
||||
Arrays.fill(ints2, 0);
|
||||
r = new Random(seed);
|
||||
int upto = 0;
|
||||
while(true) {
|
||||
IntsRefFSTEnum.InputOutput<Object> pair = fstEnum.next();
|
||||
if (pair == null) {
|
||||
break;
|
||||
}
|
||||
for(int j=10;j<ints2.length;j++) {
|
||||
ints2[j] = r.nextInt(256);
|
||||
}
|
||||
assertEquals(input2, pair.input);
|
||||
assertEquals(NO_OUTPUT, pair.output);
|
||||
upto++;
|
||||
nextInput(r, ints2);
|
||||
}
|
||||
assertEquals(count, upto);
|
||||
}
|
||||
|
||||
// Build FST w/ ByteSequenceOutputs and stop when FST
|
||||
// size = 3GB
|
||||
{
|
||||
System.out.println("\nTEST: 3 GB size; doPack=" + doPack + " outputs=bytes");
|
||||
Outputs<BytesRef> outputs = ByteSequenceOutputs.getSingleton();
|
||||
final Builder<BytesRef> b = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs,
|
||||
null, doPack, PackedInts.COMPACT, true, 15);
|
||||
|
||||
byte[] outputBytes = new byte[20];
|
||||
BytesRef output = new BytesRef(outputBytes);
|
||||
Arrays.fill(ints, 0);
|
||||
int count = 0;
|
||||
Random r = new Random(seed);
|
||||
while(true) {
|
||||
r.nextBytes(outputBytes);
|
||||
//System.out.println("add: " + input + " -> " + output);
|
||||
b.add(input, BytesRef.deepCopyOf(output));
|
||||
count++;
|
||||
if (count % 1000000 == 0) {
|
||||
System.out.println(count + "...: " + b.fstSizeInBytes() + " bytes");
|
||||
}
|
||||
if (b.fstSizeInBytes() > LIMIT) {
|
||||
break;
|
||||
}
|
||||
nextInput(r, ints);
|
||||
}
|
||||
|
||||
FST<BytesRef> fst = b.finish();
|
||||
|
||||
System.out.println("\nTEST: now verify [fst size=" + fst.sizeInBytes() + "; nodeCount=" + fst.getNodeCount() + "; arcCount=" + fst.getArcCount() + "]");
|
||||
|
||||
r = new Random(seed);
|
||||
Arrays.fill(ints, 0);
|
||||
|
||||
for(int i=0;i<count;i++) {
|
||||
if (i % 1000000 == 0) {
|
||||
System.out.println(i + "...: ");
|
||||
}
|
||||
r.nextBytes(outputBytes);
|
||||
assertEquals(output, Util.get(fst, input));
|
||||
nextInput(r, ints);
|
||||
}
|
||||
|
||||
System.out.println("\nTEST: enum all input/outputs");
|
||||
IntsRefFSTEnum<BytesRef> fstEnum = new IntsRefFSTEnum<BytesRef>(fst);
|
||||
|
||||
Arrays.fill(ints, 0);
|
||||
r = new Random(seed);
|
||||
int upto = 0;
|
||||
while(true) {
|
||||
IntsRefFSTEnum.InputOutput<BytesRef> pair = fstEnum.next();
|
||||
if (pair == null) {
|
||||
break;
|
||||
}
|
||||
assertEquals(input, pair.input);
|
||||
r.nextBytes(outputBytes);
|
||||
assertEquals(output, pair.output);
|
||||
upto++;
|
||||
nextInput(r, ints);
|
||||
}
|
||||
assertEquals(count, upto);
|
||||
}
|
||||
|
||||
// Build FST w/ PositiveIntOutputs and stop when FST
|
||||
// size = 3GB
|
||||
{
|
||||
System.out.println("\nTEST: 3 GB size; doPack=" + doPack + " outputs=long");
|
||||
Outputs<Long> outputs = PositiveIntOutputs.getSingleton();
|
||||
final Builder<Long> b = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs,
|
||||
null, doPack, PackedInts.COMPACT, true, 15);
|
||||
|
||||
long output = 1;
|
||||
|
||||
Arrays.fill(ints, 0);
|
||||
int count = 0;
|
||||
Random r = new Random(seed);
|
||||
while(true) {
|
||||
//System.out.println("add: " + input + " -> " + output);
|
||||
b.add(input, output);
|
||||
output += 1+r.nextInt(10);
|
||||
count++;
|
||||
if (count % 1000000 == 0) {
|
||||
System.out.println(count + "...: " + b.fstSizeInBytes() + " bytes");
|
||||
}
|
||||
if (b.fstSizeInBytes() > LIMIT) {
|
||||
break;
|
||||
}
|
||||
nextInput(r, ints);
|
||||
}
|
||||
|
||||
FST<Long> fst = b.finish();
|
||||
|
||||
System.out.println("\nTEST: now verify [fst size=" + fst.sizeInBytes() + "; nodeCount=" + fst.getNodeCount() + "; arcCount=" + fst.getArcCount() + "]");
|
||||
|
||||
Arrays.fill(ints, 0);
|
||||
|
||||
output = 1;
|
||||
r = new Random(seed);
|
||||
for(int i=0;i<count;i++) {
|
||||
if (i % 1000000 == 0) {
|
||||
System.out.println(i + "...: ");
|
||||
}
|
||||
|
||||
// forward lookup:
|
||||
assertEquals(output, Util.get(fst, input).longValue());
|
||||
// reverse lookup:
|
||||
assertEquals(input, Util.getByOutput(fst, output));
|
||||
output += 1 + r.nextInt(10);
|
||||
nextInput(r, ints);
|
||||
}
|
||||
|
||||
System.out.println("\nTEST: enum all input/outputs");
|
||||
IntsRefFSTEnum<Long> fstEnum = new IntsRefFSTEnum<Long>(fst);
|
||||
|
||||
Arrays.fill(ints, 0);
|
||||
r = new Random(seed);
|
||||
int upto = 0;
|
||||
output = 1;
|
||||
while(true) {
|
||||
IntsRefFSTEnum.InputOutput<Long> pair = fstEnum.next();
|
||||
if (pair == null) {
|
||||
break;
|
||||
}
|
||||
assertEquals(input, pair.input);
|
||||
assertEquals(output, pair.output.longValue());
|
||||
output += 1 + r.nextInt(10);
|
||||
upto++;
|
||||
nextInput(r, ints);
|
||||
}
|
||||
assertEquals(count, upto);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void nextInput(Random r, int[] ints) {
|
||||
int downTo = 6;
|
||||
while(downTo >= 0) {
|
||||
// Must add random amounts (and not just 1) because
|
||||
// otherwise FST outsmarts us and remains tiny:
|
||||
ints[downTo] += 1+r.nextInt(10);
|
||||
if (ints[downTo] < 256) {
|
||||
break;
|
||||
} else {
|
||||
ints[downTo] = 0;
|
||||
downTo--;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@@ -0,0 +1,360 @@
|
|||
package org.apache.lucene.util.fst;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
||||
public class TestBytesStore extends LuceneTestCase {
|
||||
|
||||
public void testRandom() throws Exception {
|
||||
|
||||
final int iters = atLeast(10);
|
||||
for(int iter=0;iter<iters;iter++) {
|
||||
final int numBytes = _TestUtil.nextInt(random(), 1, 200000);
|
||||
final byte[] expected = new byte[numBytes];
|
||||
final int blockBits = _TestUtil.nextInt(random(), 8, 15);
|
||||
final BytesStore bytes = new BytesStore(blockBits);
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: iter=" + iter + " numBytes=" + numBytes + " blockBits=" + blockBits);
|
||||
}
|
||||
|
||||
int pos = 0;
|
||||
while(pos < numBytes) {
|
||||
int op = random().nextInt(8);
|
||||
if (VERBOSE) {
|
||||
System.out.println(" cycle pos=" + pos);
|
||||
}
|
||||
switch(op) {
|
||||
|
||||
case 0:
|
||||
{
|
||||
// write random byte
|
||||
byte b = (byte) random().nextInt(256);
|
||||
if (VERBOSE) {
|
||||
System.out.println(" writeByte b=" + b);
|
||||
}
|
||||
|
||||
expected[pos++] = b;
|
||||
bytes.writeByte(b);
|
||||
}
|
||||
break;
|
||||
|
||||
case 1:
|
||||
{
|
||||
// write random byte[]
|
||||
int len = random().nextInt(Math.min(numBytes - pos, 100));
|
||||
byte[] temp = new byte[len];
|
||||
random().nextBytes(temp);
|
||||
if (VERBOSE) {
|
||||
System.out.println(" writeBytes len=" + len + " bytes=" + Arrays.toString(temp));
|
||||
}
|
||||
System.arraycopy(temp, 0, expected, pos, temp.length);
|
||||
bytes.writeBytes(temp, 0, temp.length);
|
||||
pos += len;
|
||||
}
|
||||
break;
|
||||
|
||||
case 2:
|
||||
{
|
||||
// write int @ absolute pos
|
||||
if (pos > 4) {
|
||||
int x = random().nextInt();
|
||||
int randomPos = random().nextInt(pos-4);
|
||||
if (VERBOSE) {
|
||||
System.out.println(" abs writeInt pos=" + randomPos + " x=" + x);
|
||||
}
|
||||
bytes.writeInt(randomPos, x);
|
||||
expected[randomPos++] = (byte) (x >> 24);
|
||||
expected[randomPos++] = (byte) (x >> 16);
|
||||
expected[randomPos++] = (byte) (x >> 8);
|
||||
expected[randomPos++] = (byte) x;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case 3:
|
||||
{
|
||||
// reverse bytes
|
||||
if (pos > 1) {
|
||||
int len = _TestUtil.nextInt(random(), 2, Math.min(100, pos));
|
||||
int start;
|
||||
if (len == pos) {
|
||||
start = 0;
|
||||
} else {
|
||||
start = random().nextInt(pos - len);
|
||||
}
|
||||
int end = start + len - 1;
|
||||
if (VERBOSE) {
|
||||
System.out.println(" reverse start=" + start + " end=" + end + " len=" + len + " pos=" + pos);
|
||||
}
|
||||
bytes.reverse(start, end);
|
||||
|
||||
while(start <= end) {
|
||||
byte b = expected[end];
|
||||
expected[end] = expected[start];
|
||||
expected[start] = b;
|
||||
start++;
|
||||
end--;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case 4:
|
||||
{
|
||||
// abs write random byte[]
|
||||
if (pos > 2) {
|
||||
int randomPos = random().nextInt(pos-1);
|
||||
int len = _TestUtil.nextInt(random(), 1, Math.min(pos - randomPos - 1, 100));
|
||||
byte[] temp = new byte[len];
|
||||
random().nextBytes(temp);
|
||||
if (VERBOSE) {
|
||||
System.out.println(" abs writeBytes pos=" + randomPos + " len=" + len + " bytes=" + Arrays.toString(temp));
|
||||
}
|
||||
System.arraycopy(temp, 0, expected, randomPos, temp.length);
|
||||
bytes.writeBytes(randomPos, temp, 0, temp.length);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case 5:
|
||||
{
|
||||
// copyBytes
|
||||
if (pos > 1) {
|
||||
int src = random().nextInt(pos-1);
|
||||
int dest = _TestUtil.nextInt(random(), src+1, pos-1);
|
||||
int len = _TestUtil.nextInt(random(), 1, Math.min(300, pos - dest));
|
||||
if (VERBOSE) {
|
||||
System.out.println(" copyBytes src=" + src + " dest=" + dest + " len=" + len);
|
||||
}
|
||||
System.arraycopy(expected, src, expected, dest, len);
|
||||
bytes.copyBytes(src, dest, len);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case 6:
|
||||
{
|
||||
// skip
|
||||
int len = random().nextInt(Math.min(100, numBytes - pos));
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println(" skip len=" + len);
|
||||
}
|
||||
|
||||
pos += len;
|
||||
bytes.skipBytes(len);
|
||||
|
||||
// NOTE: must fill in zeros in case truncate was
|
||||
// used, else we get false fails:
|
||||
if (len > 0) {
|
||||
byte[] zeros = new byte[len];
|
||||
bytes.writeBytes(pos-len, zeros, 0, len);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case 7:
|
||||
{
|
||||
// absWriteByte
|
||||
if (pos > 0) {
|
||||
int dest = random().nextInt(pos);
|
||||
byte b = (byte) random().nextInt(256);
|
||||
expected[dest] = b;
|
||||
bytes.writeByte(dest, b);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
assertEquals(pos, bytes.getPosition());
|
||||
|
||||
if (pos > 0 && random().nextInt(50) == 17) {
|
||||
// truncate
|
||||
int len = _TestUtil.nextInt(random(), 1, Math.min(pos, 100));
|
||||
bytes.truncate(pos - len);
|
||||
pos -= len;
|
||||
Arrays.fill(expected, pos, pos+len, (byte) 0);
|
||||
if (VERBOSE) {
|
||||
System.out.println(" truncate len=" + len + " newPos=" + pos);
|
||||
}
|
||||
}
|
||||
|
||||
if ((pos > 0 && random().nextInt(200) == 17)) {
|
||||
verify(bytes, expected, pos);
|
||||
}
|
||||
}
|
||||
|
||||
BytesStore bytesToVerify;
|
||||
|
||||
if (random().nextBoolean()) {
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: save/load final bytes");
|
||||
}
|
||||
Directory dir = newDirectory();
|
||||
IndexOutput out = dir.createOutput("bytes", IOContext.DEFAULT);
|
||||
bytes.writeTo(out);
|
||||
out.close();
|
||||
IndexInput in = dir.openInput("bytes", IOContext.DEFAULT);
|
||||
bytesToVerify = new BytesStore(in, numBytes, _TestUtil.nextInt(random(), 256, Integer.MAX_VALUE));
|
||||
in.close();
|
||||
dir.close();
|
||||
} else {
|
||||
bytesToVerify = bytes;
|
||||
}
|
||||
|
||||
verify(bytesToVerify, expected, numBytes);
|
||||
}
|
||||
}
|
||||
|
||||
private void verify(BytesStore bytes, byte[] expected, int totalLength) throws Exception {
|
||||
assertEquals(totalLength, bytes.getPosition());
|
||||
if (totalLength == 0) {
|
||||
return;
|
||||
}
|
||||
if (VERBOSE) {
|
||||
System.out.println(" verify...");
|
||||
}
|
||||
|
||||
// First verify whole thing in one blast:
|
||||
byte[] actual = new byte[totalLength];
|
||||
if (random().nextBoolean()) {
|
||||
if (VERBOSE) {
|
||||
System.out.println(" bulk: reversed");
|
||||
}
|
||||
// reversed
|
||||
FST.BytesReader r = bytes.getReverseReader();
|
||||
assertTrue(r.reversed());
|
||||
r.setPosition(totalLength-1);
|
||||
r.readBytes(actual, 0, actual.length);
|
||||
int start = 0;
|
||||
int end = totalLength - 1;
|
||||
while(start < end) {
|
||||
byte b = actual[start];
|
||||
actual[start] = actual[end];
|
||||
actual[end] = b;
|
||||
start++;
|
||||
end--;
|
||||
}
|
||||
} else {
|
||||
// forward
|
||||
if (VERBOSE) {
|
||||
System.out.println(" bulk: forward");
|
||||
}
|
||||
FST.BytesReader r = bytes.getForwardReader();
|
||||
assertFalse(r.reversed());
|
||||
r.readBytes(actual, 0, actual.length);
|
||||
}
|
||||
|
||||
for(int i=0;i<totalLength;i++) {
|
||||
assertEquals("byte @ index=" + i, expected[i], actual[i]);
|
||||
}
|
||||
|
||||
FST.BytesReader r;
|
||||
|
||||
// Then verify ops:
|
||||
boolean reversed = random().nextBoolean();
|
||||
if (reversed) {
|
||||
if (VERBOSE) {
|
||||
System.out.println(" ops: reversed");
|
||||
}
|
||||
r = bytes.getReverseReader();
|
||||
} else {
|
||||
if (VERBOSE) {
|
||||
System.out.println(" ops: forward");
|
||||
}
|
||||
r = bytes.getForwardReader();
|
||||
}
|
||||
|
||||
if (totalLength > 1) {
|
||||
int numOps = _TestUtil.nextInt(random(), 100, 200);
|
||||
for(int op=0;op<numOps;op++) {
|
||||
|
||||
int numBytes = random().nextInt(Math.min(1000, totalLength-1));
|
||||
int pos;
|
||||
if (reversed) {
|
||||
pos = _TestUtil.nextInt(random(), numBytes, totalLength-1);
|
||||
} else {
|
||||
pos = random().nextInt(totalLength-numBytes);
|
||||
}
|
||||
if (VERBOSE) {
|
||||
System.out.println(" op iter=" + op + " reversed=" + reversed + " numBytes=" + numBytes + " pos=" + pos);
|
||||
}
|
||||
byte[] temp = new byte[numBytes];
|
||||
r.setPosition(pos);
|
||||
assertEquals(pos, r.getPosition());
|
||||
r.readBytes(temp, 0, temp.length);
|
||||
for(int i=0;i<numBytes;i++) {
|
||||
byte expectedByte;
|
||||
if (reversed) {
|
||||
expectedByte = expected[pos - i];
|
||||
} else {
|
||||
expectedByte = expected[pos + i];
|
||||
}
|
||||
assertEquals("byte @ index=" + i, expectedByte, temp[i]);
|
||||
}
|
||||
|
||||
int left;
|
||||
int expectedPos;
|
||||
|
||||
if (reversed) {
|
||||
expectedPos = pos-numBytes;
|
||||
left = (int) r.getPosition();
|
||||
} else {
|
||||
expectedPos = pos+numBytes;
|
||||
left = (int) (totalLength - r.getPosition());
|
||||
}
|
||||
assertEquals(expectedPos, r.getPosition());
|
||||
|
||||
if (left > 4) {
|
||||
int skipBytes = random().nextInt(left-4);
|
||||
|
||||
int expectedInt = 0;
|
||||
if (reversed) {
|
||||
expectedPos -= skipBytes;
|
||||
expectedInt |= (expected[expectedPos--]&0xFF)<<24;
|
||||
expectedInt |= (expected[expectedPos--]&0xFF)<<16;
|
||||
expectedInt |= (expected[expectedPos--]&0xFF)<<8;
|
||||
expectedInt |= (expected[expectedPos--]&0xFF);
|
||||
} else {
|
||||
expectedPos += skipBytes;
|
||||
expectedInt |= (expected[expectedPos++]&0xFF)<<24;
|
||||
expectedInt |= (expected[expectedPos++]&0xFF)<<16;
|
||||
expectedInt |= (expected[expectedPos++]&0xFF)<<8;
|
||||
expectedInt |= (expected[expectedPos++]&0xFF);
|
||||
}
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println(" skip numBytes=" + skipBytes);
|
||||
System.out.println(" readInt");
|
||||
}
|
||||
|
||||
r.skipBytes(skipBytes);
|
||||
assertEquals(expectedInt, r.readInt());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@@ -310,7 +310,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
|
||||
final boolean doRewrite = random().nextBoolean();
|
||||
|
||||
Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, doRewrite, true);
|
||||
Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, doRewrite, PackedInts.DEFAULT, true, 15);
|
||||
|
||||
boolean storeOrd = random().nextBoolean();
|
||||
if (VERBOSE) {
|
||||
|
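These test changes track a wider Builder constructor: each call now passes a PackedInts constant plus a trailing 15. Reading those two extra arguments as the acceptable overhead ratio for the packed node table and the byte-store block size in bits (2^15 = 32 KB pages) is an assumption; the diff itself does not name them. A sketch mirroring the updated call above:

import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Outputs;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.packed.PackedInts;

public class BuilderCtorSketch {
  public static Builder<Long> newBuilder(boolean doPack) {
    Outputs<Long> outputs = PositiveIntOutputs.getSingleton(true);
    return new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE,
        outputs, null, doPack,
        PackedInts.DEFAULT,  // new argument (assumed: packing overhead hint)
        true,
        15);                 // new argument (assumed: block size bits for the byte store)
  }
}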
@@ -453,7 +453,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
this.outputs = outputs;
|
||||
this.doPack = doPack;
|
||||
|
||||
builder = new Builder<T>(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4, 0, prune, prune == 0, true, Integer.MAX_VALUE, outputs, null, doPack, !noArcArrays);
|
||||
builder = new Builder<T>(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4, 0, prune, prune == 0, true, Integer.MAX_VALUE, outputs, null, doPack, PackedInts.DEFAULT, !noArcArrays, 15);
|
||||
}
|
||||
|
||||
protected abstract T getOutput(IntsRef input, int ord) throws IOException;
|
||||
|
@@ -484,8 +484,13 @@ public class TestFSTs extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
long tMid = System.currentTimeMillis();
|
||||
System.out.println(((tMid-tStart) / 1000.0) + " sec to add all terms");
|
||||
|
||||
assert builder.getTermCount() == ord;
|
||||
FST<T> fst = builder.finish();
|
||||
long tEnd = System.currentTimeMillis();
|
||||
System.out.println(((tEnd-tMid) / 1000.0) + " sec to finish/pack");
|
||||
if (fst == null) {
|
||||
System.out.println("FST was fully pruned!");
|
||||
System.exit(0);
|
||||
|
@@ -513,6 +518,12 @@ public class TestFSTs extends LuceneTestCase {
|
|||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
IndexInput in = dir.openInput("fst.bin", IOContext.DEFAULT);
|
||||
fst = new FST<T>(in, outputs);
|
||||
in.close();
|
||||
*/
|
||||
|
||||
System.out.println("\nNow verify...");
|
||||
|
||||
while(true) {
|
||||
|
@@ -576,7 +587,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
// java -cp build/classes/test:build/classes/test-framework:build/classes/java:lib/junit-4.10.jar org.apache.lucene.util.fst.TestFSTs /x/tmp/allTerms3.txt out
|
||||
// java -cp ../build/codecs/classes/java:../test-framework/lib/randomizedtesting-runner-2.0.8.jar:../build/core/classes/test:../build/core/classes/test-framework:../build/core/classes/java:../build/test-framework/classes/java:../test-framework/lib/junit-4.10.jar org.apache.lucene.util.fst.TestFSTs /xold/tmp/allTerms3.txt out
|
||||
public static void main(String[] args) throws IOException {
|
||||
int prune = 0;
|
||||
int limit = Integer.MAX_VALUE;
|
||||
|
@@ -1022,7 +1033,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
throws IOException {
|
||||
if (FST.targetHasArcs(arc)) {
|
||||
int childCount = 0;
|
||||
FST.BytesReader fstReader = fst.getBytesReader(0);
|
||||
BytesReader fstReader = fst.getBytesReader(0);
|
||||
for (arc = fst.readFirstTargetArc(arc, arc, fstReader);;
|
||||
arc = fst.readNextArc(arc, fstReader), childCount++)
|
||||
{
|
||||
|
@@ -1062,7 +1073,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
public void testFinalOutputOnEndState() throws Exception {
|
||||
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
|
||||
|
||||
final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE4, 2, 0, true, true, Integer.MAX_VALUE, outputs, null, random().nextBoolean(), true);
|
||||
final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE4, 2, 0, true, true, Integer.MAX_VALUE, outputs, null, random().nextBoolean(), PackedInts.DEFAULT, true, 15);
|
||||
builder.add(Util.toUTF32("stat", new IntsRef()), 17L);
|
||||
builder.add(Util.toUTF32("station", new IntsRef()), 10L);
|
||||
final FST<Long> fst = builder.finish();
|
||||
|
@@ -1077,7 +1088,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
public void testInternalFinalState() throws Exception {
|
||||
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
|
||||
final boolean willRewrite = random().nextBoolean();
|
||||
final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, willRewrite, true);
|
||||
final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, willRewrite, PackedInts.DEFAULT, true, 15);
|
||||
builder.add(Util.toIntsRef(new BytesRef("stat"), new IntsRef()), outputs.getNoOutput());
|
||||
builder.add(Util.toIntsRef(new BytesRef("station"), new IntsRef()), outputs.getNoOutput());
|
||||
final FST<Long> fst = builder.finish();
|
||||
|
@@ -1100,7 +1111,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
final Long nothing = outputs.getNoOutput();
|
||||
final Builder<Long> b = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
|
||||
|
||||
final FST<Long> fst = new FST<Long>(FST.INPUT_TYPE.BYTE1, outputs, false, PackedInts.COMPACT, true);
|
||||
final FST<Long> fst = new FST<Long>(FST.INPUT_TYPE.BYTE1, outputs, false, PackedInts.COMPACT, true, 15);
|
||||
|
||||
final Builder.UnCompiledNode<Long> rootNode = new Builder.UnCompiledNode<Long>(b, 0);
|
||||
|
||||
|
|
|
@@ -46,7 +46,7 @@ public class SearchFiles {
|
|||
/** Simple command-line based search demo. */
|
||||
public static void main(String[] args) throws Exception {
|
||||
String usage =
|
||||
"Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/java/4_0/demo.html for details.";
|
||||
"Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details.";
|
||||
if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) {
|
||||
System.out.println(usage);
|
||||
System.exit(0);
|
||||
|
|
|
@@ -3,7 +3,7 @@ package org.apache.lucene.facet.associations;
|
|||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.facet.search.PayloadIterator;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.store.ByteArrayDataInput;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
@@ -46,12 +46,21 @@ public abstract class AssociationsPayloadIterator<T extends CategoryAssociation>
|
|||
* It is assumed that all association values can be deserialized with the
|
||||
* given {@link CategoryAssociation}.
|
||||
*/
|
||||
public AssociationsPayloadIterator(IndexReader reader, String field, T association) throws IOException {
|
||||
pi = new PayloadIterator(reader, new Term(field, association.getCategoryListID()));
|
||||
hasAssociations = pi.init();
|
||||
public AssociationsPayloadIterator(String field, T association) throws IOException {
|
||||
pi = new PayloadIterator(new Term(field, association.getCategoryListID()));
|
||||
this.association = association;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the {@link AtomicReaderContext} for which {@link #setNextDoc(int)}
|
||||
* calls will be made. Returns true iff this reader has associations for any
|
||||
* of the documents belonging to the association given to the constructor.
|
||||
*/
|
||||
public final boolean setNextReader(AtomicReaderContext context) throws IOException {
|
||||
hasAssociations = pi.setNextReader(context);
|
||||
return hasAssociations;
|
||||
}
|
||||
|
||||
/**
|
||||
* Skip to the requested document. Returns true iff the document has category
|
||||
* association values and they were read successfully. Associations are
|
||||
|
|
|
@@ -2,7 +2,6 @@ package org.apache.lucene.facet.associations;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.util.collections.IntToFloatMap;
|
||||
|
||||
/*
|
||||
|
@@ -31,9 +30,8 @@ public class FloatAssociationsPayloadIterator extends AssociationsPayloadIterato
|
|||
|
||||
private final IntToFloatMap ordinalAssociations = new IntToFloatMap();
|
||||
|
||||
public FloatAssociationsPayloadIterator(IndexReader reader, String field, CategoryFloatAssociation association)
|
||||
throws IOException {
|
||||
super(reader, field, association);
|
||||
public FloatAssociationsPayloadIterator(String field, CategoryFloatAssociation association) throws IOException {
|
||||
super(field, association);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@@ -2,7 +2,6 @@ package org.apache.lucene.facet.associations;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.util.collections.IntToIntMap;
|
||||
|
||||
/*
|
||||
|
@@ -31,9 +30,8 @@ public class IntAssociationsPayloadIterator<
|
|||
|
||||
private final IntToIntMap ordinalAssociations = new IntToIntMap();
|
||||
|
||||
public IntAssociationsPayloadIterator(IndexReader reader, String field, CategoryIntAssociation association)
|
||||
throws IOException {
|
||||
super(reader, field, association);
|
||||
public IntAssociationsPayloadIterator(String field, CategoryIntAssociation association) throws IOException {
|
||||
super(field, association);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@@ -3,13 +3,10 @@ package org.apache.lucene.facet.index.params;
|
|||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Term;
|
||||
|
||||
import org.apache.lucene.facet.search.CategoryListIterator;
|
||||
import org.apache.lucene.facet.search.PayloadCategoryListIteraor;
|
||||
import org.apache.lucene.facet.search.TotalFacetCounts;
|
||||
import org.apache.lucene.facet.util.PartitionsUtils;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.util.encoding.DGapIntEncoder;
|
||||
import org.apache.lucene.util.encoding.IntDecoder;
|
||||
import org.apache.lucene.util.encoding.IntEncoder;
|
||||
|
@@ -98,11 +95,6 @@ public class CategoryListParams implements Serializable {
|
|||
return new SortingIntEncoder(new UniqueValuesIntEncoder(new DGapIntEncoder(new VInt8IntEncoder())));
|
||||
}
|
||||
|
||||
/**
|
||||
* Equality is defined by the 'term' that defines this category list.
|
||||
* Sub-classes should override this method if a more complex calculation
|
||||
* is needed to ensure equality.
|
||||
*/
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (o == this) {
|
||||
|
@@ -121,29 +113,16 @@ public class CategoryListParams implements Serializable {
|
|||
return this.term.equals(other.term);
|
||||
}
|
||||
|
||||
/**
|
||||
* Hashcode is similar to {@link #equals(Object)}, in that it uses
|
||||
* the term that defines this category list to derive the hashcode.
|
||||
* Subclasses need to ensure that equality/hashcode is correctly defined,
|
||||
* or there could be side-effects in the {@link TotalFacetCounts} caching
|
||||
* mechanism (as the filename for a Total Facet Counts array cache
|
||||
* is dependent on the hashCode, so it should consistently return the same
|
||||
* hash for identity).
|
||||
*/
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return this.hashCode;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create the category list iterator for the specified partition.
|
||||
*/
|
||||
public CategoryListIterator createCategoryListIterator(IndexReader reader,
|
||||
int partition) throws IOException {
|
||||
/** Create the {@link CategoryListIterator} for the specified partition. */
|
||||
public CategoryListIterator createCategoryListIterator(int partition) throws IOException {
|
||||
String categoryListTermStr = PartitionsUtils.partitionName(this, partition);
|
||||
Term payloadTerm = new Term(term.field(), categoryListTermStr);
|
||||
return new PayloadCategoryListIteraor(reader, payloadTerm,
|
||||
createEncoder().createMatchingDecoder());
|
||||
return new PayloadCategoryListIteraor(payloadTerm, createEncoder().createMatchingDecoder());
|
||||
}
|
||||
|
||||
}
|
|
@@ -50,7 +50,7 @@ public final class AdaptiveFacetsAccumulator extends StandardFacetsAccumulator {
|
|||
* Create an {@link AdaptiveFacetsAccumulator}
|
||||
* @see StandardFacetsAccumulator#StandardFacetsAccumulator(FacetSearchParams, IndexReader, TaxonomyReader)
|
||||
*/
|
||||
public AdaptiveFacetsAccumulator(FacetSearchParams searchParams, IndexReader indexReader,
|
||||
public AdaptiveFacetsAccumulator(FacetSearchParams searchParams, IndexReader indexReader,
|
||||
TaxonomyReader taxonomyReader) {
|
||||
super(searchParams, indexReader, taxonomyReader);
|
||||
}
|
||||
|
|
|
@@ -2,6 +2,7 @@ package org.apache.lucene.facet.search;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
|
||||
/*
|
||||
|
@@ -23,6 +24,8 @@ import org.apache.lucene.util.IntsRef;
|
|||
|
||||
/**
|
||||
* An interface for obtaining the category ordinals of documents.
|
||||
* {@link #getOrdinals(int, IntsRef)} calls are done with document IDs that are
|
||||
* local to the reader given to {@link #setNextReader(AtomicReaderContext)}.
|
||||
* <p>
|
||||
* <b>NOTE:</b> this class operates as a key to a map, and therefore you should
|
||||
* implement {@code equals()} and {@code hashCode()} for proper behavior.
|
||||
|
@@ -32,19 +35,20 @@ import org.apache.lucene.util.IntsRef;
|
|||
public interface CategoryListIterator {
|
||||
|
||||
/**
|
||||
* Initializes the iterator. This method must be called before any calls to
|
||||
* {@link #getOrdinals(int, IntsRef)}, and its return value indicates whether there are
|
||||
* any relevant documents for this iterator.
|
||||
* Sets the {@link AtomicReaderContext} for which
|
||||
* {@link #getOrdinals(int, IntsRef)} calls will be made. Returns true iff any
|
||||
* of the documents in this reader have category ordinals. This method must be
|
||||
* called before any calls to {@link #getOrdinals(int, IntsRef)}.
|
||||
*/
|
||||
public boolean init() throws IOException;
|
||||
|
||||
public boolean setNextReader(AtomicReaderContext context) throws IOException;
|
||||
|
||||
/**
|
||||
* Stores the category ordinals of the given document ID in the given
|
||||
* {@link IntsRef}, starting at position 0 upto {@link IntsRef#length}. Grows
|
||||
* the {@link IntsRef} if it is not large enough.
|
||||
*
|
||||
* <p>
|
||||
* <b>NOTE:</b> if the requested document does not category ordinals
|
||||
* <b>NOTE:</b> if the requested document does not have category ordinals
|
||||
* associated with it, {@link IntsRef#length} is set to zero.
|
||||
*/
|
||||
public void getOrdinals(int docID, IntsRef ints) throws IOException;
|
||||
|
|
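With init() gone, a consumer now drives the iterator segment by segment. A minimal sketch of that pattern, using only methods shown in this commit (class and method names here are illustrative):

import java.io.IOException;

import org.apache.lucene.facet.search.CategoryListIterator;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.IntsRef;

public class OrdinalScan {
  public static long countOrdinals(IndexReader reader, CategoryListIterator cli) throws IOException {
    long total = 0;
    IntsRef ordinals = new IntsRef(32);
    for (AtomicReaderContext context : reader.leaves()) {
      if (!cli.setNextReader(context)) {
        continue;                               // this segment has no category ordinals
      }
      int maxDoc = context.reader().maxDoc();
      for (int doc = 0; doc < maxDoc; doc++) {  // doc IDs are local to this segment
        cli.getOrdinals(doc, ordinals);         // length is 0 for docs without ordinals
        total += ordinals.length;
      }
    }
    return total;
  }
}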
|
@@ -2,7 +2,7 @@ package org.apache.lucene.facet.search;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
|
@@ -34,17 +34,15 @@ import org.apache.lucene.util.encoding.IntDecoder;
|
|||
public class PayloadCategoryListIteraor implements CategoryListIterator {
|
||||
|
||||
private final IntDecoder decoder;
|
||||
private final IndexReader indexReader;
|
||||
private final Term term;
|
||||
private final PayloadIterator pi;
|
||||
private final int hashCode;
|
||||
|
||||
public PayloadCategoryListIteraor(IndexReader indexReader, Term term, IntDecoder decoder) throws IOException {
|
||||
pi = new PayloadIterator(indexReader, term);
|
||||
public PayloadCategoryListIteraor(Term term, IntDecoder decoder) throws IOException {
|
||||
pi = new PayloadIterator(term);
|
||||
this.decoder = decoder;
|
||||
hashCode = indexReader.hashCode() ^ term.hashCode();
|
||||
hashCode = term.hashCode();
|
||||
this.term = term;
|
||||
this.indexReader = indexReader;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@@ -58,7 +56,7 @@ public class PayloadCategoryListIteraor implements CategoryListIterator {
|
|||
}
|
||||
|
||||
// Hash codes are the same, check equals() to avoid cases of hash-collisions.
|
||||
return indexReader.equals(that.indexReader) && term.equals(that.term);
|
||||
return term.equals(that.term);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@@ -67,8 +65,8 @@ public class PayloadCategoryListIteraor implements CategoryListIterator {
|
|||
}
|
||||
|
||||
@Override
|
||||
public boolean init() throws IOException {
|
||||
return pi.init();
|
||||
public boolean setNextReader(AtomicReaderContext context) throws IOException {
|
||||
return pi.setNextReader(context);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@@ -1,12 +1,10 @@
|
|||
package org.apache.lucene.facet.search;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Iterator;
|
||||
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||
import org.apache.lucene.index.Fields;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
|
@@ -42,99 +40,75 @@ import org.apache.lucene.util.BytesRef;
|
|||
*/
|
||||
public class PayloadIterator {
|
||||
|
||||
protected BytesRef data;
|
||||
|
||||
private TermsEnum reuseTE;
|
||||
private DocsAndPositionsEnum currentDPE;
|
||||
private DocsAndPositionsEnum dpe;
|
||||
private boolean hasMore;
|
||||
private int curDocID, curDocBase;
|
||||
private int curDocID;
|
||||
|
||||
private final Iterator<AtomicReaderContext> leaves;
|
||||
private final Term term;
|
||||
|
||||
public PayloadIterator(IndexReader indexReader, Term term) throws IOException {
|
||||
leaves = indexReader.leaves().iterator();
|
||||
public PayloadIterator(Term term) throws IOException {
|
||||
this.term = term;
|
||||
}
|
||||
|
||||
private void nextSegment() throws IOException {
|
||||
/**
|
||||
* Sets the {@link AtomicReaderContext} for which {@link #getPayload(int)}
|
||||
* calls will be made. Returns true iff this reader has payload for any of the
|
||||
* documents belonging to the {@link Term} given to the constructor.
|
||||
*/
|
||||
public boolean setNextReader(AtomicReaderContext context) throws IOException {
|
||||
hasMore = false;
|
||||
while (leaves.hasNext()) {
|
||||
AtomicReaderContext ctx = leaves.next();
|
||||
curDocBase = ctx.docBase;
|
||||
Fields fields = ctx.reader().fields();
|
||||
if (fields != null) {
|
||||
Terms terms = fields.terms(term.field());
|
||||
if (terms != null) {
|
||||
reuseTE = terms.iterator(reuseTE);
|
||||
if (reuseTE.seekExact(term.bytes(), true)) {
|
||||
// this class is usually used to iterate on whatever a Query matched
|
||||
// if it didn't match deleted documents, we won't receive them. if it
|
||||
// did, we should iterate on them too, therefore we pass liveDocs=null
|
||||
currentDPE = reuseTE.docsAndPositions(null, currentDPE, DocsAndPositionsEnum.FLAG_PAYLOADS);
|
||||
if (currentDPE != null && (curDocID = currentDPE.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
hasMore = true;
|
||||
break;
|
||||
}
|
||||
Fields fields = context.reader().fields();
|
||||
if (fields != null) {
|
||||
Terms terms = fields.terms(term.field());
|
||||
if (terms != null) {
|
||||
reuseTE = terms.iterator(reuseTE);
|
||||
if (reuseTE.seekExact(term.bytes(), true)) {
|
||||
// this class is usually used to iterate on whatever a Query matched
|
||||
// if it didn't match deleted documents, we won't receive them. if it
|
||||
// did, we should iterate on them too, therefore we pass liveDocs=null
|
||||
dpe = reuseTE.docsAndPositions(null, dpe, DocsAndPositionsEnum.FLAG_PAYLOADS);
|
||||
if (dpe != null && (curDocID = dpe.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
hasMore = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return hasMore;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize the iterator. Should be done before the first call to
|
||||
* {@link #getPayload(int)}. Returns {@code false} if no category list is
|
||||
* found, or the category list has no documents.
|
||||
*/
|
||||
public boolean init() throws IOException {
|
||||
nextSegment();
|
||||
return hasMore;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the {@link BytesRef payload} of the given document, or {@code null}
|
||||
* if the document does not exist, there are no more documents in the posting
|
||||
* list, or the document exists but has no payload. You should call
|
||||
* {@link #init()} before the first call to this method.
|
||||
* list, or the document exists but has no payload. The given document IDs
|
||||
* are treated as local to the reader given to
|
||||
* {@link #setNextReader(AtomicReaderContext)}.
|
||||
*/
|
||||
public BytesRef getPayload(int docID) throws IOException {
|
||||
if (!hasMore) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// re-basing docId->localDocID is done fewer times than currentDoc->globalDoc
|
||||
int localDocID = docID - curDocBase;
|
||||
|
||||
if (curDocID > localDocID) {
|
||||
if (curDocID > docID) {
|
||||
// document does not exist
|
||||
return null;
|
||||
}
|
||||
|
||||
if (curDocID < localDocID) {
|
||||
// look for the document either in that segment, or others
|
||||
while (hasMore && (curDocID = currentDPE.advance(localDocID)) == DocIdSetIterator.NO_MORE_DOCS) {
|
||||
nextSegment(); // also updates curDocID
|
||||
localDocID = docID - curDocBase;
|
||||
// nextSegment advances to nextDoc, so check if we still need to advance
|
||||
if (curDocID >= localDocID) {
|
||||
break;
|
||||
if (curDocID < docID) {
|
||||
curDocID = dpe.advance(docID);
|
||||
if (curDocID != docID) { // requested document does not have a payload
|
||||
if (curDocID == DocIdSetIterator.NO_MORE_DOCS) { // no more docs in this reader
|
||||
hasMore = false;
|
||||
}
|
||||
}
|
||||
|
||||
// we break from the above loop when:
|
||||
// 1. we iterated over all segments (hasMore=false)
|
||||
// 2. current segment advanced to a doc, either requested or higher
|
||||
if (!hasMore || curDocID != localDocID) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
// we're on the document
|
||||
assert currentDPE.freq() == 1 : "expecting freq=1 (got " + currentDPE.freq() + ") term=" + term + " doc=" + (curDocID + curDocBase);
|
||||
int pos = currentDPE.nextPosition();
|
||||
assert pos != -1 : "no positions for term=" + term + " doc=" + (curDocID + curDocBase);
|
||||
return currentDPE.getPayload();
|
||||
assert dpe.freq() == 1 : "expecting freq=1 (got " + dpe.freq() + ") term=" + term + " doc=" + curDocID;
|
||||
int pos = dpe.nextPosition();
|
||||
assert pos != -1 : "no positions for term=" + term + " doc=" + curDocID;
|
||||
return dpe.getPayload();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
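The same per-segment pattern applies to PayloadIterator, with one extra constraint that follows from the advance() logic above: within a segment, getPayload() should be asked for non-decreasing doc IDs, and it returns null for documents that carry no payload. A short hedged sketch (names are illustrative):

import java.io.IOException;

import org.apache.lucene.facet.search.PayloadIterator;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Term;

public class PayloadScan {
  public static int countPayloads(AtomicReaderContext context, Term categoryListTerm) throws IOException {
    PayloadIterator pi = new PayloadIterator(categoryListTerm);
    if (!pi.setNextReader(context)) {
      return 0;                                 // no payloads for this term in this segment
    }
    int seen = 0;
    int maxDoc = context.reader().maxDoc();
    for (int doc = 0; doc < maxDoc; doc++) {    // ascending, segment-local doc IDs
      if (pi.getPayload(doc) != null) {         // null: this doc has no payload for the term
        seen++;
      }
    }
    return seen;
  }
}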
|
@@ -62,7 +62,7 @@ public abstract class ScoredDocIdCollector extends Collector {
|
|||
}
|
||||
|
||||
@Override
|
||||
public ScoredDocIDsIterator scoredDocIdsIterator() {
|
||||
protected ScoredDocIDsIterator scoredDocIdsIterator() {
|
||||
return new ScoredDocIDsIterator() {
|
||||
|
||||
private DocIdSetIterator docIdsIter = docIds.iterator();
|
||||
|
@@ -129,7 +129,7 @@ public abstract class ScoredDocIdCollector extends Collector {
|
|||
}
|
||||
|
||||
@Override
|
||||
public ScoredDocIDsIterator scoredDocIdsIterator() {
|
||||
protected ScoredDocIDsIterator scoredDocIdsIterator() {
|
||||
return new ScoredDocIDsIterator() {
|
||||
|
||||
private DocIdSetIterator docIdsIter = docIds.iterator();
|
||||
|
@@ -189,8 +189,7 @@ public abstract class ScoredDocIdCollector extends Collector {
|
|||
* do not require scoring, it is better to set it to <i>false</i>.
|
||||
*/
|
||||
public static ScoredDocIdCollector create(int maxDoc, boolean enableScoring) {
|
||||
return enableScoring ? new ScoringDocIdCollector(maxDoc)
|
||||
: new NonScoringDocIdCollector(maxDoc);
|
||||
return enableScoring ? new ScoringDocIdCollector(maxDoc) : new NonScoringDocIdCollector(maxDoc);
|
||||
}
|
||||
|
||||
private ScoredDocIdCollector(int maxDoc) {
|
||||
|
@@ -198,13 +197,14 @@ public abstract class ScoredDocIdCollector extends Collector {
|
|||
docIds = new FixedBitSet(maxDoc);
|
||||
}
|
||||
|
||||
protected abstract ScoredDocIDsIterator scoredDocIdsIterator() throws IOException;
|
||||
|
||||
/** Returns the default score used when scoring is disabled. */
|
||||
public abstract float getDefaultScore();
|
||||
|
||||
/** Set the default score. Only applicable if scoring is disabled. */
|
||||
public abstract void setDefaultScore(float defaultScore);
|
||||
|
||||
public abstract ScoredDocIDsIterator scoredDocIdsIterator() throws IOException;
|
||||
|
||||
public ScoredDocIDs getScoredDocIDs() {
|
||||
return new ScoredDocIDs() {
|
||||
|
|
|
@@ -4,22 +4,23 @@ import java.io.IOException;
|
|||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
|
||||
import org.apache.lucene.facet.search.aggregator.Aggregator;
|
||||
import org.apache.lucene.facet.search.params.FacetSearchParams;
|
||||
import org.apache.lucene.facet.search.params.FacetRequest;
|
||||
import org.apache.lucene.facet.search.params.FacetSearchParams;
|
||||
import org.apache.lucene.facet.search.results.FacetResult;
|
||||
import org.apache.lucene.facet.search.results.IntermediateFacetResult;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.facet.util.PartitionsUtils;
|
||||
import org.apache.lucene.facet.util.ScoredDocIdsUtils;
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
|
@ -179,11 +180,11 @@ public class StandardFacetsAccumulator extends FacetsAccumulator {
List<FacetResult> res = new ArrayList<FacetResult>();
for (FacetRequest fr : searchParams.getFacetRequests()) {
FacetResultsHandler frHndlr = fr.createFacetResultsHandler(taxonomyReader);
IntermediateFacetResult tmpResult = fr2tmpRes.get(fr);
IntermediateFacetResult tmpResult = fr2tmpRes.get(fr);
if (tmpResult == null) {
continue; // do not add a null to the list.
}
FacetResult facetRes = frHndlr.renderFacetResult(tmpResult);
FacetResult facetRes = frHndlr.renderFacetResult(tmpResult);
// final labeling if allowed (because labeling is a costly operation)
if (isAllowLabeling()) {
frHndlr.labelResult(facetRes);

@ -213,18 +214,15 @@ public class StandardFacetsAccumulator extends FacetsAccumulator {

/** Check if it is worth to use complements */
protected boolean shouldComplement(ScoredDocIDs docids) {
return
mayComplement() &&
(docids.size() > indexReader.numDocs() * getComplementThreshold()) ;
return mayComplement() && (docids.size() > indexReader.numDocs() * getComplementThreshold()) ;
}

/**
* Iterate over the documents for this partition and fill the facet arrays with the correct
* count/complement count/value.
* @throws IOException If there is a low-level I/O error.
*/
private final void fillArraysForPartition(ScoredDocIDs docids,
FacetArrays facetArrays, int partition) throws IOException {
private final void fillArraysForPartition(ScoredDocIDs docids, FacetArrays facetArrays, int partition)
throws IOException {

if (isUsingComplements) {
initArraysByTotalCounts(facetArrays, partition, docids.size());

@ -236,27 +234,41 @@ public class StandardFacetsAccumulator extends FacetsAccumulator {

IntsRef ordinals = new IntsRef(32); // a reasonable start capacity for most common apps
for (Entry<CategoryListIterator, Aggregator> entry : categoryLists.entrySet()) {
CategoryListIterator categoryList = entry.getKey();
if (!categoryList.init()) {
continue;
}

Aggregator categorator = entry.getValue();
ScoredDocIDsIterator iterator = docids.iterator();
final ScoredDocIDsIterator iterator = docids.iterator();
final CategoryListIterator categoryListIter = entry.getKey();
final Aggregator aggregator = entry.getValue();
Iterator<AtomicReaderContext> contexts = indexReader.leaves().iterator();
AtomicReaderContext current = null;
int maxDoc = -1;
while (iterator.next()) {
int docID = iterator.getDocID();
categoryList.getOrdinals(docID, ordinals);
if (ordinals.length == 0) {
continue;
while (docID >= maxDoc) { // find the segment which contains this document
if (!contexts.hasNext()) {
throw new RuntimeException("ScoredDocIDs contains documents outside this reader's segments !?");
}
current = contexts.next();
maxDoc = current.docBase + current.reader().maxDoc();
if (docID < maxDoc) { // segment has docs, check if it has categories
boolean validSegment = categoryListIter.setNextReader(current);
validSegment &= aggregator.setNextReader(current);
if (!validSegment) { // if categoryList or aggregtor say it's an invalid segment, skip all docs
while (docID < maxDoc && iterator.next()) {
docID = iterator.getDocID();
}
}
}
}
categorator.aggregate(docID, iterator.getScore(), ordinals);
docID -= current.docBase;
categoryListIter.getOrdinals(docID, ordinals);
if (ordinals.length == 0) {
continue; // document does not have category ordinals
}
aggregator.aggregate(docID, iterator.getScore(), ordinals);
}
}
}

/**
* Init arrays for partition by total counts, optionally applying a factor
*/
/** Init arrays for partition by total counts, optionally applying a factor */
private final void initArraysByTotalCounts(FacetArrays facetArrays, int partition, int nAccumulatedDocs) {
int[] intArray = facetArrays.getIntArray();
totalFacetCounts.fillTotalCountsForPartition(intArray, partition);

@ -302,10 +314,9 @@ public class StandardFacetsAccumulator extends FacetsAccumulator {

for (FacetRequest facetRequest : searchParams.getFacetRequests()) {
Aggregator categoryAggregator = facetRequest.createAggregator(
isUsingComplements, facetArrays, indexReader, taxonomyReader);
isUsingComplements, facetArrays, taxonomyReader);

CategoryListIterator cli =
facetRequest.createCategoryListIterator(indexReader, taxonomyReader, searchParams, partition);
CategoryListIterator cli = facetRequest.createCategoryListIterator(taxonomyReader, searchParams, partition);

// get the aggregator
Aggregator old = categoryLists.put(cli, categoryAggregator);

@ -170,7 +170,7 @@ public class TotalFacetCounts {
Aggregator aggregator = new CountingAggregator(counts[partition]);
HashMap<CategoryListIterator, Aggregator> map = new HashMap<CategoryListIterator, Aggregator>();
for (CategoryListParams clp: facetIndexingParams.getAllCategoryListParams()) {
final CategoryListIterator cli = clIteraor(clCache, clp, indexReader, partition);
final CategoryListIterator cli = clIteraor(clCache, clp, partition);
map.put(cli, aggregator);
}
return map;

@ -181,14 +181,14 @@ public class TotalFacetCounts {
return new TotalFacetCounts(taxonomy, facetIndexingParams, counts, CreationType.Computed);
}

static CategoryListIterator clIteraor(CategoryListCache clCache, CategoryListParams clp,
IndexReader indexReader, int partition) throws IOException {
static CategoryListIterator clIteraor(CategoryListCache clCache, CategoryListParams clp, int partition)
throws IOException {
if (clCache != null) {
CategoryListData cld = clCache.get(clp);
if (cld != null) {
return cld.iterator(partition);
}
}
return clp.createCategoryListIterator(indexReader, partition);
return clp.createCategoryListIterator(partition);
}
}

@ -2,6 +2,7 @@ package org.apache.lucene.facet.search.aggregator;

import java.io.IOException;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.IntsRef;

/*

@ -22,21 +23,22 @@ import org.apache.lucene.util.IntsRef;
*/

/**
* An Aggregator is the analogue of Lucene's Collector (see
* {@link org.apache.lucene.search.Collector}), for processing the categories
* belonging to a certain document. The Aggregator is responsible for doing
* whatever it wishes with the categories it is fed, e.g., counting the number
* of times that each category appears, or performing some computation on their
* association values.
* <P>
* Much of the function of an Aggregator implementation is not described by this
* interface. This includes the constructor and getter methods to retrieve the
* results of the aggregation.
* Aggregates the categories of documents given to
* {@link #aggregate(int, float, IntsRef)}. Note that the document IDs are local
* to the reader given to {@link #setNextReader(AtomicReaderContext)}.
*
* @lucene.experimental
*/
public interface Aggregator {

/**
* Sets the {@link AtomicReaderContext} for which
* {@link #aggregate(int, float, IntsRef)} calls will be made. If this method
* returns false, {@link #aggregate(int, float, IntsRef)} should not be called
* for this reader.
*/
public boolean setNextReader(AtomicReaderContext context) throws IOException;

/**
* Aggregate the ordinals of the given document ID (and its score). The given
* ordinals offset is always zero.

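The rewritten javadoc above defines the per-segment contract that the rest of this commit implements: setNextReader(AtomicReaderContext) is called once per segment, and the doc IDs later passed to aggregate(int, float, IntsRef) are relative to that segment's reader. As an illustrative sketch only (not part of this commit; the class name is hypothetical, and it assumes the interface declares exactly the two methods shown above), a minimal Aggregator that counts ordinal occurrences under the new contract could look like this:

package org.apache.lucene.facet.search.aggregator; // hypothetical example, not in this commit

import java.io.IOException;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.IntsRef;

public class OrdinalCountingAggregator implements Aggregator {

  private final int[] counts; // one counter slot per category ordinal

  public OrdinalCountingAggregator(int[] counts) {
    this.counts = counts;
  }

  @Override
  public boolean setNextReader(AtomicReaderContext context) throws IOException {
    return true; // keeps no per-segment state, so every segment is valid for aggregation
  }

  @Override
  public void aggregate(int docID, float score, IntsRef ordinals) {
    // docID is local to the reader set above; the ordinals offset is always zero
    for (int i = 0; i < ordinals.length; i++) {
      counts[ordinals.ints[i]]++;
    }
  }
}
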
@ -2,6 +2,7 @@ package org.apache.lucene.facet.search.aggregator;

import java.io.IOException;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.IntsRef;

/*

@ -57,4 +58,9 @@ public class CountingAggregator implements Aggregator {
return counterArray == null ? 0 : counterArray.hashCode();
}

@Override
public boolean setNextReader(AtomicReaderContext context) throws IOException {
return true;
}

}

@ -2,6 +2,7 @@ package org.apache.lucene.facet.search.aggregator;

import java.io.IOException;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.IntsRef;

/*

@ -58,4 +59,9 @@ public class ScoringAggregator implements Aggregator {
return hashCode;
}

@Override
public boolean setNextReader(AtomicReaderContext context) throws IOException {
return true;
}

}

@ -6,7 +6,7 @@ import org.apache.lucene.facet.associations.CategoryFloatAssociation;
import org.apache.lucene.facet.associations.FloatAssociationsPayloadIterator;
import org.apache.lucene.facet.index.params.CategoryListParams;
import org.apache.lucene.facet.search.aggregator.Aggregator;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.collections.IntToFloatMap;

@ -39,13 +39,13 @@ public class AssociationFloatSumAggregator implements Aggregator {
protected final float[] sumArray;
protected final FloatAssociationsPayloadIterator associations;

public AssociationFloatSumAggregator(IndexReader reader, float[] sumArray) throws IOException {
this(CategoryListParams.DEFAULT_TERM.field(), reader, sumArray);
public AssociationFloatSumAggregator(float[] sumArray) throws IOException {
this(CategoryListParams.DEFAULT_TERM.field(), sumArray);
}

public AssociationFloatSumAggregator(String field, IndexReader reader, float[] sumArray) throws IOException {
public AssociationFloatSumAggregator(String field, float[] sumArray) throws IOException {
this.field = field;
associations = new FloatAssociationsPayloadIterator(reader, field, new CategoryFloatAssociation());
associations = new FloatAssociationsPayloadIterator(field, new CategoryFloatAssociation());
this.sumArray = sumArray;
}

@ -76,4 +76,9 @@ public class AssociationFloatSumAggregator implements Aggregator {
return field.hashCode();
}

@Override
public boolean setNextReader(AtomicReaderContext context) throws IOException {
return associations.setNextReader(context);
}

}

@ -6,7 +6,7 @@ import org.apache.lucene.facet.associations.CategoryIntAssociation;
import org.apache.lucene.facet.associations.IntAssociationsPayloadIterator;
import org.apache.lucene.facet.index.params.CategoryListParams;
import org.apache.lucene.facet.search.aggregator.Aggregator;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.collections.IntToIntMap;

@ -39,13 +39,13 @@ public class AssociationIntSumAggregator implements Aggregator {
protected final int[] sumArray;
protected final IntAssociationsPayloadIterator associations;

public AssociationIntSumAggregator(IndexReader reader, int[] sumArray) throws IOException {
this(CategoryListParams.DEFAULT_TERM.field(), reader, sumArray);
public AssociationIntSumAggregator(int[] sumArray) throws IOException {
this(CategoryListParams.DEFAULT_TERM.field(), sumArray);
}

public AssociationIntSumAggregator(String field, IndexReader reader, int[] sumArray) throws IOException {
public AssociationIntSumAggregator(String field, int[] sumArray) throws IOException {
this.field = field;
associations = new IntAssociationsPayloadIterator(reader, field, new CategoryIntAssociation());
associations = new IntAssociationsPayloadIterator(field, new CategoryIntAssociation());
this.sumArray = sumArray;
}

@ -76,4 +76,9 @@ public class AssociationIntSumAggregator implements Aggregator {
return field.hashCode();
}

@Override
public boolean setNextReader(AtomicReaderContext context) throws IOException {
return associations.setNextReader(context);
}

}

@ -6,6 +6,7 @@ import org.apache.lucene.facet.index.params.CategoryListParams;
|
|||
import org.apache.lucene.facet.index.params.FacetIndexingParams;
|
||||
import org.apache.lucene.facet.search.CategoryListIterator;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
|
||||
|
@ -56,25 +57,30 @@ public class CategoryListData {
|
|||
}
|
||||
|
||||
/** Compute category list data for caching for faster iteration. */
|
||||
CategoryListData(IndexReader reader, TaxonomyReader taxo,
|
||||
FacetIndexingParams iparams, CategoryListParams clp) throws IOException {
|
||||
CategoryListData(IndexReader reader, TaxonomyReader taxo, FacetIndexingParams iparams, CategoryListParams clp)
|
||||
throws IOException {
|
||||
|
||||
final int maxDoc = reader.maxDoc();
|
||||
int[][][]dpf = new int[maxDoc][][];
|
||||
int[][][]dpf = new int[reader.maxDoc()][][];
|
||||
int numPartitions = (int)Math.ceil(taxo.getSize()/(double)iparams.getPartitionSize());
|
||||
IntsRef ordinals = new IntsRef(32);
|
||||
for (int part = 0; part < numPartitions; part++) {
|
||||
CategoryListIterator cli = clp.createCategoryListIterator(reader, part);
|
||||
if (cli.init()) {
|
||||
for (int doc = 0; doc < maxDoc; doc++) {
|
||||
cli.getOrdinals(doc, ordinals);
|
||||
if (ordinals.length > 0) {
|
||||
if (dpf[doc] == null) {
|
||||
dpf[doc] = new int[numPartitions][];
|
||||
}
|
||||
dpf[doc][part] = new int[ordinals.length];
|
||||
for (int i = 0; i < ordinals.length; i++) {
|
||||
dpf[doc][part][i] = ordinals.ints[i];
|
||||
for (AtomicReaderContext context : reader.leaves()) {
|
||||
CategoryListIterator cli = clp.createCategoryListIterator(part);
|
||||
if (cli.setNextReader(context)) {
|
||||
final int maxDoc = context.reader().maxDoc();
|
||||
for (int i = 0; i < maxDoc; i++) {
|
||||
cli.getOrdinals(i, ordinals);
|
||||
if (ordinals.length > 0) {
|
||||
int doc = i + context.docBase;
|
||||
if (dpf[doc] == null) {
|
||||
dpf[doc] = new int[numPartitions][];
|
||||
}
|
||||
if (dpf[doc][part] == null) {
|
||||
dpf[doc][part] = new int[ordinals.length];
|
||||
}
|
||||
for (int j = 0; j < ordinals.length; j++) {
|
||||
dpf[doc][part][j] = ordinals.ints[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -93,6 +99,7 @@ public class CategoryListData {
|
|||
/** Internal: category list iterator over uncompressed category info in RAM */
|
||||
private static class RAMCategoryListIterator implements CategoryListIterator {
|
||||
|
||||
private int docBase;
|
||||
private final int part;
|
||||
private final int[][][] dpc;
|
||||
|
||||
|
@ -102,13 +109,15 @@ public class CategoryListData {
|
|||
}
|
||||
|
||||
@Override
|
||||
public boolean init() throws IOException {
|
||||
public boolean setNextReader(AtomicReaderContext context) throws IOException {
|
||||
docBase = context.docBase;
|
||||
return dpc != null && dpc.length > part;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void getOrdinals(int docID, IntsRef ints) throws IOException {
|
||||
ints.length = 0;
|
||||
docID += docBase;
|
||||
if (dpc.length > docID && dpc[docID] != null && dpc[docID][part] != null) {
|
||||
if (ints.ints.length < dpc[docID][part].length) {
|
||||
ints.grow(dpc[docID][part].length);
|
||||
|
|
|
@ -1,7 +1,5 @@
package org.apache.lucene.facet.search.params;

import org.apache.lucene.index.IndexReader;

import org.apache.lucene.facet.search.FacetArrays;
import org.apache.lucene.facet.search.aggregator.Aggregator;
import org.apache.lucene.facet.search.aggregator.ComplementCountingAggregator;

@ -47,8 +45,7 @@ public class CountFacetRequest extends FacetRequest {
}

@Override
public Aggregator createAggregator(boolean useComplements,
FacetArrays arrays, IndexReader reader, TaxonomyReader taxonomy) {
public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, TaxonomyReader taxonomy) {
// we rely on that, if needed, result is cleared by arrays!
int[] a = arrays.getIntArray();
if (useComplements) {

@ -2,8 +2,6 @@ package org.apache.lucene.facet.search.params;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
|
||||
import org.apache.lucene.facet.index.params.CategoryListParams;
|
||||
import org.apache.lucene.facet.search.CategoryListIterator;
|
||||
import org.apache.lucene.facet.search.FacetArrays;
|
||||
|
@ -11,8 +9,8 @@ import org.apache.lucene.facet.search.FacetResultsHandler;
|
|||
import org.apache.lucene.facet.search.TopKFacetResultsHandler;
|
||||
import org.apache.lucene.facet.search.TopKInEachNodeHandler;
|
||||
import org.apache.lucene.facet.search.aggregator.Aggregator;
|
||||
import org.apache.lucene.facet.search.cache.CategoryListData;
|
||||
import org.apache.lucene.facet.search.cache.CategoryListCache;
|
||||
import org.apache.lucene.facet.search.cache.CategoryListData;
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
|
||||
|
@ -314,33 +312,29 @@ public abstract class FacetRequest implements Cloneable {
|
|||
* computation.
|
||||
* @param arrays
|
||||
* provider for facet arrays in use for current computation.
|
||||
* @param indexReader
|
||||
* index reader in effect.
|
||||
* @param taxonomy
|
||||
* reader of taxonomy in effect.
|
||||
* @throws IOException If there is a low-level I/O error.
|
||||
*/
|
||||
public abstract Aggregator createAggregator(boolean useComplements,
|
||||
FacetArrays arrays, IndexReader indexReader,
|
||||
TaxonomyReader taxonomy) throws IOException;
|
||||
public abstract Aggregator createAggregator(boolean useComplements, FacetArrays arrays, TaxonomyReader taxonomy)
|
||||
throws IOException;
|
||||
|
||||
/**
|
||||
* Create the category list iterator for the specified partition.
|
||||
* If a non null cache is provided which contains the required data,
|
||||
* use it for the iteration.
|
||||
* Create the category list iterator for the specified partition. If a non
|
||||
* null cache is provided which contains the required data, use it for the
|
||||
* iteration.
|
||||
*/
|
||||
public CategoryListIterator createCategoryListIterator(IndexReader reader,
|
||||
TaxonomyReader taxo, FacetSearchParams sParams, int partition)
|
||||
public CategoryListIterator createCategoryListIterator(TaxonomyReader taxo, FacetSearchParams sParams, int partition)
|
||||
throws IOException {
|
||||
CategoryListCache clCache = sParams.getCategoryListCache();
|
||||
CategoryListParams clParams = sParams.getFacetIndexingParams().getCategoryListParams(categoryPath);
|
||||
if (clCache!=null) {
|
||||
if (clCache != null) {
|
||||
CategoryListData clData = clCache.get(clParams);
|
||||
if (clData!=null) {
|
||||
if (clData != null) {
|
||||
return clData.iterator(partition);
|
||||
}
|
||||
}
|
||||
return clParams.createCategoryListIterator(reader, partition);
|
||||
return clParams.createCategoryListIterator(partition);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -1,7 +1,5 @@
package org.apache.lucene.facet.search.params;

import org.apache.lucene.index.IndexReader;

import org.apache.lucene.facet.search.FacetArrays;
import org.apache.lucene.facet.search.aggregator.Aggregator;
import org.apache.lucene.facet.search.aggregator.ScoringAggregator;

@ -38,9 +36,7 @@ public class ScoreFacetRequest extends FacetRequest {
}

@Override
public Aggregator createAggregator(boolean useComplements,
FacetArrays arrays, IndexReader reader,
TaxonomyReader taxonomy) {
public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, TaxonomyReader taxonomy) {
assert !useComplements : "complements are not supported by this FacetRequest";
return new ScoringAggregator(arrays.getFloatArray());
}

@ -2,8 +2,6 @@ package org.apache.lucene.facet.search.params.associations;

import java.io.IOException;

import org.apache.lucene.index.IndexReader;

import org.apache.lucene.facet.search.FacetArrays;
import org.apache.lucene.facet.search.aggregator.Aggregator;
import org.apache.lucene.facet.search.aggregator.associations.AssociationFloatSumAggregator;

@ -45,10 +43,10 @@ public class AssociationFloatSumFacetRequest extends FacetRequest {
}

@Override
public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, IndexReader reader,
TaxonomyReader taxonomy) throws IOException {
public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, TaxonomyReader taxonomy)
throws IOException {
assert !useComplements : "complements are not supported by this FacetRequest";
return new AssociationFloatSumAggregator(reader, arrays.getFloatArray());
return new AssociationFloatSumAggregator(arrays.getFloatArray());
}

@Override

@ -2,8 +2,6 @@ package org.apache.lucene.facet.search.params.associations;

import java.io.IOException;

import org.apache.lucene.index.IndexReader;

import org.apache.lucene.facet.search.FacetArrays;
import org.apache.lucene.facet.search.aggregator.Aggregator;
import org.apache.lucene.facet.search.aggregator.associations.AssociationIntSumAggregator;

@ -45,10 +43,10 @@ public class AssociationIntSumFacetRequest extends FacetRequest {
}

@Override
public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, IndexReader reader,
TaxonomyReader taxonomy) throws IOException {
public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, TaxonomyReader taxonomy)
throws IOException {
assert !useComplements : "complements are not supported by this FacetRequest";
return new AssociationIntSumAggregator(reader, arrays.getIntArray());
return new AssociationIntSumAggregator(arrays.getIntArray());
}

@Override

@ -60,6 +60,7 @@ public abstract class Sampler {
|
|||
|
||||
/**
|
||||
* Construct with certain {@link SamplingParams}
|
||||
*
|
||||
* @param params sampling params in effect
|
||||
* @throws IllegalArgumentException if the provided SamplingParams are not valid
|
||||
*/
|
||||
|
@ -110,16 +111,15 @@ public abstract class Sampler {
|
|||
* @param sampleSetSize required size of sample set
|
||||
* @return sample of the input set in the required size
|
||||
*/
|
||||
protected abstract SampleResult createSample(ScoredDocIDs docids, int actualSize,
|
||||
int sampleSetSize) throws IOException;
|
||||
protected abstract SampleResult createSample(ScoredDocIDs docids, int actualSize, int sampleSetSize)
|
||||
throws IOException;
|
||||
|
||||
/**
|
||||
* Get a fixer of sample facet accumulation results. Default implementation
|
||||
* returns a <code>TakmiSampleFixer</code> which is adequate only for
|
||||
* counting. For any other accumulator, provide a different fixer.
|
||||
*/
|
||||
public SampleFixer getSampleFixer(
|
||||
IndexReader indexReader, TaxonomyReader taxonomyReader,
|
||||
public SampleFixer getSampleFixer(IndexReader indexReader, TaxonomyReader taxonomyReader,
|
||||
FacetSearchParams searchParams) {
|
||||
return new TakmiSampleFixer(indexReader, taxonomyReader, searchParams);
|
||||
}
|
||||
|
@ -161,10 +161,10 @@ public abstract class Sampler {
|
|||
OverSampledFacetRequest sampledFreq = null;
|
||||
|
||||
try {
|
||||
sampledFreq = (OverSampledFacetRequest)facetResult.getFacetRequest();
|
||||
sampledFreq = (OverSampledFacetRequest) facetResult.getFacetRequest();
|
||||
} catch (ClassCastException e) {
|
||||
throw new IllegalArgumentException(
|
||||
"It is only valid to call this method with result obtained for a" +
|
||||
"It is only valid to call this method with result obtained for a " +
|
||||
"facet request created through sampler.overSamlpingSearchParams()",
|
||||
e);
|
||||
}
|
||||
|
@ -215,19 +215,15 @@ public abstract class Sampler {
|
|||
}
|
||||
|
||||
@Override
|
||||
public CategoryListIterator createCategoryListIterator(IndexReader reader,
|
||||
TaxonomyReader taxo, FacetSearchParams sParams, int partition)
|
||||
throws IOException {
|
||||
return orig.createCategoryListIterator(reader, taxo, sParams, partition);
|
||||
public CategoryListIterator createCategoryListIterator(TaxonomyReader taxo, FacetSearchParams sParams,
|
||||
int partition) throws IOException {
|
||||
return orig.createCategoryListIterator(taxo, sParams, partition);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Aggregator createAggregator(boolean useComplements,
|
||||
FacetArrays arrays, IndexReader indexReader,
|
||||
TaxonomyReader taxonomy) throws IOException {
|
||||
return orig.createAggregator(useComplements, arrays, indexReader,
|
||||
taxonomy);
|
||||
public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, TaxonomyReader taxonomy)
|
||||
throws IOException {
|
||||
return orig.createAggregator(useComplements, arrays, taxonomy);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -245,4 +241,5 @@ public abstract class Sampler {
|
|||
return orig.supportsComplements();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -91,8 +91,7 @@ class TakmiSampleFixer implements SampleFixer {
|
|||
* full set of matching documents.
|
||||
* @throws IOException If there is a low-level I/O error.
|
||||
*/
|
||||
private void recount(FacetResultNode fresNode, ScoredDocIDs docIds)
|
||||
throws IOException {
|
||||
private void recount(FacetResultNode fresNode, ScoredDocIDs docIds) throws IOException {
|
||||
// TODO (Facet): change from void to return the new, smaller docSet, and use
|
||||
// that for the children, as this will make their intersection ops faster.
|
||||
// can do this only when the new set is "sufficiently" smaller.
|
||||
|
@ -109,8 +108,7 @@ class TakmiSampleFixer implements SampleFixer {
|
|||
Bits liveDocs = MultiFields.getLiveDocs(indexReader);
|
||||
int updatedCount = countIntersection(MultiFields.getTermDocsEnum(indexReader, liveDocs,
|
||||
drillDownTerm.field(), drillDownTerm.bytes(),
|
||||
0),
|
||||
docIds.iterator());
|
||||
0), docIds.iterator());
|
||||
|
||||
fresNode.setValue(updatedCount);
|
||||
}
|
||||
|
|
|
@ -5,6 +5,7 @@ import java.util.ArrayList;
|
|||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.facet.search.CategoryListIterator;
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
|
||||
/*
|
||||
|
@ -42,9 +43,10 @@ public class MultiCategoryListIterator implements CategoryListIterator {
|
|||
}
|
||||
|
||||
@Override
|
||||
public boolean init() throws IOException {
|
||||
public boolean setNextReader(AtomicReaderContext context) throws IOException {
|
||||
validIterators.clear();
|
||||
for (CategoryListIterator cli : iterators) {
|
||||
if (cli.init()) {
|
||||
if (cli.setNextReader(context)) {
|
||||
validIterators.add(cli);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -3,17 +3,18 @@ package org.apache.lucene.facet.util;
|
|||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.apache.lucene.facet.search.ScoredDocIDs;
|
||||
import org.apache.lucene.facet.search.ScoredDocIDsIterator;
|
||||
import org.apache.lucene.index.AtomicReader;
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.search.DocIdSet;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.OpenBitSetDISI;
|
||||
|
||||
import org.apache.lucene.facet.search.ScoredDocIDs;
|
||||
import org.apache.lucene.facet.search.ScoredDocIDsIterator;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
|
@ -49,48 +50,57 @@ public class ScoredDocIdsUtils {
|
|||
* @param reader holding the number of documents & information about deletions.
|
||||
*/
|
||||
public final static ScoredDocIDs getComplementSet(final ScoredDocIDs docids, final IndexReader reader)
|
||||
throws IOException {
|
||||
throws IOException {
|
||||
final int maxDoc = reader.maxDoc();
|
||||
|
||||
DocIdSet docIdSet = docids.getDocIDs();
|
||||
final OpenBitSet complement;
|
||||
if (docIdSet instanceof OpenBitSet) {
|
||||
final FixedBitSet complement;
|
||||
if (docIdSet instanceof FixedBitSet) {
|
||||
// That is the most common case, if ScoredDocIdsCollector was used.
|
||||
complement = ((OpenBitSet) docIdSet).clone();
|
||||
complement = ((FixedBitSet) docIdSet).clone();
|
||||
} else {
|
||||
complement = new OpenBitSetDISI(docIdSet.iterator(), maxDoc);
|
||||
complement = new FixedBitSet(maxDoc);
|
||||
DocIdSetIterator iter = docIdSet.iterator();
|
||||
int doc;
|
||||
while ((doc = iter.nextDoc()) < maxDoc) {
|
||||
complement.set(doc);
|
||||
}
|
||||
}
|
||||
|
||||
complement.flip(0, maxDoc);
|
||||
|
||||
// Remove all Deletions from the complement set
|
||||
clearDeleted(reader, complement);
|
||||
|
||||
return createScoredDocIds(complement, maxDoc);
|
||||
}
|
||||
|
||||
/**
|
||||
* Clear all deleted documents from a given open-bit-set according to a given reader
|
||||
*/
|
||||
private static void clearDeleted(final IndexReader reader,
|
||||
final OpenBitSet set) throws IOException {
|
||||
|
||||
|
||||
/** Clear all deleted documents from a given open-bit-set according to a given reader */
|
||||
private static void clearDeleted(final IndexReader reader, final FixedBitSet set) throws IOException {
|
||||
|
||||
// If there are no deleted docs
|
||||
if (!reader.hasDeletions()) {
|
||||
return; // return immediately
|
||||
}
|
||||
|
||||
Bits bits = MultiFields.getLiveDocs(reader);
|
||||
|
||||
DocIdSetIterator it = set.iterator();
|
||||
int doc = DocIdSetIterator.NO_MORE_DOCS;
|
||||
while ((doc = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
if (!bits.get(doc)) {
|
||||
set.fastClear(doc);
|
||||
int doc = it.nextDoc();
|
||||
for (AtomicReaderContext context : reader.leaves()) {
|
||||
AtomicReader r = context.reader();
|
||||
final int maxDoc = r.maxDoc() + context.docBase;
|
||||
if (doc >= maxDoc) { // skip this segment
|
||||
continue;
|
||||
}
|
||||
if (!r.hasDeletions()) { // skip all docs that belong to this reader as it has no deletions
|
||||
while ((doc = it.nextDoc()) < maxDoc) {}
|
||||
continue;
|
||||
}
|
||||
Bits liveDocs = r.getLiveDocs();
|
||||
do {
|
||||
if (!liveDocs.get(doc - context.docBase)) {
|
||||
set.clear(doc);
|
||||
}
|
||||
} while ((doc = it.nextDoc()) < maxDoc);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Create a subset of an existing ScoredDocIDs object.
|
||||
*
|
||||
|
@ -274,8 +284,7 @@ public class ScoredDocIdsUtils {
|
|||
if (target <= next) {
|
||||
target = next + 1;
|
||||
}
|
||||
return next = target >= maxDoc ? NO_MORE_DOCS
|
||||
: target;
|
||||
return next = target >= maxDoc ? NO_MORE_DOCS : target;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -420,4 +429,5 @@ public class ScoredDocIdsUtils {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -317,8 +317,7 @@ public abstract class FacetTestBase extends LuceneTestCase {
|
|||
}
|
||||
|
||||
/** Validate results equality */
|
||||
protected static void assertSameResults(List<FacetResult> expected,
|
||||
List<FacetResult> actual) {
|
||||
protected static void assertSameResults(List<FacetResult> expected, List<FacetResult> actual) {
|
||||
String expectedResults = resStringValueOnly(expected);
|
||||
String actualResults = resStringValueOnly(actual);
|
||||
if (!expectedResults.equals(actualResults)) {
|
||||
|
|
|
@ -29,12 +29,11 @@ import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
|||
public class AdaptiveAccumulatorTest extends BaseSampleTestTopK {
|
||||
|
||||
@Override
|
||||
protected FacetsAccumulator getSamplingAccumulator(Sampler sampler,
|
||||
TaxonomyReader taxoReader, IndexReader indexReader,
|
||||
FacetSearchParams searchParams) {
|
||||
AdaptiveFacetsAccumulator res = new AdaptiveFacetsAccumulator(searchParams,
|
||||
indexReader, taxoReader);
|
||||
protected FacetsAccumulator getSamplingAccumulator(Sampler sampler, TaxonomyReader taxoReader,
|
||||
IndexReader indexReader, FacetSearchParams searchParams) {
|
||||
AdaptiveFacetsAccumulator res = new AdaptiveFacetsAccumulator(searchParams, indexReader, taxoReader);
|
||||
res.setSampler(sampler);
|
||||
return res;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -14,6 +14,7 @@ import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
|||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
|
@ -106,30 +107,31 @@ public class CategoryListIteratorTest extends LuceneTestCase {
|
|||
IndexReader reader = writer.getReader();
|
||||
writer.close();
|
||||
|
||||
IntsRef ordinals = new IntsRef();
|
||||
CategoryListIterator cli = new PayloadCategoryListIteraor(reader, new Term("f","1"), encoder.createMatchingDecoder());
|
||||
cli.init();
|
||||
int totalCategories = 0;
|
||||
for (int i = 0; i < data.length; i++) {
|
||||
Set<Integer> values = new HashSet<Integer>();
|
||||
for (int j = 0; j < data[i].length; j++) {
|
||||
values.add(data[i].ints[j]);
|
||||
IntsRef ordinals = new IntsRef();
|
||||
CategoryListIterator cli = new PayloadCategoryListIteraor(new Term("f","1"), encoder.createMatchingDecoder());
|
||||
for (AtomicReaderContext context : reader.leaves()) {
|
||||
cli.setNextReader(context);
|
||||
int maxDoc = context.reader().maxDoc();
|
||||
int dataIdx = context.docBase;
|
||||
for (int doc = 0; doc < maxDoc; doc++, dataIdx++) {
|
||||
Set<Integer> values = new HashSet<Integer>();
|
||||
for (int j = 0; j < data[dataIdx].length; j++) {
|
||||
values.add(data[dataIdx].ints[j]);
|
||||
}
|
||||
cli.getOrdinals(doc, ordinals);
|
||||
assertTrue("no ordinals for document " + doc, ordinals.length > 0);
|
||||
for (int j = 0; j < ordinals.length; j++) {
|
||||
assertTrue("expected category not found: " + ordinals.ints[j], values.contains(ordinals.ints[j]));
|
||||
}
|
||||
totalCategories += ordinals.length;
|
||||
}
|
||||
cli.getOrdinals(i, ordinals);
|
||||
assertTrue("no ordinals for document " + i, ordinals.length > 0);
|
||||
for (int j = 0; j < ordinals.length; j++) {
|
||||
assertTrue("expected category not found: " + ordinals.ints[j], values.contains(ordinals.ints[j]));
|
||||
}
|
||||
totalCategories += ordinals.length;
|
||||
}
|
||||
assertEquals("Missing categories!",10,totalCategories);
|
||||
assertEquals("Missing categories!", 10, totalCategories);
|
||||
reader.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Test that a document with no payloads does not confuse the payload decoder.
|
||||
*/
|
||||
@Test
|
||||
public void testPayloadIteratorWithInvalidDoc() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
|
@ -160,24 +162,28 @@ public class CategoryListIteratorTest extends LuceneTestCase {
|
|||
IndexReader reader = writer.getReader();
|
||||
writer.close();
|
||||
|
||||
IntsRef ordinals = new IntsRef();
|
||||
CategoryListIterator cli = new PayloadCategoryListIteraor(reader, new Term("f","1"), encoder.createMatchingDecoder());
|
||||
assertTrue("Failed to initialize payload iterator", cli.init());
|
||||
int totalCategories = 0;
|
||||
for (int i = 0; i < data.length; i++) {
|
||||
Set<Integer> values = new HashSet<Integer>();
|
||||
for (int j = 0; j < data[i].length; j++) {
|
||||
values.add(data[i].ints[j]);
|
||||
}
|
||||
cli.getOrdinals(i, ordinals);
|
||||
if (i == 0) {
|
||||
assertTrue("document 0 must have a payload", ordinals.length > 0);
|
||||
for (int j = 0; j < ordinals.length; j++) {
|
||||
assertTrue("expected category not found: " + ordinals.ints[j], values.contains(ordinals.ints[j]));
|
||||
IntsRef ordinals = new IntsRef();
|
||||
CategoryListIterator cli = new PayloadCategoryListIteraor(new Term("f","1"), encoder.createMatchingDecoder());
|
||||
for (AtomicReaderContext context : reader.leaves()) {
|
||||
cli.setNextReader(context);
|
||||
int maxDoc = context.reader().maxDoc();
|
||||
int dataIdx = context.docBase;
|
||||
for (int doc = 0; doc < maxDoc; doc++, dataIdx++) {
|
||||
Set<Integer> values = new HashSet<Integer>();
|
||||
for (int j = 0; j < data[dataIdx].length; j++) {
|
||||
values.add(data[dataIdx].ints[j]);
|
||||
}
|
||||
cli.getOrdinals(doc, ordinals);
|
||||
if (dataIdx == 0) {
|
||||
assertTrue("document 0 must have a payload", ordinals.length > 0);
|
||||
for (int j = 0; j < ordinals.length; j++) {
|
||||
assertTrue("expected category not found: " + ordinals.ints[j], values.contains(ordinals.ints[j]));
|
||||
}
|
||||
totalCategories += ordinals.length;
|
||||
} else {
|
||||
assertTrue("only document 0 should have a payload", ordinals.length == 0);
|
||||
}
|
||||
totalCategories += ordinals.length;
|
||||
} else {
|
||||
assertTrue("only document 0 should have a payload", ordinals.length == 0);
|
||||
}
|
||||
}
|
||||
assertEquals("Wrong number of total categories!", 2, totalCategories);
|
||||
|
|
|
@ -22,6 +22,7 @@ import org.apache.lucene.facet.search.params.FacetRequest;
|
|||
import org.apache.lucene.facet.search.params.FacetSearchParams;
|
||||
import org.apache.lucene.facet.search.results.FacetResult;
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
|
@ -132,8 +133,8 @@ public class TestCategoryListCache extends FacetTestBase {
|
|||
}
|
||||
}
|
||||
@Override
|
||||
public boolean init() throws IOException {
|
||||
return it.init();
|
||||
public boolean setNextReader(AtomicReaderContext context) throws IOException {
|
||||
return it.setNextReader(context);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
@ -0,0 +1,128 @@
|
|||
package org.apache.lucene.facet.search;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field.Store;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.facet.index.FacetFields;
|
||||
import org.apache.lucene.facet.index.params.CategoryListParams;
|
||||
import org.apache.lucene.facet.index.params.FacetIndexingParams;
|
||||
import org.apache.lucene.facet.search.params.CountFacetRequest;
|
||||
import org.apache.lucene.facet.search.params.FacetRequest;
|
||||
import org.apache.lucene.facet.search.params.FacetSearchParams;
|
||||
import org.apache.lucene.facet.search.results.FacetResult;
|
||||
import org.apache.lucene.facet.search.results.FacetResultNode;
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
|
||||
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
|
||||
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
|
||||
import org.apache.lucene.facet.util.AssertingCategoryListIterator;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.NoMergePolicy;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.junit.Test;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
public class TestStandardFacetsAccumulator extends LuceneTestCase {
|
||||
|
||||
private void indexTwoDocs(IndexWriter indexWriter, FacetFields facetFields, boolean withContent) throws Exception {
|
||||
for (int i = 0; i < 2; i++) {
|
||||
Document doc = new Document();
|
||||
if (withContent) {
|
||||
doc.add(new StringField("f", "a", Store.NO));
|
||||
}
|
||||
if (facetFields != null) {
|
||||
facetFields.addFields(doc, Collections.singletonList(new CategoryPath("A", Integer.toString(i))));
|
||||
}
|
||||
indexWriter.addDocument(doc);
|
||||
}
|
||||
|
||||
indexWriter.commit();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSegmentsWithoutCategoriesOrResults() throws Exception {
|
||||
// tests the accumulator when there are segments with no results
|
||||
Directory indexDir = newDirectory();
|
||||
Directory taxoDir = newDirectory();
|
||||
|
||||
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
|
||||
iwc.setMergePolicy(NoMergePolicy.COMPOUND_FILES); // prevent merges
|
||||
IndexWriter indexWriter = new IndexWriter(indexDir, iwc);
|
||||
FacetIndexingParams fip = new FacetIndexingParams(new CategoryListParams() {
|
||||
@Override
|
||||
public CategoryListIterator createCategoryListIterator(int partition) throws IOException {
|
||||
return new AssertingCategoryListIterator(super.createCategoryListIterator(partition));
|
||||
}
|
||||
});
|
||||
TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
|
||||
FacetFields facetFields = new FacetFields(taxoWriter, fip);
|
||||
indexTwoDocs(indexWriter, facetFields, false); // 1st segment, no content, with categories
|
||||
indexTwoDocs(indexWriter, null, true); // 2nd segment, with content, no categories
|
||||
indexTwoDocs(indexWriter, facetFields, true); // 3rd segment ok
|
||||
indexTwoDocs(indexWriter, null, false); // 4th segment, no content, or categories
|
||||
indexTwoDocs(indexWriter, null, true); // 5th segment, with content, no categories
|
||||
indexTwoDocs(indexWriter, facetFields, true); // 6th segment, with content, with categories
|
||||
IOUtils.close(indexWriter, taxoWriter);
|
||||
|
||||
DirectoryReader indexReader = DirectoryReader.open(indexDir);
|
||||
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
|
||||
IndexSearcher indexSearcher = new IndexSearcher(indexReader);
|
||||
|
||||
// search for "f:a", only segments 1 and 3 should match results
|
||||
Query q = new TermQuery(new Term("f", "a"));
|
||||
ArrayList<FacetRequest> requests = new ArrayList<FacetRequest>(1);
|
||||
CountFacetRequest countNoComplements = new CountFacetRequest(new CategoryPath("A"), 10) {
|
||||
@Override
|
||||
public boolean supportsComplements() {
|
||||
return false; // disable complements
|
||||
}
|
||||
};
|
||||
requests.add(countNoComplements);
|
||||
FacetSearchParams fsp = new FacetSearchParams(requests, fip);
|
||||
FacetsCollector fc = new FacetsCollector(fsp , indexReader, taxoReader);
|
||||
indexSearcher.search(q, fc);
|
||||
List<FacetResult> results = fc.getFacetResults();
|
||||
assertEquals("received too many facet results", 1, results.size());
|
||||
FacetResultNode frn = results.get(0).getFacetResultNode();
|
||||
assertEquals("wrong weight for \"A\"", 4, (int) frn.getValue());
|
||||
assertEquals("wrong number of children", 2, frn.getNumSubResults());
|
||||
for (FacetResultNode node : frn.getSubResults()) {
|
||||
assertEquals("wrong weight for child " + node.getLabel(), 2, (int) node.getValue());
|
||||
}
|
||||
IOUtils.close(indexReader, taxoReader);
|
||||
|
||||
IOUtils.close(indexDir, taxoDir);
|
||||
}
|
||||
|
||||
}
|
|
@ -17,6 +17,7 @@ import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
|
|||
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
|
||||
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
|
||||
import org.apache.lucene.facet.util.MultiCategoryListIterator;
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
|
@ -100,21 +101,24 @@ public class MultiCategoryListIteratorTest extends LuceneTestCase {
|
|||
clCache.loadAndRegister(clp, indexReader, taxoReader, indexingParams);
|
||||
iterators[i] = clCache.get(clp).iterator(0); // no partitions
|
||||
} else {
|
||||
iterators[i] = new PayloadCategoryListIteraor(indexReader, clp.getTerm(), decoder);
|
||||
iterators[i] = new PayloadCategoryListIteraor(clp.getTerm(), decoder);
|
||||
}
|
||||
}
|
||||
MultiCategoryListIterator cli = new MultiCategoryListIterator(iterators);
|
||||
assertTrue("failed to init multi-iterator", cli.init());
|
||||
IntsRef ordinals = new IntsRef();
|
||||
int maxDoc = indexReader.maxDoc();
|
||||
for (int i = 0; i < maxDoc; i++) {
|
||||
cli.getOrdinals(i, ordinals);
|
||||
assertTrue("document " + i + " does not have categories", ordinals.length > 0);
|
||||
for (int j = 0; j < ordinals.length; j++) {
|
||||
CategoryPath cp = taxoReader.getPath(ordinals.ints[j]);
|
||||
assertNotNull("ordinal " + ordinals.ints[j] + " not found in taxonomy", cp);
|
||||
if (cp.length == 2) {
|
||||
assertEquals("invalid category for document " + i, i, Integer.parseInt(cp.components[1]));
|
||||
for (AtomicReaderContext context : indexReader.leaves()) {
|
||||
assertTrue("failed to init multi-iterator", cli.setNextReader(context));
|
||||
IntsRef ordinals = new IntsRef();
|
||||
final int maxDoc = context.reader().maxDoc();
|
||||
for (int i = 0; i < maxDoc; i++) {
|
||||
cli.getOrdinals(i, ordinals);
|
||||
assertTrue("document " + i + " does not have categories", ordinals.length > 0);
|
||||
for (int j = 0; j < ordinals.length; j++) {
|
||||
CategoryPath cp = taxoReader.getPath(ordinals.ints[j]);
|
||||
assertNotNull("ordinal " + ordinals.ints[j] + " not found in taxonomy", cp);
|
||||
if (cp.length == 2) {
|
||||
int globalDoc = i + context.docBase;
|
||||
assertEquals("invalid category for document " + globalDoc, globalDoc, Integer.parseInt(cp.components[1]));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -59,9 +59,8 @@ public abstract class BaseSampleTestTopK extends BaseTestTopK {
|
|||
return res;
|
||||
}
|
||||
|
||||
protected abstract FacetsAccumulator getSamplingAccumulator(Sampler sampler,
|
||||
TaxonomyReader taxoReader, IndexReader indexReader,
|
||||
FacetSearchParams searchParams);
|
||||
protected abstract FacetsAccumulator getSamplingAccumulator(Sampler sampler, TaxonomyReader taxoReader,
|
||||
IndexReader indexReader, FacetSearchParams searchParams);
|
||||
|
||||
/**
|
||||
* Try out faceted search with sampling enabled and complements either disabled or enforced
|
||||
|
@ -89,7 +88,7 @@ public abstract class BaseSampleTestTopK extends BaseTestTopK {
|
|||
|
||||
// try several times in case of failure, because the test has a chance to fail
|
||||
// if the top K facets are not sufficiently common with the sample set
|
||||
for (int nTrial=0; nTrial<RETRIES; nTrial++) {
|
||||
for (int nTrial = 0; nTrial < RETRIES; nTrial++) {
|
||||
try {
|
||||
// complement with sampling!
|
||||
final Sampler sampler = createSampler(nTrial, docCollector.getScoredDocIDs(), useRandomSampler);
|
||||
|
@ -99,7 +98,7 @@ public abstract class BaseSampleTestTopK extends BaseTestTopK {
|
|||
|
||||
break; // succeeded
|
||||
} catch (NotSameResultError e) {
|
||||
if (nTrial>=RETRIES-1) {
|
||||
if (nTrial >= RETRIES - 1) {
|
||||
throw e; // no more retries allowed, must fail
|
||||
}
|
||||
}
|
||||
|
@ -119,14 +118,11 @@ public abstract class BaseSampleTestTopK extends BaseTestTopK {
|
|||
assertSameResults(expected, sampledResults);
|
||||
}
|
||||
|
||||
private FacetsCollector samplingCollector(
|
||||
final boolean complement,
|
||||
final Sampler sampler,
|
||||
private FacetsCollector samplingCollector(final boolean complement, final Sampler sampler,
|
||||
FacetSearchParams samplingSearchParams) {
|
||||
FacetsCollector samplingFC = new FacetsCollector(samplingSearchParams, indexReader, taxoReader) {
|
||||
@Override
|
||||
protected FacetsAccumulator initFacetsAccumulator(
|
||||
FacetSearchParams facetSearchParams, IndexReader indexReader,
|
||||
protected FacetsAccumulator initFacetsAccumulator(FacetSearchParams facetSearchParams, IndexReader indexReader,
|
||||
TaxonomyReader taxonomyReader) {
|
||||
FacetsAccumulator acc = getSamplingAccumulator(sampler, taxonomyReader, indexReader, facetSearchParams);
|
||||
acc.setComplementThreshold(complement ? FacetsAccumulator.FORCE_COMPLEMENT : FacetsAccumulator.DISABLE_COMPLEMENT);
|
||||
|
@ -144,12 +140,13 @@ public abstract class BaseSampleTestTopK extends BaseTestTopK {
|
|||
samplingParams.setMinSampleSize((int) (100 * retryFactor));
|
||||
samplingParams.setMaxSampleSize((int) (10000 * retryFactor));
|
||||
samplingParams.setOversampleFactor(5.0 * retryFactor);
|
||||
samplingParams.setSamplingThreshold(11000); //force sampling
|
||||
|
||||
samplingParams.setSamplingThreshold(11000); //force sampling
|
||||
Sampler sampler = useRandomSampler ?
|
||||
new RandomSampler(samplingParams, new Random(random().nextLong())) :
|
||||
new RepeatableSampler(samplingParams);
|
||||
assertTrue("must enable sampling for this test!",sampler.shouldSample(scoredDocIDs));
|
||||
return sampler;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,65 @@
|
|||
package org.apache.lucene.facet.util;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.facet.search.CategoryListIterator;
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A {@link CategoryListIterator} which asserts that
|
||||
* {@link #getOrdinals(int, IntsRef)} is not called before
|
||||
* {@link #setNextReader(AtomicReaderContext)} and that if
|
||||
* {@link #setNextReader(AtomicReaderContext)} returns false,
|
||||
* {@link #getOrdinals(int, IntsRef)} isn't called.
|
||||
*/
|
||||
public class AssertingCategoryListIterator implements CategoryListIterator {
|
||||
|
||||
private final CategoryListIterator delegate;
|
||||
private boolean setNextReaderCalled = false;
|
||||
private boolean validSegment = false;
|
||||
private int maxDoc;
|
||||
|
||||
public AssertingCategoryListIterator(CategoryListIterator delegate) {
|
||||
this.delegate = delegate;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean setNextReader(AtomicReaderContext context) throws IOException {
|
||||
setNextReaderCalled = true;
|
||||
maxDoc = context.reader().maxDoc();
|
||||
return validSegment = delegate.setNextReader(context);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void getOrdinals(int docID, IntsRef ints) throws IOException {
|
||||
if (!setNextReaderCalled) {
|
||||
throw new RuntimeException("should not call getOrdinals without setNextReader first");
|
||||
}
|
||||
if (!validSegment) {
|
||||
throw new RuntimeException("should not call getOrdinals if setNextReader returned false");
|
||||
}
|
||||
if (docID >= maxDoc) {
|
||||
throw new RuntimeException("docID is larger than current maxDoc; forgot to call setNextReader?");
|
||||
}
|
||||
delegate.getOrdinals(docID, ints);
|
||||
}
|
||||
|
||||
}
|
|
@ -9,6 +9,9 @@ import org.apache.lucene.document.Document;
|
|||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.facet.search.ScoredDocIDs;
|
||||
import org.apache.lucene.facet.search.ScoredDocIDsIterator;
|
||||
import org.apache.lucene.facet.search.ScoredDocIdCollector;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
|
@ -21,14 +24,9 @@ import org.apache.lucene.search.Query;
|
|||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
import org.apache.lucene.util.OpenBitSetDISI;
|
||||
import org.junit.Test;
|
||||
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.facet.search.ScoredDocIDs;
|
||||
import org.apache.lucene.facet.search.ScoredDocIDsIterator;
|
||||
import org.apache.lucene.facet.search.ScoredDocIdCollector;
|
||||
import org.junit.Test;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
|
@ -52,21 +50,21 @@ public class TestScoredDocIDsUtils extends LuceneTestCase {
|
|||
@Test
|
||||
public void testComplementIterator() throws Exception {
|
||||
final int n = atLeast(10000);
|
||||
final OpenBitSet bits = new OpenBitSet(n);
|
||||
for (int i = 0; i < 5 * n; i++) {
|
||||
bits.flip(random().nextInt(n));
|
||||
final FixedBitSet bits = new FixedBitSet(n);
|
||||
Random random = random();
|
||||
for (int i = 0; i < n; i++) {
|
||||
int idx = random.nextInt(n);
|
||||
bits.flip(idx, idx + 1);
|
||||
}
|
||||
|
||||
OpenBitSet verify = new OpenBitSet(n);
|
||||
verify.or(bits);
|
||||
FixedBitSet verify = new FixedBitSet(bits);
|
||||
|
||||
ScoredDocIDs scoredDocIDs = ScoredDocIdsUtils.createScoredDocIds(bits, n);
|
||||
|
||||
Directory dir = newDirectory();
|
||||
IndexReader reader = createReaderWithNDocs(random(), n, dir);
|
||||
IndexReader reader = createReaderWithNDocs(random, n, dir);
|
||||
try {
|
||||
assertEquals(n - verify.cardinality(), ScoredDocIdsUtils.getComplementSet(scoredDocIDs,
|
||||
reader).size());
|
||||
assertEquals(n - verify.cardinality(), ScoredDocIdsUtils.getComplementSet(scoredDocIDs, reader).size());
|
||||
} finally {
|
||||
reader.close();
|
||||
dir.close();
|
||||
|
@ -147,7 +145,7 @@ public class TestScoredDocIDsUtils extends LuceneTestCase {
|
|||
searcher.search(q, collector);
|
||||
|
||||
ScoredDocIDs scoredDocIds = collector.getScoredDocIDs();
|
||||
OpenBitSet resultSet = new OpenBitSetDISI(scoredDocIds.getDocIDs().iterator(), reader.maxDoc());
|
||||
FixedBitSet resultSet = (FixedBitSet) scoredDocIds.getDocIDs();
|
||||
|
||||
// Getting the complement set of the query result
|
||||
ScoredDocIDs complementSet = ScoredDocIdsUtils.getComplementSet(scoredDocIds, reader);
|
||||
|
@ -164,12 +162,11 @@ public class TestScoredDocIDsUtils extends LuceneTestCase {
|
|||
assertFalse(
|
||||
"Complement-Set must not contain deleted documents (doc="+docNum+")",
|
||||
live != null && !live.get(docNum));
|
||||
assertNull(
|
||||
"Complement-Set must not contain docs from the original set (doc="+ docNum+")",
|
||||
assertNull("Complement-Set must not contain docs from the original set (doc="+ docNum+")",
|
||||
reader.document(docNum).getField("del"));
|
||||
assertFalse(
|
||||
"Complement-Set must not contain docs from the original set (doc="+docNum+")",
|
||||
resultSet.fastGet(docNum));
|
||||
resultSet.get(docNum));
|
||||
}
|
||||
} finally {
|
||||
reader.close();
|
||||
|
|
|
@ -24,6 +24,7 @@ import org.apache.lucene.util.BytesRef;
|
|||
import org.apache.lucene.util.BytesRefIterator;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
import org.apache.lucene.util.fst.*;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
||||
/**
|
||||
* Finite state automata based implementation of "autocomplete" functionality.
|
||||
|
@ -237,7 +238,8 @@ public class FSTCompletionBuilder {
|
|||
final Object empty = outputs.getNoOutput();
|
||||
final Builder<Object> builder = new Builder<Object>(
|
||||
FST.INPUT_TYPE.BYTE1, 0, 0, true, true,
|
||||
shareMaxTailLength, outputs, null, false, true);
|
||||
shareMaxTailLength, outputs, null, false,
|
||||
PackedInts.DEFAULT, true, 15);
|
||||
|
||||
BytesRef scratch = new BytesRef();
|
||||
BytesRef entry;
|
||||
|
|
|
@ -40,6 +40,7 @@ import org.apache.lucene.util.IntsRef;
|
|||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
|
@ -288,7 +289,16 @@ public class FSTTester<T> {
|
|||
outputs,
|
||||
null,
|
||||
willRewrite,
|
||||
true);
|
||||
PackedInts.DEFAULT,
|
||||
true,
|
||||
15);
|
||||
if (LuceneTestCase.VERBOSE) {
|
||||
if (willRewrite) {
|
||||
System.out.println("TEST: packed FST");
|
||||
} else {
|
||||
System.out.println("TEST: non-packed FST");
|
||||
}
|
||||
}
|
||||
|
||||
for(InputOutput<T> pair : pairs) {
|
||||
if (pair.output instanceof List) {
|
||||
|
|
|
@ -41,8 +41,6 @@ Detailed Change List
|
|||
Other Changes
|
||||
----------------------
|
||||
|
||||
* SOLR-3735: Relocate the example mime-to-extension mapping, and
|
||||
upgrade Velocity Engine to 1.7 (ehatcher)
|
||||
|
||||
================== 4.1.0 ==================
|
||||
|
||||
|
@ -50,14 +48,14 @@ Versions of Major Components
|
|||
---------------------
|
||||
Apache Tika 1.2
|
||||
Carrot2 3.6.2
|
||||
Velocity 1.6.4 and Velocity Tools 2.0
|
||||
Velocity 1.7 and Velocity Tools 2.0
|
||||
Apache UIMA 2.3.1
|
||||
Apache ZooKeeper 3.4.5
|
||||
|
||||
Upgrading from Solr 4.0.0
|
||||
----------------------
|
||||
|
||||
Custom java parsing plugins need to migrade from throwing the internal
|
||||
Custom java parsing plugins need to migrate from throwing the internal
|
||||
ParseException to throwing SyntaxError.
|
||||
|
||||
BaseDistributedSearchTestCase now randomizes the servlet context it uses when
|
||||
|
@ -150,7 +148,7 @@ New Features
|
|||
CoreAdmin API the same way as the data directory. (Mark Miller)
|
||||
|
||||
* SOLR-4028: When using ZK chroot, it would be nice if Solr would create the
|
||||
initial path when it doesn't exist. (Tomas Fernandez Lobbe via Mark Miller)
|
||||
initial path when it doesn't exist. (Tomás Fernández Löbbe via Mark Miller)
|
||||
|
||||
* SOLR-3948: Calculate/display deleted documents in admin interface.
|
||||
(Shawn Heisey via Mark Miller)
|
||||
|
@ -209,6 +207,9 @@ New Features
|
|||
* SOLR-2201: DIH's "formatDate" function now supports a timezone as an optional
|
||||
fourth parameter (James Dyer, Mark Waddle)
|
||||
|
||||
* SOLR-4302: New parameter 'indexInfo' (defaults to true) in CoreAdmin STATUS
|
||||
command can be used to omit index specific information (Shahar Davidson via shalin)
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
|
||||
|
@ -226,12 +227,12 @@ Optimizations
|
|||
dynamicField's (steffkes)
|
||||
|
||||
* SOLR-3941: The "commitOnLeader" part of distributed recovery can use
|
||||
openSearcher=false. (Tomas Fernandez Lobbe via Mark Miller)
|
||||
openSearcher=false. (Tomás Fernández Löbbe via Mark Miller)
|
||||
|
||||
* SOLR-4063: Allow CoreContainer to load multiple SolrCores in parallel rather
|
||||
than just serially. (Mark Miller)
|
||||
|
||||
* SOLR-4199: When doing zk retries due to connectionloss, rather than just
|
||||
* SOLR-4199: When doing zk retries due to connection loss, rather than just
|
||||
retrying for 2 minutes, retry in proportion to the session timeout.
|
||||
(Mark Miller)
|
||||
|
||||
|
@ -250,6 +251,10 @@ Optimizations
|
|||
|
||||
* SOLR-3982: Admin UI: Various Dataimport Improvements (steffkes)
|
||||
|
||||
* SOLR-4296: Admin UI: Improve Dataimport Auto-Refresh (steffkes)
|
||||
|
||||
* SOLR-3458: Allow multiple Items to stay open on Plugins-Page (steffkes)
|
||||
|
||||
Bug Fixes
|
||||
----------------------
|
||||
|
||||
|
@ -362,7 +367,7 @@ Bug Fixes
|
|||
|
||||
* SOLR-4081: QueryParsing.toString, used during debugQuery=true, did not
|
||||
correctly handle ExtendedQueries such as WrappedQuery
|
||||
(used when cache=false), spatial queries, and frange queires.
|
||||
(used when cache=false), spatial queries, and frange queries.
|
||||
(Eirik Lygre, yonik)
|
||||
|
||||
* SOLR-3959: Ensure the internal comma separator of poly fields is escaped
|
||||
|
@ -403,7 +408,7 @@ Bug Fixes
|
|||
|
||||
* SOLR-4162: ZkCli usage examples are not correct because the zkhost parameter
|
||||
is not present and it is mandatory for all commands.
|
||||
(Tomas Fernandez Lobbe via Mark Miller)
|
||||
(Tomás Fernández Löbbe via Mark Miller)
|
||||
|
||||
* SOLR-4071: Validate that name is pass to Collections API create, and behave the
|
||||
same way as on startup when collection.configName is not explicitly passed.
|
||||
|
@ -495,7 +500,7 @@ Bug Fixes
|
|||
* SOLR-4279: Wrong exception message if _version_ field is multivalued (shalin)
|
||||
|
||||
* SOLR-4170: The 'backup' ReplicationHandler command can sometimes use a stale
|
||||
index directory rather than the current one. (Mark Miller, Marcin Rzewuck)
|
||||
index directory rather than the current one. (Mark Miller, Marcin Rzewucki)
|
||||
|
||||
* SOLR-3876: Solr Admin UI is completely dysfunctional on IE 9 (steffkes)
|
||||
|
||||
|
@ -503,6 +508,17 @@ Bug Fixes
|
|||
import works fine with SolrCloud clusters (Deniz Durmus, James Dyer,
|
||||
Erick Erickson, shalin)
|
||||
|
||||
* SOLR-4291: Harden the Overseer work queue thread loop. (Mark Miller)
|
||||
|
||||
* SOLR-3820: Solr Admin Query form is missing some edismax request parameters
|
||||
(steffkes)
|
||||
|
||||
* SOLR-4217: post.jar no longer ignores -Dparams when -Durl is used.
|
||||
(Alexandre Rafalovitch, ehatcher)
|
||||
|
||||
* SOLR-4303: On replication, if the generation of the master is lower than the
|
||||
slave we need to force a full copy of the index. (Mark Miller, Gregg Donovan)
|
||||
|
||||
Other Changes
|
||||
----------------------
|
||||
|
||||
|
@ -580,6 +596,16 @@ Other Changes
|
|||
* SOLR-4208: ExtendedDismaxQParserPlugin has been refactored to make
|
||||
subclassing easier. (Tomás Fernández Löbbe, hossman)
|
||||
|
||||
* SOLR-3735: Relocate the example mime-to-extension mapping, and
|
||||
upgrade Velocity Engine to 1.7 (ehatcher)
|
||||
|
||||
* SOLR-4287: Removed "apache-" prefix from Solr distribution and artifact
|
||||
filenames. (Ryan Ernst, Robert Muir, Steve Rowe)
|
||||
|
||||
* SOLR-4016: Deduplication does not work with atomic/partial updates so
|
||||
disallow atomic update requests which change signature generating fields.
|
||||
(Joel Nothman, yonik, shalin)
|
||||
|
||||
================== 4.0.0 ==================
|
||||
|
||||
Versions of Major Components
|
||||
|
@ -862,7 +888,7 @@ Bug Fixes
|
|||
|
||||
* SOLR-3527: SolrCmdDistributor drops some of the important commit attributes
|
||||
(maxOptimizeSegments, softCommit, expungeDeletes) when sending a commit to
|
||||
replicas. (Andy Laird, Tomas Fernandez Lobbe, Mark Miller)
|
||||
replicas. (Andy Laird, Tomás Fernández Löbbe, Mark Miller)
|
||||
|
||||
* SOLR-3844: SolrCore reload can fail because it tries to remove the index
|
||||
write lock while already holding it. (Mark Miller)
|
||||
|
@ -1273,7 +1299,7 @@ New Features
|
|||
* SOLR-571: The autowarmCount for LRUCaches (LRUCache and FastLRUCache) now
|
||||
supports "percentages" which get evaluated relative the current size of
|
||||
the cache when warming happens.
|
||||
(Tomas Fernandez Lobbe and hossman)
|
||||
(Tomás Fernández Löbbe and hossman)
|
||||
|
||||
* SOLR-1932: New relevancy function queries: termfreq, tf, docfreq, idf
|
||||
norm, maxdoc, numdocs. (yonik)
|
||||
|
@ -1644,12 +1670,12 @@ Bug Fixes
|
|||
down to it via acceptDocs since LUCENE-1536. (Mike Hugo, yonik)
|
||||
|
||||
* SOLR-3214: If you use multiple fl entries rather than a comma separated list, all but the first
|
||||
entry can be ignored if you are using distributed search. (Tomas Fernandez Lobbe via Mark Miller)
|
||||
entry can be ignored if you are using distributed search. (Tomás Fernández Löbbe via Mark Miller)
|
||||
|
||||
* SOLR-3352: eDismax: pf2 should kick in for a query with 2 terms (janhoy)
|
||||
|
||||
* SOLR-3361: ReplicationHandler "maxNumberOfBackups" doesn't work if backups are triggered on commit
|
||||
(James Dyer, Tomas Fernandez Lobbe)
|
||||
(James Dyer, Tomás Fernández Löbbe)
|
||||
|
||||
* SOLR-2605: fixed tracking of the 'defaultCoreName' in CoreContainer so that
|
||||
CoreAdminHandler could return consistent information regardless of whether
|
||||
|
@ -1864,7 +1890,17 @@ Documentation
|
|||
|
||||
* SOLR-2232: Improved README info on solr.solr.home in examples
|
||||
(Eric Pugh and hossman)
|
||||
|
||||
|
||||
==================  3.6.2 ==================

Bug Fixes
----------------------
* SOLR-3790: ConcurrentModificationException could be thrown when using hl.fl=*.
  (yonik, koji)

* SOLR-3589: Edismax parser does not honor mm parameter if analyzer splits a token.
  (Tom Burton-West, Robert Muir)

==================  3.6.1 ==================
More information about this release, including any errata related to the
release notes, upgrade instructions, or other changes may be found online at:
||||
|
@ -1877,7 +1913,7 @@ Bug Fixes
|
|||
(Uwe Schindler, Mike McCandless, Robert Muir)
|
||||
|
||||
* SOLR-3361: ReplicationHandler "maxNumberOfBackups" doesn't work if backups are triggered on commit
|
||||
(James Dyer, Tomas Fernandez Lobbe)
|
||||
(James Dyer, Tomás Fernández Löbbe)
|
||||
|
||||
* SOLR-3375: Fix charset problems with HttpSolrServer (Roger Håkansson, yonik, siren)
|
||||
|
||||
|
|
|
@@ -45,11 +45,11 @@ example/
  Please see example/README.txt for information about running this
  example.

dist/apache-solr-XX.war
dist/solr-XX.war
  The Apache Solr Application. Deploy this WAR file to any servlet
  container to run Apache Solr.

dist/apache-solr-<component>-XX.jar
dist/solr-<component>-XX.jar
  The Apache Solr libraries. To compile Apache Solr Plugins,
  one or more of these will be required. The core library is
  required at a minimum. (see http://wiki.apache.org/solr/SolrPlugins
|
|
|
@@ -25,7 +25,7 @@

  <property name="Name" value="Solr" />
  <property name="version" value="5.0-SNAPSHOT"/>
  <property name="fullname" value="apache-${ant.project.name}"/>
  <property name="fullname" value="${ant.project.name}"/>
  <property name="fullnamever" value="${fullname}-${version}"/>
  <property name="final.name" value="${fullnamever}"/>
|
||||
|
@ -114,7 +114,7 @@
|
|||
<attribute name="property" default="@{name}.uptodate"/>
|
||||
<attribute name="classpath.property" default="@{name}.jar"/>
|
||||
<!-- set jarfile only, if the target jar file has no generic name -->
|
||||
<attribute name="jarfile" default="${common-solr.dir}/build/contrib/solr-@{name}/apache-solr-@{name}-${version}.jar"/>
|
||||
<attribute name="jarfile" default="${common-solr.dir}/build/contrib/solr-@{name}/solr-@{name}-${version}.jar"/>
|
||||
<sequential>
|
||||
<!--<echo message="Checking '@{jarfile}' against source folder '${common.dir}/contrib/@{name}/src/java'"/>-->
|
||||
<property name="@{classpath.property}" location="@{jarfile}"/>
|
||||
|
@ -214,13 +214,13 @@
|
|||
</target>
|
||||
|
||||
<target name="check-solr-core-javadocs-uptodate" unless="solr-core-javadocs.uptodate">
|
||||
<uptodate property="solr-core-javadocs.uptodate" targetfile="${build.dir}/solr-core/apache-solr-core-${version}-javadoc.jar">
|
||||
<uptodate property="solr-core-javadocs.uptodate" targetfile="${build.dir}/solr-core/solr-core-${version}-javadoc.jar">
|
||||
<srcfiles dir="${common-solr.dir}/core/src/java" includes="**/*.java"/>
|
||||
</uptodate>
|
||||
</target>
|
||||
|
||||
<target name="check-solrj-javadocs-uptodate" unless="solrj-javadocs.uptodate">
|
||||
<uptodate property="solrj-javadocs.uptodate" targetfile="${build.dir}/solr-solrj/apache-solr-solrj-${version}-javadoc.jar">
|
||||
<uptodate property="solrj-javadocs.uptodate" targetfile="${build.dir}/solr-solrj/solr-solrj-${version}-javadoc.jar">
|
||||
<srcfiles dir="${common-solr.dir}/solrj/src/java" includes="**/*.java"/>
|
||||
</uptodate>
|
||||
</target>
|
||||
|
|
|
@@ -19,7 +19,7 @@ To start using Solr UIMA Metadata Extraction Library you should go through the f

  <lib dir="../../contrib/uima/lib" />
  <lib dir="../../contrib/uima/lucene-libs" />
  <lib dir="../../dist/" regex="apache-solr-uima-\d.*\.jar" />
  <lib dir="../../dist/" regex="solr-uima-\d.*\.jar" />

2. modify your schema.xml, adding the fields you want to hold metadata, specifying proper values for the type, indexed, stored and multiValued options:
|
||||
|
|
|
@ -44,8 +44,8 @@
|
|||
in that directory which completely match the regex (anchored on both
|
||||
ends) will be included.
|
||||
-->
|
||||
<lib dir="../../dist/" regex="apache-solr-cell-\d.*\.jar" />
|
||||
<lib dir="../../dist/" regex="apache-solr-clustering-\d.*\.jar" />
|
||||
<lib dir="../../dist/" regex="solr-cell-\d.*\.jar" />
|
||||
<lib dir="../../dist/" regex="solr-clustering-\d.*\.jar" />
|
||||
<!--
|
||||
If a dir option (with or without a regex) is used and nothing is
|
||||
found that matches, it will be ignored
|
||||
|
|
|
@ -44,8 +44,8 @@
|
|||
in that directory which completely match the regex (anchored on both
|
||||
ends) will be included.
|
||||
-->
|
||||
<lib dir="../../dist/" regex="apache-solr-cell-\d.*\.jar" />
|
||||
<lib dir="../../dist/" regex="apache-solr-clustering-\d.*\.jar" />
|
||||
<lib dir="../../dist/" regex="solr-cell-\d.*\.jar" />
|
||||
<lib dir="../../dist/" regex="solr-clustering-\d.*\.jar" />
|
||||
<!--
|
||||
If a dir option (with or without a regex) is used and nothing is
|
||||
found that matches, it will be ignored
|
||||
|
|
|
@ -24,7 +24,7 @@
|
|||
<luceneMatchVersion>${tests.luceneMatchVersion:LUCENE_CURRENT}</luceneMatchVersion>
|
||||
|
||||
<lib dir="../../contrib/velocity/lib" />
|
||||
<lib dir="../../dist/" regex="apache-solr-velocity-\d.*\.jar" />
|
||||
<lib dir="../../dist/" regex="solr-velocity-\d.*\.jar" />
|
||||
<dataDir>${solr.data.dir:}</dataDir>
|
||||
|
||||
|
||||
|
|
|
@ -22,14 +22,12 @@ import java.util.HashMap;
|
|||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.NoSuchElementException;
|
||||
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.cloud.ClusterState;
|
||||
import org.apache.solr.common.cloud.ClosableThread;
|
||||
import org.apache.solr.common.cloud.ClusterState;
|
||||
import org.apache.solr.common.cloud.DocCollection;
|
||||
import org.apache.solr.common.cloud.DocRouter;
|
||||
import org.apache.solr.common.cloud.DocRouter;
|
||||
import org.apache.solr.common.cloud.ImplicitDocRouter;
|
||||
import org.apache.solr.common.cloud.Replica;
|
||||
import org.apache.solr.common.cloud.Slice;
|
||||
|
@ -37,7 +35,6 @@ import org.apache.solr.common.cloud.SolrZkClient;
|
|||
import org.apache.solr.common.cloud.ZkCoreNodeProps;
|
||||
import org.apache.solr.common.cloud.ZkNodeProps;
|
||||
import org.apache.solr.common.cloud.ZkStateReader;
|
||||
import org.apache.solr.common.cloud.ZooKeeperException;
|
||||
import org.apache.solr.handler.component.ShardHandler;
|
||||
import org.apache.zookeeper.CreateMode;
|
||||
import org.apache.zookeeper.KeeperException;
|
||||
|
@ -78,46 +75,48 @@ public class Overseer {
|
|||
@Override
|
||||
public void run() {
|
||||
|
||||
if(!this.isClosed && amILeader()) {
|
||||
if (!this.isClosed && amILeader()) {
|
||||
// see if there's something left from the previous Overseer and re
|
||||
// process all events that were not persisted into cloud state
|
||||
synchronized (reader.getUpdateLock()) { //XXX this only protects against edits inside single node
|
||||
try {
|
||||
byte[] head = workQueue.peek();
|
||||
synchronized (reader.getUpdateLock()) { // XXX this only protects
|
||||
// against edits inside single
|
||||
// node
|
||||
try {
|
||||
byte[] head = workQueue.peek();
|
||||
|
||||
if (head != null) {
|
||||
reader.updateClusterState(true);
|
||||
ClusterState clusterState = reader.getClusterState();
|
||||
log.info("Replaying operations from work queue.");
|
||||
|
||||
if (head != null) {
|
||||
reader.updateClusterState(true);
|
||||
ClusterState clusterState = reader.getClusterState();
|
||||
log.info("Replaying operations from work queue.");
|
||||
while (head != null && amILeader()) {
|
||||
final ZkNodeProps message = ZkNodeProps.load(head);
|
||||
final String operation = message.getStr(QUEUE_OPERATION);
|
||||
clusterState = processMessage(clusterState, message, operation);
|
||||
zkClient.setData(ZkStateReader.CLUSTER_STATE,
|
||||
ZkStateReader.toJSON(clusterState), true);
|
||||
|
||||
while (head != null && amILeader()) {
|
||||
final ZkNodeProps message = ZkNodeProps.load(head);
|
||||
final String operation = message
|
||||
.getStr(QUEUE_OPERATION);
|
||||
clusterState = processMessage(clusterState, message, operation);
|
||||
zkClient.setData(ZkStateReader.CLUSTER_STATE,
|
||||
ZkStateReader.toJSON(clusterState), true);
|
||||
|
||||
workQueue.poll();
|
||||
|
||||
head = workQueue.peek();
|
||||
}
|
||||
workQueue.poll();
|
||||
|
||||
head = workQueue.peek();
|
||||
}
|
||||
} catch (KeeperException e) {
|
||||
if (e.code() == KeeperException.Code.SESSIONEXPIRED
|
||||
|| e.code() == KeeperException.Code.CONNECTIONLOSS) {
|
||||
log.warn("Solr cannot talk to ZK");
|
||||
return;
|
||||
}
|
||||
SolrException.log(log, "", e);
|
||||
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR,
|
||||
"", e);
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
}
|
||||
} catch (KeeperException e) {
|
||||
if (e.code() == KeeperException.Code.SESSIONEXPIRED) {
|
||||
log.warn("Solr cannot talk to ZK, exiting Overseer work queue loop", e);
|
||||
return;
|
||||
}
|
||||
log.error("Exception in Overseer work queue loop", e);
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
return;
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("Exception in Overseer work queue loop", e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
log.info("Starting to work on the main queue");
|
||||
while (!this.isClosed && amILeader()) {
|
||||
|
@ -146,17 +145,17 @@ public class Overseer {
|
|||
while (workQueue.poll() != null);
|
||||
|
||||
} catch (KeeperException e) {
|
||||
if (e.code() == KeeperException.Code.SESSIONEXPIRED
|
||||
|| e.code() == KeeperException.Code.CONNECTIONLOSS) {
|
||||
log.warn("Overseer cannot talk to ZK");
|
||||
if (e.code() == KeeperException.Code.SESSIONEXPIRED) {
|
||||
log.warn("Solr cannot talk to ZK, exiting Overseer main queue loop", e);
|
||||
return;
|
||||
}
|
||||
SolrException.log(log, "", e);
|
||||
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR,
|
||||
"", e);
|
||||
log.error("Exception in Overseer main queue loop", e);
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
return;
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("Exception in Overseer main queue loop", e);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -40,7 +40,7 @@ import org.slf4j.LoggerFactory;
|
|||
/**
|
||||
* A {@link DirectoryFactory} impl base class for caching Directory instances
|
||||
* per path. Most DirectoryFactory implementations will want to extend this
|
||||
* class and simply implement {@link DirectoryFactory#create(String)}.
|
||||
* class and simply implement {@link DirectoryFactory#create(String, DirContext)}.
|
||||
*
|
||||
*/
|
||||
public abstract class CachingDirectoryFactory extends DirectoryFactory {
|
||||
|
@ -202,7 +202,7 @@ public abstract class CachingDirectoryFactory extends DirectoryFactory {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected abstract Directory create(String path) throws IOException;
|
||||
protected abstract Directory create(String path, DirContext dirContext) throws IOException;
|
||||
|
||||
@Override
|
||||
public boolean exists(String path) {
|
||||
|
@ -218,9 +218,9 @@ public abstract class CachingDirectoryFactory extends DirectoryFactory {
|
|||
* java.lang.String)
|
||||
*/
|
||||
@Override
|
||||
public final Directory get(String path, String rawLockType)
|
||||
public final Directory get(String path, DirContext dirContext, String rawLockType)
|
||||
throws IOException {
|
||||
return get(path, rawLockType, false);
|
||||
return get(path, dirContext, rawLockType, false);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -230,7 +230,7 @@ public abstract class CachingDirectoryFactory extends DirectoryFactory {
|
|||
* java.lang.String, boolean)
|
||||
*/
|
||||
@Override
|
||||
public final Directory get(String path, String rawLockType, boolean forceNew)
|
||||
public final Directory get(String path, DirContext dirContext, String rawLockType, boolean forceNew)
|
||||
throws IOException {
|
||||
String fullPath = new File(path).getAbsolutePath();
|
||||
synchronized (this) {
|
||||
|
@ -264,7 +264,7 @@ public abstract class CachingDirectoryFactory extends DirectoryFactory {
|
|||
}
|
||||
|
||||
if (directory == null || forceNew) {
|
||||
directory = create(fullPath);
|
||||
directory = create(fullPath, dirContext);
|
||||
|
||||
directory = rateLimit(directory);
|
||||
|
||||
|
|
|
@ -1626,10 +1626,15 @@ public class CoreContainer
|
|||
return schema;
|
||||
}
|
||||
|
||||
private static final String DEF_SOLR_XML ="<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n" +
|
||||
"<solr persistent=\"false\">\n" +
|
||||
" <cores adminPath=\"/admin/cores\" defaultCoreName=\"" + DEFAULT_DEFAULT_CORE_NAME + "\">\n" +
|
||||
" <core name=\""+ DEFAULT_DEFAULT_CORE_NAME + "\" shard=\"${shard:}\" instanceDir=\"collection1\" />\n" +
|
||||
" </cores>\n" +
|
||||
"</solr>";
|
||||
private static final String DEF_SOLR_XML = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n"
|
||||
+ "<solr persistent=\"false\">\n"
|
||||
+ " <cores adminPath=\"/admin/cores\" defaultCoreName=\""
|
||||
+ DEFAULT_DEFAULT_CORE_NAME
|
||||
+ "\""
|
||||
+ " host=\"${host:}\" hostPort=\"${hostPort:}\" hostContext=\"${hostContext:}\" zkClientTimeout=\"${zkClientTimeout:15000}\""
|
||||
+ ">\n"
|
||||
+ " <core name=\""
|
||||
+ DEFAULT_DEFAULT_CORE_NAME
|
||||
+ "\" shard=\"${shard:}\" collection=\"${collection:}\" instanceDir=\"collection1\" />\n"
|
||||
+ " </cores>\n" + "</solr>";
|
||||
}
|
||||
|
|
|
@ -41,6 +41,8 @@ public abstract class DirectoryFactory implements NamedListInitializedPlugin,
|
|||
// A large estimate should currently have no other side effects.
|
||||
public static final IOContext IOCONTEXT_NO_CACHE = new IOContext(new FlushInfo(10*1000*1000, 100L*1000*1000*1000));
|
||||
|
||||
// hint about what the directory contains - default is index directory
|
||||
public enum DirContext {DEFAULT, META_DATA}
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(DirectoryFactory.class.getName());
|
||||
|
||||
|
@ -71,7 +73,7 @@ public abstract class DirectoryFactory implements NamedListInitializedPlugin,
|
|||
*
|
||||
* @throws IOException If there is a low-level I/O error.
|
||||
*/
|
||||
protected abstract Directory create(String path) throws IOException;
|
||||
protected abstract Directory create(String path, DirContext dirContext) throws IOException;
|
||||
|
||||
/**
|
||||
* Returns true if a Directory exists for a given path.
|
||||
|
@ -118,7 +120,7 @@ public abstract class DirectoryFactory implements NamedListInitializedPlugin,
|
|||
*
|
||||
* @throws IOException If there is a low-level I/O error.
|
||||
*/
|
||||
public abstract Directory get(String path, String rawLockType)
|
||||
public abstract Directory get(String path, DirContext dirContext, String rawLockType)
|
||||
throws IOException;
|
||||
|
||||
/**
|
||||
|
@ -130,7 +132,7 @@ public abstract class DirectoryFactory implements NamedListInitializedPlugin,
|
|||
*
|
||||
* @throws IOException If there is a low-level I/O error.
|
||||
*/
|
||||
public abstract Directory get(String path, String rawLockType,
|
||||
public abstract Directory get(String path, DirContext dirContext, String rawLockType,
|
||||
boolean forceNew) throws IOException;
|
||||
|
||||
/**
|
||||
|
|
|
@ -22,6 +22,7 @@ import org.apache.lucene.store.LockFactory; // javadocs
|
|||
import org.apache.lucene.store.MMapDirectory;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.core.DirectoryFactory.DirContext;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
@ -56,7 +57,7 @@ public class MMapDirectoryFactory extends StandardDirectoryFactory {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected Directory create(String path) throws IOException {
|
||||
protected Directory create(String path, DirContext dirContext) throws IOException {
|
||||
MMapDirectory mapDirectory = new MMapDirectory(new File(path), null, maxChunk);
|
||||
try {
|
||||
mapDirectory.setUseUnmap(unmapHack);
|
||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.solr.core;
|
|||
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.NIOFSDirectory;
|
||||
import org.apache.solr.core.DirectoryFactory.DirContext;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
|
@ -30,7 +31,7 @@ import java.io.IOException;
|
|||
public class NIOFSDirectoryFactory extends StandardDirectoryFactory {
|
||||
|
||||
@Override
|
||||
protected Directory create(String path) throws IOException {
|
||||
protected Directory create(String path, DirContext dirContext) throws IOException {
|
||||
return new NIOFSDirectory(new File(path));
|
||||
}
|
||||
|
||||
|
|
|
@ -25,6 +25,7 @@ import org.apache.lucene.store.FSDirectory;
|
|||
import org.apache.lucene.store.NRTCachingDirectory;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.core.DirectoryFactory.DirContext;
|
||||
|
||||
/**
|
||||
* Factory to instantiate {@link org.apache.lucene.store.NRTCachingDirectory}
|
||||
|
@ -48,7 +49,7 @@ public class NRTCachingDirectoryFactory extends StandardDirectoryFactory {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected Directory create(String path) throws IOException {
|
||||
protected Directory create(String path, DirContext dirContext) throws IOException {
|
||||
return new NRTCachingDirectory(FSDirectory.open(new File(path)), maxMergeSizeMB, maxCachedMB);
|
||||
}
|
||||
|
||||
|
|
|
@ -28,7 +28,7 @@ import org.apache.lucene.store.RAMDirectory;
|
|||
public class RAMDirectoryFactory extends EphemeralDirectoryFactory {
|
||||
|
||||
@Override
|
||||
protected Directory create(String path) throws IOException {
|
||||
protected Directory create(String path, DirContext dirContext) throws IOException {
|
||||
return new RAMDirectory();
|
||||
}
|
||||
|
||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.solr.core;
|
|||
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.SimpleFSDirectory;
|
||||
import org.apache.solr.core.DirectoryFactory.DirContext;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
|
@ -30,7 +31,7 @@ import java.io.IOException;
|
|||
public class SimpleFSDirectoryFactory extends StandardDirectoryFactory {
|
||||
|
||||
@Override
|
||||
protected Directory create(String path) throws IOException {
|
||||
protected Directory create(String path, DirContext dirContext) throws IOException {
|
||||
return new SimpleFSDirectory(new File(path));
|
||||
}
|
||||
|
||||
|
|
|
@ -69,6 +69,7 @@ import org.apache.solr.common.params.SolrParams;
|
|||
import org.apache.solr.common.util.ExecutorUtil;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
import org.apache.solr.core.DirectoryFactory.DirContext;
|
||||
import org.apache.solr.handler.SnapPuller;
|
||||
import org.apache.solr.handler.admin.ShowFileRequestHandler;
|
||||
import org.apache.solr.handler.component.DebugComponent;
|
||||
|
@ -237,7 +238,7 @@ public final class SolrCore implements SolrInfoMBean {
|
|||
Properties p = new Properties();
|
||||
Directory dir = null;
|
||||
try {
|
||||
dir = getDirectoryFactory().get(getDataDir(), getSolrConfig().indexConfig.lockType);
|
||||
dir = getDirectoryFactory().get(getDataDir(), DirContext.META_DATA, getSolrConfig().indexConfig.lockType);
|
||||
if (dir.fileExists(SnapPuller.INDEX_PROPERTIES)){
|
||||
final IndexInput input = dir.openInput(SnapPuller.INDEX_PROPERTIES, IOContext.DEFAULT);
|
||||
|
||||
|
@ -454,7 +455,7 @@ public final class SolrCore implements SolrInfoMBean {
|
|||
|
||||
if (indexExists && firstTime && !reload) {
|
||||
|
||||
Directory dir = directoryFactory.get(indexDir,
|
||||
Directory dir = directoryFactory.get(indexDir, DirContext.DEFAULT,
|
||||
getSolrConfig().indexConfig.lockType);
|
||||
try {
|
||||
if (IndexWriter.isLocked(dir)) {
|
||||
|
|
|
@ -35,7 +35,7 @@ import org.apache.lucene.store.IOContext;
|
|||
public class StandardDirectoryFactory extends CachingDirectoryFactory {
|
||||
|
||||
@Override
|
||||
protected Directory create(String path) throws IOException {
|
||||
protected Directory create(String path, DirContext dirContext) throws IOException {
|
||||
return FSDirectory.open(new File(path));
|
||||
}
|
||||
|
||||
|
|
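
The DirectoryFactory hunks above add a DirContext hint to create() and get(). A minimal sketch of a custom factory written against the new signatures, assuming the same pattern as the factories in this commit (the class name below is hypothetical, and the hint is simply ignored, as the shipped factories do):

import java.io.File;
import java.io.IOException;

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.NIOFSDirectory;
import org.apache.solr.core.DirectoryFactory.DirContext;
import org.apache.solr.core.StandardDirectoryFactory;

// Hypothetical example, not part of this commit.
public class MyDirectoryFactory extends StandardDirectoryFactory {
  @Override
  protected Directory create(String path, DirContext dirContext) throws IOException {
    // dirContext is only a hint: callers pass DirContext.DEFAULT for index directories
    // and DirContext.META_DATA for things like index.properties (see the SolrCore and
    // SnapPuller hunks in this commit). This sketch ignores it.
    return new NIOFSDirectory(new File(path));
  }
}
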
|
@ -60,6 +60,7 @@ import org.apache.solr.core.IndexDeletionPolicyWrapper;
|
|||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.core.SolrDeletionPolicy;
|
||||
import org.apache.solr.core.SolrEventListener;
|
||||
import org.apache.solr.core.DirectoryFactory.DirContext;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.response.BinaryQueryResponseWriter;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
|
@ -361,7 +362,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
|
|||
// use a set to workaround possible Lucene bug which returns same file
|
||||
// name multiple times
|
||||
Collection<String> files = new HashSet<String>(commit.getFileNames());
|
||||
dir = core.getDirectoryFactory().get(core.getNewIndexDir(), core.getSolrConfig().indexConfig.lockType);
|
||||
dir = core.getDirectoryFactory().get(core.getNewIndexDir(), DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
|
||||
try {
|
||||
|
||||
for (String fileName : files) {
|
||||
|
@ -467,7 +468,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
|
|||
Directory dir;
|
||||
long size = 0;
|
||||
try {
|
||||
dir = core.getDirectoryFactory().get(core.getNewIndexDir(), core.getSolrConfig().indexConfig.lockType);
|
||||
dir = core.getDirectoryFactory().get(core.getNewIndexDir(), DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
|
||||
try {
|
||||
size = DirectoryFactory.sizeOfDirectory(dir);
|
||||
} finally {
|
||||
|
|
|
@ -86,6 +86,7 @@ import org.apache.solr.common.util.ExecutorUtil;
|
|||
import org.apache.solr.common.util.FastInputStream;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.core.CachingDirectoryFactory.CloseListener;
|
||||
import org.apache.solr.core.DirectoryFactory.DirContext;
|
||||
import org.apache.solr.core.DirectoryFactory;
|
||||
import org.apache.solr.core.IndexDeletionPolicyWrapper;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
|
@ -369,16 +370,18 @@ public class SnapPuller {
|
|||
filesDownloaded = Collections.synchronizedList(new ArrayList<Map<String, Object>>());
|
||||
// if the generation of master is older than that of the slave, it means they are not compatible to be copied
|
||||
// then a new index directory needs to be created and all the files need to be copied
|
||||
boolean isFullCopyNeeded = IndexDeletionPolicyWrapper.getCommitTimestamp(commit) >= latestVersion || forceReplication;
|
||||
|
||||
boolean isFullCopyNeeded = IndexDeletionPolicyWrapper
|
||||
.getCommitTimestamp(commit) >= latestVersion
|
||||
|| commit.getGeneration() >= latestGeneration || forceReplication;
|
||||
|
||||
String tmpIdxDirName = "index." + new SimpleDateFormat(SnapShooter.DATE_FMT, Locale.ROOT).format(new Date());
|
||||
tmpIndex = createTempindexDir(core, tmpIdxDirName);
|
||||
|
||||
tmpIndexDir = core.getDirectoryFactory().get(tmpIndex, core.getSolrConfig().indexConfig.lockType);
|
||||
tmpIndexDir = core.getDirectoryFactory().get(tmpIndex, DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
|
||||
|
||||
// make sure it's the newest known index dir...
|
||||
indexDirPath = core.getNewIndexDir();
|
||||
indexDir = core.getDirectoryFactory().get(indexDirPath, core.getSolrConfig().indexConfig.lockType);
|
||||
indexDir = core.getDirectoryFactory().get(indexDirPath, DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
|
||||
Directory oldDirectory = null;
|
||||
|
||||
try {
|
||||
|
@ -542,7 +545,7 @@ public class SnapPuller {
|
|||
long replicationTimeTaken = (replicationTime - getReplicationStartTime()) / 1000;
|
||||
Directory dir = null;
|
||||
try {
|
||||
dir = solrCore.getDirectoryFactory().get(solrCore.getDataDir(), solrCore.getSolrConfig().indexConfig.lockType);
|
||||
dir = solrCore.getDirectoryFactory().get(solrCore.getDataDir(), DirContext.META_DATA, solrCore.getSolrConfig().indexConfig.lockType);
|
||||
|
||||
int indexCount = 1, confFilesCount = 1;
|
||||
if (props.containsKey(TIMES_INDEX_REPLICATED)) {
|
||||
|
@ -725,7 +728,7 @@ public class SnapPuller {
|
|||
String indexDir = solrCore.getIndexDir();
|
||||
|
||||
// it's okay to use null for lock factory since we know this dir will exist
|
||||
Directory dir = solrCore.getDirectoryFactory().get(indexDir, solrCore.getSolrConfig().indexConfig.lockType);
|
||||
Directory dir = solrCore.getDirectoryFactory().get(indexDir, DirContext.DEFAULT, solrCore.getSolrConfig().indexConfig.lockType);
|
||||
try {
|
||||
for (Map<String,Object> file : filesToDownload) {
|
||||
if (!dir.fileExists((String) file.get(NAME)) || downloadCompleteIndex) {
|
||||
|
@ -848,7 +851,7 @@ public class SnapPuller {
|
|||
Properties p = new Properties();
|
||||
Directory dir = null;
|
||||
try {
|
||||
dir = solrCore.getDirectoryFactory().get(solrCore.getDataDir(), solrCore.getSolrConfig().indexConfig.lockType);
|
||||
dir = solrCore.getDirectoryFactory().get(solrCore.getDataDir(), DirContext.META_DATA, solrCore.getSolrConfig().indexConfig.lockType);
|
||||
if (dir.fileExists(SnapPuller.INDEX_PROPERTIES)){
|
||||
final IndexInput input = dir.openInput(SnapPuller.INDEX_PROPERTIES, DirectoryFactory.IOCONTEXT_NO_CACHE);
|
||||
|
||||
|
|
|
@ -35,6 +35,7 @@ import org.apache.lucene.store.Lock;
|
|||
import org.apache.lucene.store.SimpleFSLockFactory;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.core.DirectoryFactory;
|
||||
import org.apache.solr.core.DirectoryFactory.DirContext;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
@ -102,7 +103,7 @@ public class SnapShooter {
|
|||
Collection<String> files = indexCommit.getFileNames();
|
||||
FileCopier fileCopier = new FileCopier();
|
||||
|
||||
Directory dir = solrCore.getDirectoryFactory().get(solrCore.getNewIndexDir(), solrCore.getSolrConfig().indexConfig.lockType);
|
||||
Directory dir = solrCore.getDirectoryFactory().get(solrCore.getNewIndexDir(), DirContext.DEFAULT, solrCore.getSolrConfig().indexConfig.lockType);
|
||||
try {
|
||||
fileCopier.copyFiles(dir, files, snapShotDir);
|
||||
} finally {
|
||||
|
|
|
@ -57,6 +57,7 @@ import org.apache.solr.core.CoreContainer;
|
|||
import org.apache.solr.core.CoreDescriptor;
|
||||
import org.apache.solr.core.DirectoryFactory;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.core.DirectoryFactory.DirContext;
|
||||
import org.apache.solr.handler.RequestHandlerBase;
|
||||
import org.apache.solr.request.LocalSolrQueryRequest;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
|
@ -367,7 +368,7 @@ public class CoreAdminHandler extends RequestHandlerBase {
|
|||
dirsToBeReleased = new Directory[dirNames.length];
|
||||
DirectoryFactory dirFactory = core.getDirectoryFactory();
|
||||
for (int i = 0; i < dirNames.length; i++) {
|
||||
Directory dir = dirFactory.get(dirNames[i], core.getSolrConfig().indexConfig.lockType);
|
||||
Directory dir = dirFactory.get(dirNames[i], DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
|
||||
dirsToBeReleased[i] = dir;
|
||||
// TODO: why doesn't this use the IR factory? what is going on here?
|
||||
readersToBeClosed[i] = DirectoryReader.open(dir);
|
||||
|
@ -688,6 +689,8 @@ public class CoreAdminHandler extends RequestHandlerBase {
|
|||
SolrParams params = req.getParams();
|
||||
|
||||
String cname = params.get(CoreAdminParams.CORE);
|
||||
String indexInfo = params.get(CoreAdminParams.INDEX_INFO);
|
||||
boolean isIndexInfoNeeded = Boolean.parseBoolean(null == indexInfo ? "true" : indexInfo);
|
||||
boolean doPersist = false;
|
||||
NamedList<Object> status = new SimpleOrderedMap<Object>();
|
||||
Map<String,Exception> allFailures = coreContainer.getCoreInitFailures();
|
||||
|
@ -695,7 +698,7 @@ public class CoreAdminHandler extends RequestHandlerBase {
|
|||
if (cname == null) {
|
||||
rsp.add("defaultCoreName", coreContainer.getDefaultCoreName());
|
||||
for (String name : coreContainer.getCoreNames()) {
|
||||
status.add(name, getCoreStatus(coreContainer, name));
|
||||
status.add(name, getCoreStatus(coreContainer, name, isIndexInfoNeeded));
|
||||
}
|
||||
rsp.add("initFailures", allFailures);
|
||||
} else {
|
||||
|
@ -703,7 +706,7 @@ public class CoreAdminHandler extends RequestHandlerBase {
|
|||
? Collections.singletonMap(cname, allFailures.get(cname))
|
||||
: Collections.emptyMap();
|
||||
rsp.add("initFailures", failures);
|
||||
status.add(cname, getCoreStatus(coreContainer, cname));
|
||||
status.add(cname, getCoreStatus(coreContainer, cname, isIndexInfoNeeded));
|
||||
}
|
||||
rsp.add("status", status);
|
||||
doPersist = false; // no state change
|
||||
|
@ -987,7 +990,7 @@ public class CoreAdminHandler extends RequestHandlerBase {
|
|||
|
||||
}
|
||||
|
||||
protected NamedList<Object> getCoreStatus(CoreContainer cores, String cname) throws IOException {
|
||||
protected NamedList<Object> getCoreStatus(CoreContainer cores, String cname, boolean isIndexInfoNeeded) throws IOException {
|
||||
NamedList<Object> info = new SimpleOrderedMap<Object>();
|
||||
SolrCore core = cores.getCore(cname);
|
||||
if (core != null) {
|
||||
|
@ -1000,15 +1003,17 @@ public class CoreAdminHandler extends RequestHandlerBase {
|
|||
info.add("schema", core.getSchemaResource());
|
||||
info.add("startTime", new Date(core.getStartTime()));
|
||||
info.add("uptime", System.currentTimeMillis() - core.getStartTime());
|
||||
RefCounted<SolrIndexSearcher> searcher = core.getSearcher();
|
||||
try {
|
||||
SimpleOrderedMap<Object> indexInfo = LukeRequestHandler.getIndexInfo(searcher.get().getIndexReader());
|
||||
long size = getIndexSize(core);
|
||||
indexInfo.add("sizeInBytes", size);
|
||||
indexInfo.add("size", NumberUtils.readableSize(size));
|
||||
info.add("index", indexInfo);
|
||||
} finally {
|
||||
searcher.decref();
|
||||
if (isIndexInfoNeeded) {
|
||||
RefCounted<SolrIndexSearcher> searcher = core.getSearcher();
|
||||
try {
|
||||
SimpleOrderedMap<Object> indexInfo = LukeRequestHandler.getIndexInfo(searcher.get().getIndexReader());
|
||||
long size = getIndexSize(core);
|
||||
indexInfo.add("sizeInBytes", size);
|
||||
indexInfo.add("size", NumberUtils.readableSize(size));
|
||||
info.add("index", indexInfo);
|
||||
} finally {
|
||||
searcher.decref();
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
core.close();
|
||||
|
@ -1022,9 +1027,9 @@ public class CoreAdminHandler extends RequestHandlerBase {
|
|||
long size = 0;
|
||||
try {
|
||||
if (!core.getDirectoryFactory().exists(core.getIndexDir())) {
|
||||
dir = core.getDirectoryFactory().get(core.getNewIndexDir(), core.getSolrConfig().indexConfig.lockType);
|
||||
dir = core.getDirectoryFactory().get(core.getNewIndexDir(), DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
|
||||
} else {
|
||||
dir = core.getDirectoryFactory().get(core.getIndexDir(), core.getSolrConfig().indexConfig.lockType);
|
||||
dir = core.getDirectoryFactory().get(core.getIndexDir(), DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
|
||||
}
|
||||
|
||||
try {
|
||||
|
|
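
The CoreAdminHandler changes above wire the optional 'indexInfo' parameter from SOLR-4302 into the STATUS command. A rough, hypothetical client-side sketch (the host, port, and use of plain java.net.URL are assumptions for illustration, not part of this commit):

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;

// Hypothetical client snippet; the Solr location below is a placeholder.
public class CoreStatusWithoutIndexInfo {
  public static void main(String[] args) throws Exception {
    URL status = new URL("http://localhost:8983/solr/admin/cores?action=STATUS&indexInfo=false");
    BufferedReader in = new BufferedReader(new InputStreamReader(status.openStream(), "UTF-8"));
    for (String line; (line = in.readLine()) != null; ) {
      System.out.println(line); // status of each core, minus the per-core "index" section
    }
    in.close();
  }
}
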
|
@ -49,6 +49,7 @@ import org.apache.solr.core.DirectoryFactory;
|
|||
import org.apache.solr.core.SolrConfig;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.core.SolrInfoMBean;
|
||||
import org.apache.solr.core.DirectoryFactory.DirContext;
|
||||
import org.apache.solr.request.LocalSolrQueryRequest;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.request.SolrRequestInfo;
|
||||
|
@ -119,7 +120,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
|
|||
|
||||
public SolrIndexSearcher(SolrCore core, String path, IndexSchema schema, SolrIndexConfig config, String name, boolean enableCache, DirectoryFactory directoryFactory) throws IOException {
|
||||
// we don't need to reserve the directory because we get it from the factory
|
||||
this(core, schema,name, core.getIndexReaderFactory().newReader(directoryFactory.get(path, config.lockType), core), true, enableCache, false, directoryFactory);
|
||||
this(core, schema,name, core.getIndexReaderFactory().newReader(directoryFactory.get(path, DirContext.DEFAULT, config.lockType), core), true, enableCache, false, directoryFactory);
|
||||
}
|
||||
|
||||
private static String getIndexDir(Directory dir) {
|
||||
|
|
|
@ -32,6 +32,7 @@ import org.apache.lucene.util.InfoStream;
|
|||
import org.apache.lucene.util.PrintStreamInfoStream;
|
||||
import org.apache.lucene.util.ThreadInterruptedException;
|
||||
import org.apache.solr.core.DirectoryFactory;
|
||||
import org.apache.solr.core.DirectoryFactory.DirContext;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
@ -58,7 +59,7 @@ public class SolrIndexWriter extends IndexWriter {
|
|||
public static SolrIndexWriter create(String name, String path, DirectoryFactory directoryFactory, boolean create, IndexSchema schema, SolrIndexConfig config, IndexDeletionPolicy delPolicy, Codec codec, boolean forceNewDirectory) throws IOException {
|
||||
|
||||
SolrIndexWriter w = null;
|
||||
final Directory d = directoryFactory.get(path, config.lockType, forceNewDirectory);
|
||||
final Directory d = directoryFactory.get(path, DirContext.DEFAULT, config.lockType, forceNewDirectory);
|
||||
try {
|
||||
w = new SolrIndexWriter(name, path, d, create, schema,
|
||||
config, delPolicy, codec);
|
||||
|
|
|
@ -134,7 +134,13 @@ public class SignatureUpdateProcessorFactory
|
|||
if (enabled) {
|
||||
SolrInputDocument doc = cmd.getSolrInputDocument();
|
||||
List<String> currDocSigFields = null;
|
||||
boolean isPartialUpdate = DistributedUpdateProcessor.isAtomicUpdate(cmd);
|
||||
if (sigFields == null || sigFields.size() == 0) {
|
||||
if (isPartialUpdate) {
|
||||
throw new SolrException
|
||||
(ErrorCode.SERVER_ERROR,
|
||||
"Can't use SignatureUpdateProcessor with partial updates on signature fields");
|
||||
}
|
||||
Collection<String> docFields = doc.getFieldNames();
|
||||
currDocSigFields = new ArrayList<String>(docFields.size());
|
||||
currDocSigFields.addAll(docFields);
|
||||
|
@ -149,6 +155,12 @@ public class SignatureUpdateProcessorFactory
|
|||
for (String field : currDocSigFields) {
|
||||
SolrInputField f = doc.getField(field);
|
||||
if (f != null) {
|
||||
if (isPartialUpdate) {
|
||||
throw new SolrException
|
||||
(ErrorCode.SERVER_ERROR,
|
||||
"Can't use SignatureUpdateProcessor with partial update request " +
|
||||
"containing signature field: " + field);
|
||||
}
|
||||
sig.add(field);
|
||||
Object o = f.getValue();
|
||||
if (o instanceof Collection) {
|
||||
|
|
|
@ -196,7 +196,8 @@ public class SimplePostTool {
|
|||
fatal("System Property 'data' is not valid for this tool: " + mode);
|
||||
}
|
||||
String params = System.getProperty("params", "");
|
||||
urlStr = System.getProperty("url", SimplePostTool.appendParam(DEFAULT_POST_URL, params));
|
||||
urlStr = System.getProperty("url", DEFAULT_POST_URL);
|
||||
urlStr = SimplePostTool.appendParam(urlStr, params);
|
||||
URL url = new URL(urlStr);
|
||||
boolean auto = isOn(System.getProperty("auto", DEFAULT_AUTO));
|
||||
String type = System.getProperty("type");
|
||||
|
@ -800,7 +801,7 @@ public class SimplePostTool {
|
|||
" " + urlc.getResponseMessage() + " for url "+url);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
warn("An error occured posting data to "+url+". Please check that Solr is running.");
|
||||
warn("An error occurred posting data to "+url+". Please check that Solr is running.");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
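
The SimplePostTool change above means a user-supplied -Durl no longer causes -Dparams to be ignored: the params string is now appended to whichever URL is in effect (see SOLR-4217 in CHANGES.txt). As a hypothetical invocation with placeholder URL and file name, java -Dparams=commit=true -Durl=http://localhost:8983/solr/update -jar post.jar books.csv would now post to .../update?commit=true.
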
|
@ -47,6 +47,29 @@
|
|||
</updateLog>
|
||||
</updateHandler>
|
||||
|
||||
<updateRequestProcessorChain name="dedupe">
|
||||
<processor class="org.apache.solr.update.processor.SignatureUpdateProcessorFactory">
|
||||
<bool name="enabled">true</bool>
|
||||
<bool name="overwriteDupes">true</bool>
|
||||
<str name="fields">v_t,t_field</str>
|
||||
<str name="signatureClass">org.apache.solr.update.processor.TextProfileSignature</str>
|
||||
</processor>
|
||||
<processor class="solr.RunUpdateProcessorFactory" />
|
||||
</updateRequestProcessorChain>
|
||||
<updateRequestProcessorChain name="stored_sig">
|
||||
<!-- this chain is valid even though the signature field is not
|
||||
indexed, because we are not asking for dups to be overwritten
|
||||
-->
|
||||
<processor class="org.apache.solr.update.processor.SignatureUpdateProcessorFactory">
|
||||
<bool name="enabled">true</bool>
|
||||
<str name="signatureField">non_indexed_signature_sS</str>
|
||||
<bool name="overwriteDupes">false</bool>
|
||||
<str name="fields">v_t,t_field</str>
|
||||
<str name="signatureClass">org.apache.solr.update.processor.TextProfileSignature</str>
|
||||
</processor>
|
||||
<processor class="solr.RunUpdateProcessorFactory" />
|
||||
</updateRequestProcessorChain>
|
||||
|
||||
<requestHandler name="/admin/" class="org.apache.solr.handler.admin.AdminHandlers" />
|
||||
|
||||
</config>
|
||||
|
|
|
@ -22,6 +22,7 @@ import java.io.IOException;
|
|||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.core.DirectoryFactory.DirContext;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
|
@ -46,7 +47,7 @@ public class AlternateDirectoryTest extends SolrTestCaseJ4 {
|
|||
public static volatile Directory dir;
|
||||
|
||||
@Override
|
||||
public Directory create(String path) throws IOException {
|
||||
public Directory create(String path, DirContext dirContext) throws IOException {
|
||||
openCalled = true;
|
||||
|
||||
return dir = newFSDirectory(new File(path));
|
||||
|
|
|
@ -22,6 +22,7 @@ import java.io.IOException;
|
|||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.solr.core.DirectoryFactory.DirContext;
|
||||
|
||||
/**
|
||||
* Test-case for RAMDirectoryFactory
|
||||
|
@ -37,13 +38,13 @@ public class RAMDirectoryFactoryTest extends LuceneTestCase {
|
|||
final Directory directory = new RAMDirectory();
|
||||
RAMDirectoryFactory factory = new RAMDirectoryFactory() {
|
||||
@Override
|
||||
protected Directory create(String path) {
|
||||
protected Directory create(String path, DirContext dirContext) {
|
||||
return directory;
|
||||
}
|
||||
};
|
||||
String path = "/fake/path";
|
||||
Directory dir1 = factory.get(path, null);
|
||||
Directory dir2 = factory.get(path, null);
|
||||
Directory dir1 = factory.get(path, DirContext.DEFAULT, null);
|
||||
Directory dir2 = factory.get(path, DirContext.DEFAULT, null);
|
||||
assertEquals("RAMDirectoryFactory should not create new instance of RefCntRamDirectory " +
|
||||
"every time open() is called for the same path", dir1, dir2);
|
||||
|
||||
|
@ -53,7 +54,7 @@ public class RAMDirectoryFactoryTest extends LuceneTestCase {
|
|||
|
||||
private void dotestOpenSucceedForEmptyDir() throws IOException {
|
||||
RAMDirectoryFactory factory = new RAMDirectoryFactory();
|
||||
Directory dir = factory.get("/fake/path", null);
|
||||
Directory dir = factory.get("/fake/path", DirContext.DEFAULT, null);
|
||||
assertNotNull("RAMDirectoryFactory should create RefCntRamDirectory even if the path doen't lead " +
|
||||
"to index directory on the file system", dir);
|
||||
factory.release(dir);
|
||||
|
|
|
@ -64,7 +64,7 @@ public class SignatureUpdateProcessorFactoryTest extends SolrTestCaseJ4 {
|
|||
chain = "dedupe"; // set the default that most tests expect
|
||||
}
|
||||
|
||||
void checkNumDocs(int n) {
|
||||
static void checkNumDocs(int n) {
|
||||
SolrQueryRequest req = req();
|
||||
try {
|
||||
assertEquals(n, req.getSearcher().getIndexReader().numDocs());
|
||||
|
@ -353,7 +353,11 @@ public class SignatureUpdateProcessorFactoryTest extends SolrTestCaseJ4 {
|
|||
}
|
||||
}
|
||||
|
||||
private void addDoc(String doc) throws Exception {
|
||||
private void addDoc(String doc) throws Exception {
|
||||
addDoc(doc, chain);
|
||||
}
|
||||
|
||||
static void addDoc(String doc, String chain) throws Exception {
|
||||
Map<String, String[]> params = new HashMap<String, String[]>();
|
||||
MultiMapSolrParams mmparams = new MultiMapSolrParams(params);
|
||||
params.put(UpdateParams.UPDATE_CHAIN, new String[] { chain });
|
||||
|
|
|
@ -0,0 +1,74 @@
|
|||
package org.apache.solr.update.processor;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import com.google.common.collect.Maps;
|
||||
import org.apache.noggit.ObjectBuilder;
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.client.solrj.request.UpdateRequest;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.apache.solr.update.processor.SignatureUpdateProcessorFactoryTest.addDoc;
|
||||
|
||||
public class TestPartialUpdateDeduplication extends SolrTestCaseJ4 {
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
initCore("solrconfig-tlog.xml", "schema15.xml");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPartialUpdates() throws Exception {
|
||||
SignatureUpdateProcessorFactoryTest.checkNumDocs(0);
|
||||
String chain = "dedupe";
|
||||
// partial update
|
||||
SolrInputDocument doc = new SolrInputDocument();
|
||||
doc.addField("id", "2a");
|
||||
Map<String, Object> map = Maps.newHashMap();
|
||||
map.put("set", "Hello Dude man!");
|
||||
doc.addField("v_t", map);
|
||||
UpdateRequest req = new UpdateRequest();
|
||||
req.add(doc);
|
||||
boolean exception_ok = false;
|
||||
try {
|
||||
addDoc(req.getXML(), chain);
|
||||
} catch (Exception e) {
|
||||
exception_ok = true;
|
||||
}
|
||||
assertTrue("Should have gotten an exception with partial update on signature generating field",
|
||||
exception_ok);
|
||||
|
||||
SignatureUpdateProcessorFactoryTest.checkNumDocs(0);
|
||||
addDoc(adoc("id", "2a", "v_t", "Hello Dude man!", "name", "ali babi'"), chain);
|
||||
doc = new SolrInputDocument();
|
||||
doc.addField("id", "2a");
|
||||
map = Maps.newHashMap();
|
||||
map.put("set", "name changed");
|
||||
doc.addField("name", map);
|
||||
req = new UpdateRequest();
|
||||
req.add(doc);
|
||||
addDoc(req.getXML(), chain);
|
||||
addDoc(commit(), chain);
|
||||
SignatureUpdateProcessorFactoryTest.checkNumDocs(1);
|
||||
}
|
||||
}
|
|
@ -56,6 +56,7 @@ public class SimplePostToolTest extends SolrTestCaseJ4 {
|
|||
t_web = SimplePostTool.parseArgsAndInit(args);
|
||||
|
||||
System.setProperty("params", "param1=foo¶m2=bar");
|
||||
System.setProperty("url", "http://localhost:5150/solr/update");
|
||||
t_test = SimplePostTool.parseArgsAndInit(args);
|
||||
|
||||
pf = new MockPageFetcher();
|
||||
|
@ -76,7 +77,7 @@ public class SimplePostToolTest extends SolrTestCaseJ4 {
|
|||
assertEquals(1, t_web.recursive);
|
||||
assertEquals(10, t_web.delay);
|
||||
|
||||
assertNotNull(t_test.solrUrl);
|
||||
assertEquals("http://localhost:5150/solr/update?param1=foo¶m2=bar",t_test.solrUrl.toExternalForm());
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
|
||||
<jmx />
|
||||
|
||||
<lib dir="../../../../dist/" regex="apache-solr-dataimporthandler-.*\.jar" />
|
||||
<lib dir="../../../../dist/" regex="solr-dataimporthandler-.*\.jar" />
|
||||
|
||||
<!-- <indexConfig> section could go here, but we want the defaults -->
|
||||
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
<lib dir="../../../../contrib/extraction/lib" />
|
||||
|
||||
<lib dir="../../../../contrib/dataimporthandler/lib/" regex=".*jar$" />
|
||||
<lib dir="../../../../dist/" regex="apache-solr-dataimporthandler-.*\.jar" />
|
||||
<lib dir="../../../../dist/" regex="solr-dataimporthandler-.*\.jar" />
|
||||
|
||||
<!-- <indexConfig> section could go here, but we want the defaults -->
|
||||
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
|
||||
<jmx />
|
||||
|
||||
<lib dir="../../../../dist/" regex="apache-solr-dataimporthandler-.*\.jar" />
|
||||
<lib dir="../../../../dist/" regex="solr-dataimporthandler-.*\.jar" />
|
||||
|
||||
<!-- <indexConfig> section could go here, but we want the defaults -->
|
||||
|
||||
|
|