Merged /lucene/dev/trunk:r1432062-1433030

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1433035 13f79535-47bb-0310-9956-ffa450edef68
Robert Muir 2013-01-14 18:54:22 +00:00
commit b6c9791358
115 changed files with 2839 additions and 1117 deletions

View File

@ -197,6 +197,9 @@ def checkAll(dirName):
elif link.find('lucene.apache.org/java/docs/discussion.html') != -1:
# OK
pass
elif link.find('lucene.apache.org/core/discussion.html') != -1:
# OK
pass
elif link.find('lucene.apache.org/solr/mirrors-solr-latest-redir.html') != -1:
# OK
pass

View File

@ -308,7 +308,7 @@ def checkSigs(project, urlString, version, tmpDir, isSigned):
artifact = text
artifactURL = subURL
if project == 'solr':
expected = 'apache-solr-%s' % version
expected = 'solr-%s' % version
else:
expected = 'lucene-%s' % version
if not artifact.startswith(expected):
@ -334,9 +334,9 @@ def checkSigs(project, urlString, version, tmpDir, isSigned):
'lucene-%s.tgz' % version,
'lucene-%s.zip' % version]
else:
expected = ['apache-solr-%s-src.tgz' % version,
'apache-solr-%s.tgz' % version,
'apache-solr-%s.zip' % version]
expected = ['solr-%s-src.tgz' % version,
'solr-%s.tgz' % version,
'solr-%s.zip' % version]
actual = [x[0] for x in artifacts]
if expected != actual:
@ -556,10 +556,7 @@ def unpackAndVerify(project, tmpDir, artifact, version):
# make sure it unpacks to proper subdir
l = os.listdir(destDir)
if project == 'solr':
expected = 'apache-%s-%s' % (project, version)
else:
expected = '%s-%s' % (project, version)
expected = '%s-%s' % (project, version)
if l != [expected]:
raise RuntimeError('unpack produced entries %s; expected only %s' % (l, expected))
@ -956,7 +953,6 @@ def getDistributionsForMavenChecks(tmpDir, version, baseURL):
distributionFiles = defaultdict()
for project in ('lucene', 'solr'):
distribution = '%s-%s.tgz' % (project, version)
if project == 'solr': distribution = 'apache-' + distribution
if not os.path.exists('%s/%s' % (tmpDir, distribution)):
distURL = '%s/%s/%s' % (baseURL, project, distribution)
print(' download %s...' % distribution, end=' ')
@ -1010,8 +1006,6 @@ def checkIdenticalMavenArtifacts(distributionFiles, nonMavenizedDeps, artifacts,
distFilenames = dict()
for file in distributionFiles[project]:
baseName = os.path.basename(file)
if project == 'solr': # Remove 'apache-' prefix to allow comparison to Maven artifacts
baseName = baseName.replace('apache-', '')
distFilenames[baseName] = file
for artifact in artifacts[project]:
if reJarWar.search(artifact):
@ -1348,9 +1342,9 @@ def smokeTest(baseURL, version, tmpDir, isSigned):
print()
print('Test Solr...')
checkSigs('solr', solrPath, version, tmpDir, isSigned)
for artifact in ('apache-solr-%s.tgz' % version, 'apache-solr-%s.zip' % version):
for artifact in ('solr-%s.tgz' % version, 'solr-%s.zip' % version):
unpackAndVerify('solr', tmpDir, artifact, version)
unpackAndVerify('solr', tmpDir, 'apache-solr-%s-src.tgz' % version, version)
unpackAndVerify('solr', tmpDir, 'solr-%s-src.tgz' % version, version)
print()
print('Test Maven artifacts for Lucene and Solr...')

View File

@ -19,6 +19,16 @@ Changes in backwards compatibility policy
(Nikola Tanković, Uwe Schindler, Chris Male, Mike McCandless,
Robert Muir)
* LUCENE-4677, LUCENE-4682: unpacked FSTs now use vInt to encode the node target,
to reduce their size (Mike McCandless)
* LUCENE-4678: FST now uses a paged byte[] structure instead of a
single byte[] internally, to avoid large memory spikes during
building (James Dyer, Mike McCandless)
* LUCENE-3298: FST can now be larger than 2.1 GB / 2.1 B nodes.
(James Dyer, Mike McCandless)
======================= Lucene 4.1.0 =======================
Changes in backwards compatibility policy
@ -45,7 +55,7 @@ Changes in backwards compatibility policy
Instead of calling refresh(), you should write similar code to how you reopen
a regular DirectoryReader.
- TaxonomyReader.openIfChanged (previously refresh()) no longer throws
IncosistentTaxonomyException, and supports recreate. InconsistentTaxoEx
InconsistentTaxonomyException, and supports recreate. InconsistentTaxoEx
was removed.
- ChildrenArrays was pulled out of TaxonomyReader into a top-level class.
- TaxonomyReader was made an abstract class (instead of an interface), with
@ -94,7 +104,7 @@ Changes in backwards compatibility policy
Also, the entire IndexingParams chain is now immutable. If you need to override
a setting, you should extend the relevant class.
Additionally, FacetSearchParams is now immutable, and requires all FacetRequests
to speified at initialization time. (Shai Erera)
to specified at initialization time. (Shai Erera)
* LUCENE-4647: CategoryDocumentBuilder and EnhancementsDocumentBuilder are replaced
by FacetFields and AssociationsFacetFields respectively. CategoryEnhancement and
@ -115,6 +125,10 @@ Changes in backwards compatibility policy
result, few other classes such as Aggregator and CategoryListIterator were
changed to handle bulk category ordinals. (Shai Erera)
* LUCENE-4683: CategoryListIterator and Aggregator are now per-segment. As such
their implementations no longer take a top-level IndexReader in the constructor
but rather implement a setNextReader. (Shai Erera)
New Features
* LUCENE-4226: New experimental StoredFieldsFormat that compresses chunks of
@ -152,11 +166,6 @@ New Features
* LUCENE-4515: MemoryIndex now supports adding the same field multiple
times. (Simon Willnauer)
* LUCENE-4540: Added an experimental Norm.setPackedLong, which allows
the use of VAR_INTS-encoded norms. This can be useful for cases where
you only need a few bits per-document, or where you might want exact
document length, and so on. (Robert Muir)
* LUCENE-4489: Added consumeAllTokens option to LimitTokenCountFilter
(hossman, Robert Muir)
@ -267,7 +276,7 @@ Bug Fixes
allow 1+maxMergeCount merges threads to be created, instead of just
maxMergeCount (Radim Kolar, Mike McCandless)
* LUCENE-4567: Fixed NullPointerException in analzying, fuzzy, and
* LUCENE-4567: Fixed NullPointerException in analyzing, fuzzy, and
WFST suggesters when no suggestions were added (selckin via Mike
McCandless)
@ -527,7 +536,7 @@ API Changes
StoredFieldVisitor API. (Mike McCandless)
* LUCENE-4343: Made Tokenizer.setReader final. This is a setter that should
not be overriden by subclasses: per-stream initialization should happen
not be overridden by subclasses: per-stream initialization should happen
in reset(). (Robert Muir)
* LUCENE-4377: Remove IndexInput.copyBytes(IndexOutput, long).
@ -753,7 +762,7 @@ API Changes
* LUCENE-4273: When pulling a DocsEnum, you can pass an int flags
instead of the previous boolean needsFlags; consistent with the changes
for DocsAndPositionsEnum in LUCENE-4230. Currently othe only flag
for DocsAndPositionsEnum in LUCENE-4230. Currently the only flag
is DocsEnum.FLAG_FREQS. (Robert Muir, Mike McCandless)
* LUCENE-3616: TextField(String, Reader, Store) was reduced to TextField(String, Reader),
@ -825,7 +834,7 @@ Bug Fixes
instance are already checked out and queued up but not yet flushed.
(Simon Willnauer)
* LUCENE-4282: Automaton FuzzyQuery didnt always deliver all results.
* LUCENE-4282: Automaton FuzzyQuery didn't always deliver all results.
(Johannes Christen, Uwe Schindler, Robert Muir)
* LUCENE-4289: Fix minor idf inconsistencies/inefficiencies in highlighter.
@ -1055,7 +1064,7 @@ Changes in backwards compatibility policy
Query/Weight/Scorer. If you extended Similarity directly before, you should
extend TFIDFSimilarity instead. Similarity is now a lower-level API to
implement other scoring algorithms. See MIGRATE.txt for more details.
(David Nemeskey, Simon Willnauer, Mike Mccandless, Robert Muir)
(David Nemeskey, Simon Willnauer, Mike McCandless, Robert Muir)
* LUCENE-3330: The expert visitor API in Scorer has been simplified and
extended to support arbitrary relationships. To navigate to a scorer's
@ -1163,12 +1172,12 @@ Changes in Runtime Behavior
omitNorms(true) for field "a" for 1000 documents, but then add a document with
omitNorms(false) for field "a", all documents for field "a" will have no
norms. Previously, Lucene would fill the first 1000 documents with
"fake norms" from Similarity.getDefault(). (Robert Muir, Mike Mccandless)
"fake norms" from Similarity.getDefault(). (Robert Muir, Mike McCandless)
* LUCENE-2846: When some documents contain field "a", and others do not, the
documents that don't have the field get a norm byte value of 0. Previously,
Lucene would populate "fake norms" with Similarity.getDefault() for these
documents. (Robert Muir, Mike Mccandless)
documents. (Robert Muir, Mike McCandless)
* LUCENE-2720: IndexWriter throws IndexFormatTooOldException on open, rather
than later when e.g. a merge starts.
@ -1201,13 +1210,13 @@ Changes in Runtime Behavior
update or delete on IndexWriter. By default DWPTs are flushed either on
maxBufferedDocs per DWPT or the global active used memory. Once the active
memory exceeds ramBufferSizeMB only the largest DWPT is selected for
flushing and the memory used by this DWPT is substracted from the active
flushing and the memory used by this DWPT is subtracted from the active
memory and added to a flushing memory pool, which can lead to temporarily
higher memory usage due to ongoing indexing.
- IndexWriter now can utilize ramBufferSize > 2048 MB. Each DWPT can address
up to 2048 MB memory such that the ramBufferSize is now bounded by the max
number of DWPT avaliable in the used DocumentsWriterPerThreadPool.
number of DWPT available in the used DocumentsWriterPerThreadPool.
IndexWriters net memory consumption can grow far beyond the 2048 MB limit if
the application can use all available DWPTs. To prevent a DWPT from
exhausting its address space IndexWriter will forcefully flush a DWPT if its
@ -1215,7 +1224,7 @@ Changes in Runtime Behavior
via IndexWriterConfig and defaults to 1945 MB.
Since IndexWriter flushes DWPT concurrently not all memory is released
immediately. Applications should still use a ramBufferSize significantly
lower than the JVMs avaliable heap memory since under high load multiple
lower than the JVMs available heap memory since under high load multiple
flushing DWPT can consume substantial transient memory when IO performance
is slow relative to indexing rate.
@ -1223,7 +1232,7 @@ Changes in Runtime Behavior
'currently' RAM resident documents to disk. Yet, flushes that occur while a
a full flush is running are queued and will happen after all DWPT involved
in the full flush are done flushing. Applications using multiple threads
during indexing and trigger a full flush (eg call commmit() or open a new
during indexing and trigger a full flush (eg call commit() or open a new
NRT reader) can use significantly more transient memory.
- IndexWriter#addDocument and IndexWriter.updateDocument can block indexing
@ -1266,7 +1275,7 @@ Changes in Runtime Behavior
* LUCENE-3455: QueryParserBase.newFieldQuery() will throw a ParseException if
any of the calls to the Analyzer throw an IOException. QueryParseBase.analyzeRangePart()
will throw a RuntimException if an IOException is thrown by the Analyzer.
will throw a RuntimeException if an IOException is thrown by the Analyzer.
* LUCENE-4127: IndexWriter will now throw IllegalArgumentException if
the first token of an indexed field has 0 positionIncrement
@ -1356,7 +1365,7 @@ API Changes
customized on a per-field basis. (Robert Muir)
* LUCENE-3308: DuplicateFilter keepMode and processingMode have been converted to
enums DuplicateFilter.KeepMode and DuplicateFilter.ProcessingMode repsectively.
enums DuplicateFilter.KeepMode and DuplicateFilter.ProcessingMode respectively.
* LUCENE-3483: Move Function grouping collectors from Solr to grouping module.
(Martijn van Groningen)
@ -1514,7 +1523,7 @@ New features
* LUCENE-2742: Add native per-field postings format support. Codec lets you now
register a postings format for each field and which is in turn recorded
into the index. Postings formtas are maintained on a per-segment basis and be
into the index. Postings formats are maintained on a per-segment basis and be
resolved without knowing the actual postings format used for writing the segment.
(Simon Willnauer)
@ -1722,7 +1731,7 @@ New features
- o.a.l.analysis.miscellaneous.CapitalizationFilter: A TokenFilter that applies
capitalization rules to tokens.
- o.a.l.analysis.pattern: Package for pattern-based analysis, containing a
CharFilter, Tokenizer, and Tokenfilter for transforming text with regexes.
CharFilter, Tokenizer, and TokenFilter for transforming text with regexes.
- o.a.l.analysis.synonym.SynonymFilter: A synonym filter that supports multi-word
synonyms.
- o.a.l.analysis.phonetic: Package for phonetic search, containing various
@ -1894,7 +1903,7 @@ Bug fixes
DocsAndPositionsEnum while merging (Marc Sturlese, Erick Erickson,
Robert Muir, Simon Willnauer, Mike McCandless)
* LUCENE-3589: BytesRef copy(short) didnt set length.
* LUCENE-3589: BytesRef copy(short) didn't set length.
(Peter Chang via Robert Muir)
* LUCENE-3045: fixed QueryNodeImpl.containsTag(String key) that was
@ -1997,6 +2006,51 @@ Build
XSL. (Greg Bowyer, Uwe Schindler)
======================= Lucene 3.6.2 =======================
Bug Fixes
* LUCENE-4234: Exception when FacetsCollector is used with ScoreFacetRequest,
and the number of matching documents is too large. (Gilad Barkai via Shai Erera)
* LUCENE-2686, LUCENE-3505, LUCENE-4401: Fix BooleanQuery scorers to
return correct freq().
(Koji Sekiguchi, Mike McCandless, Liu Chao, Robert Muir)
* LUCENE-2501: Fixed rare thread-safety issue that could cause
ArrayIndexOutOfBoundsException inside ByteBlockPool (Robert Muir,
Mike McCandless)
* LUCENE-4297: BooleanScorer2 would multiply the coord() factor
twice for conjunctions: for most users this is no problem, but
if you had a customized Similarity that returned something other
than 1 when overlap == maxOverlap (always the case for conjunctions),
then the score would be incorrect. (Pascal Chollet, Robert Muir)
* LUCENE-4300: BooleanQuery's rewrite was not always safe: if you
had a custom Similarity where coord(1,1) != 1F, then the rewritten
query would be scored differently. (Robert Muir)
* LUCENE-4398: If you index many different field names in your
documents then due to a bug in how it measures its RAM
usage, IndexWriter would flush each segment too early eventually
reaching the point where it flushes after every doc. (Tim Smith via
Mike McCandless)
* LUCENE-4411: when sampling is enabled for a FacetRequest, its depth
parameter is reset to the default (1), even if set otherwise.
(Gilad Barkai via Shai Erera)
* LUCENE-4635: Fixed ArrayIndexOutOfBoundsException when in-memory
terms index requires more than 2.1 GB RAM (indices with billions of
terms). (Tom Burton-West via Mike McCandless)
Documentation
* LUCENE-4302: Fix facet userguide to have HTML loose doctype like
all other javadocs. (Karl Nicholas via Uwe Schindler)
======================= Lucene 3.6.1 =======================
More information about this release, including any errata related to the
release notes, upgrade instructions, or other changes may be found online at:
@ -2043,7 +2097,7 @@ Tests
random graph tokens. (Mike McCandless)
* LUCENE-3968: factor out LookaheadTokenFilter from
MockGraphTokenFilter (Mike Mccandless)
MockGraphTokenFilter (Mike McCandless)
======================= Lucene 3.6.0 =======================
@ -2323,7 +2377,7 @@ Bug fixes
* LUCENE-3876: Fix bug where positions for a document exceeding
Integer.MAX_VALUE/2 would produce a corrupt index.
(Simon Willnauer, Mike Mccandless, Robert Muir)
(Simon Willnauer, Mike McCandless, Robert Muir)
* LUCENE-3880: UAX29URLEmailTokenizer now recognizes emails when the mailto:
scheme is prepended. (Kai Gülzau, Steve Rowe)

View File

@ -19,8 +19,8 @@ package org.apache.lucene.analysis.ja.dict;
import java.io.IOException;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FST.Arc;
import org.apache.lucene.util.fst.FST;
/**
* Thin wrapper around an FST with root-arc caching for Japanese.
@ -48,7 +48,7 @@ public final class TokenInfoFST {
rootCache = cacheRootArcs();
}
@SuppressWarnings("unchecked")
@SuppressWarnings({"rawtypes","unchecked"})
private FST.Arc<Long>[] cacheRootArcs() throws IOException {
FST.Arc<Long> rootCache[] = new FST.Arc[1+(cacheCeiling-0x3040)];
FST.Arc<Long> firstArc = new FST.Arc<Long>();
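
The wider @SuppressWarnings above is needed because Java cannot instantiate an array of a generic type such as FST.Arc<Long>; the root-arc cache has to be created as a raw array, which triggers both rawtypes and unchecked warnings. A minimal standalone sketch of that pattern (plain JDK types, not the Kuromoji code) is:

import java.util.ArrayList;
import java.util.List;

// Illustration only: generic array creation is forbidden, so caches of
// generic elements are built as raw arrays with the warnings suppressed.
public class GenericArrayCacheSketch {
  @SuppressWarnings({"rawtypes", "unchecked"})
  static List<String>[] newCache(int size) {
    // Raw array creation; the assignment is an unchecked conversion.
    List<String>[] cache = new List[size];
    for (int i = 0; i < size; i++) {
      cache[i] = new ArrayList<String>();
    }
    return cache;
  }

  public static void main(String[] args) {
    List<String>[] cache = newCache(4);
    cache[0].add("root-arc");
    System.out.println(cache[0]);
  }
}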

View File

@ -132,7 +132,7 @@ public class TokenInfoDictionaryBuilder {
System.out.println(" encode...");
PositiveIntOutputs fstOutput = PositiveIntOutputs.getSingleton(true);
Builder<Long> fstBuilder = new Builder<Long>(FST.INPUT_TYPE.BYTE2, 0, 0, true, true, Integer.MAX_VALUE, fstOutput, null, true, true);
Builder<Long> fstBuilder = new Builder<Long>(FST.INPUT_TYPE.BYTE2, 0, 0, true, true, Integer.MAX_VALUE, fstOutput, null, true, PackedInts.DEFAULT, true, 15);
IntsRef scratch = new IntsRef();
long ord = -1; // first ord will be 0
String lastValue = null;

View File

@ -113,7 +113,7 @@ public final class MemoryPostingsFormat extends PostingsFormat {
this.field = field;
this.doPackFST = doPackFST;
this.acceptableOverheadRatio = acceptableOverheadRatio;
builder = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, doPackFST, acceptableOverheadRatio, true);
builder = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, doPackFST, acceptableOverheadRatio, true, 15);
}
private class PostingsWriter extends PostingsConsumer {

View File

@ -230,7 +230,7 @@ and proximity searches (though sentence identification is not provided by Lucene
create, or a combination of existing and newly created components. Before
pursuing this approach, you may find it worthwhile to explore the
<a href="{@docRoot}/../analyzers-common/overview-summary.html">analyzers-common</a> library and/or ask on the
<a href="http://lucene.apache.org/java/docs/mailinglists.html"
<a href="http://lucene.apache.org/core/discussion.html"
>java-user@lucene.apache.org mailing list</a> first to see if what you
need already exists. If you are still committed to creating your own
Analyzer, have a look at the source code of any one of the many samples

View File

@ -276,13 +276,13 @@ public class BlockTreeTermsReader extends FieldsProducer {
*/
public static class Stats {
/** How many nodes in the index FST. */
public int indexNodeCount;
public long indexNodeCount;
/** How many arcs in the index FST. */
public int indexArcCount;
public long indexArcCount;
/** Byte size of the index. */
public int indexNumBytes;
public long indexNumBytes;
/** Total number of terms in the field. */
public long totalTermCount;

View File

@ -23,7 +23,6 @@ import java.util.Comparator;
import java.util.List;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
@ -41,6 +40,7 @@ import org.apache.lucene.util.fst.BytesRefFSTEnum;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.NoOutputs;
import org.apache.lucene.util.fst.Util;
import org.apache.lucene.util.packed.PackedInts;
/*
TODO:
@ -187,7 +187,7 @@ public class BlockTreeTermsWriter extends FieldsConsumer {
public final static int DEFAULT_MAX_BLOCK_SIZE = 48;
//public final static boolean DEBUG = false;
private final static boolean SAVE_DOT_FILES = false;
//private final static boolean SAVE_DOT_FILES = false;
static final int OUTPUT_FLAGS_NUM_BITS = 2;
static final int OUTPUT_FLAGS_MASK = 0x3;
@ -419,7 +419,8 @@ public class BlockTreeTermsWriter extends FieldsConsumer {
final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
final Builder<BytesRef> indexBuilder = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1,
0, 0, true, false, Integer.MAX_VALUE,
outputs, null, false, true);
outputs, null, false,
PackedInts.COMPACT, true, 15);
//if (DEBUG) {
// System.out.println(" compile index for prefix=" + prefix);
//}
@ -962,7 +963,9 @@ public class BlockTreeTermsWriter extends FieldsConsumer {
0, 0, true,
true, Integer.MAX_VALUE,
noOutputs,
new FindBlocks(), false, true);
new FindBlocks(), false,
PackedInts.COMPACT,
true, 15);
postingsWriter.setField(fieldInfo);
}

View File

@ -22,6 +22,7 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
@ -3475,6 +3476,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
diagnostics.put("os.version", Constants.OS_VERSION);
diagnostics.put("java.version", Constants.JAVA_VERSION);
diagnostics.put("java.vendor", Constants.JAVA_VENDOR);
diagnostics.put("timestamp", Long.toString(new Date().getTime()));
if (details != null) {
diagnostics.putAll(details);
}

View File

@ -115,15 +115,6 @@ public final class Norm {
setType(Type.FIXED_INTS_64);
this.field.setLongValue(norm);
}
/**
* Sets a packed long norm value.
* @lucene.experimental
*/
public void setPackedLong(long norm) {
setType(Type.VAR_INTS);
this.field.setLongValue(norm);
}
/**
* Sets a byte norm value

View File

@ -38,7 +38,7 @@ import org.apache.lucene.search.DocIdSetIterator;
public final class FixedBitSet extends DocIdSet implements Bits {
private final long[] bits;
private int numBits;
private final int numBits;
/** returns the number of 64 bit words it would take to hold numBits */
public static int bits2words(int numBits) {

View File

@ -36,9 +36,13 @@ import org.apache.lucene.util.packed.PackedInts;
* <p>NOTE: The algorithm is described at
* http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.24.3698</p>
*
* The parameterized type T is the output type. See the
* <p>The parameterized type T is the output type. See the
* subclasses of {@link Outputs}.
*
* <p>FSTs larger than 2.1GB are now possible (as of Lucene
* 4.2). FSTs containing more than 2.1B nodes are also now
* possible, however they cannot be packed.
*
* @lucene.experimental
*/
@ -84,22 +88,11 @@ public class Builder<T> {
/**
* Instantiates an FST/FSA builder without any pruning. A shortcut
* to {@link #Builder(FST.INPUT_TYPE, int, int, boolean,
* boolean, int, Outputs, FreezeTail, boolean, boolean)} with
* pruning options turned off.
* boolean, int, Outputs, FreezeTail, boolean, float,
* boolean, int)} with pruning options turned off.
*/
public Builder(FST.INPUT_TYPE inputType, Outputs<T> outputs) {
this(inputType, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, false, PackedInts.COMPACT, true);
}
/**
* Instantiates an FST/FSA builder with {@link PackedInts#DEFAULT}
* <code>acceptableOverheadRatio</code>.
*/
public Builder(FST.INPUT_TYPE inputType, int minSuffixCount1, int minSuffixCount2, boolean doShareSuffix,
boolean doShareNonSingletonNodes, int shareMaxTailLength, Outputs<T> outputs,
FreezeTail<T> freezeTail, boolean willPackFST, boolean allowArrayArcs) {
this(inputType, minSuffixCount1, minSuffixCount2, doShareSuffix, doShareNonSingletonNodes,
shareMaxTailLength, outputs, freezeTail, willPackFST, PackedInts.DEFAULT, allowArrayArcs);
this(inputType, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, false, PackedInts.COMPACT, true, 15);
}
/**
@ -147,10 +140,16 @@ public class Builder<T> {
* @param allowArrayArcs Pass false to disable the array arc optimization
* while building the FST; this will make the resulting
* FST smaller but slower to traverse.
*
* @param bytesPageBits How many bits wide to make each
* byte[] block in the BytesStore; if you know the FST
* will be large then make this larger. For example 15
* bits = 32768 byte pages.
*/
public Builder(FST.INPUT_TYPE inputType, int minSuffixCount1, int minSuffixCount2, boolean doShareSuffix,
boolean doShareNonSingletonNodes, int shareMaxTailLength, Outputs<T> outputs,
FreezeTail<T> freezeTail, boolean doPackFST, float acceptableOverheadRatio, boolean allowArrayArcs) {
FreezeTail<T> freezeTail, boolean doPackFST, float acceptableOverheadRatio, boolean allowArrayArcs,
int bytesPageBits) {
this.minSuffixCount1 = minSuffixCount1;
this.minSuffixCount2 = minSuffixCount2;
this.freezeTail = freezeTail;
@ -158,9 +157,9 @@ public class Builder<T> {
this.shareMaxTailLength = shareMaxTailLength;
this.doPackFST = doPackFST;
this.acceptableOverheadRatio = acceptableOverheadRatio;
fst = new FST<T>(inputType, outputs, doPackFST, acceptableOverheadRatio, allowArrayArcs);
fst = new FST<T>(inputType, outputs, doPackFST, acceptableOverheadRatio, allowArrayArcs, bytesPageBits);
if (doShareSuffix) {
dedupHash = new NodeHash<T>(fst);
dedupHash = new NodeHash<T>(fst, fst.bytes.getReverseReader(false));
} else {
dedupHash = null;
}
@ -174,7 +173,7 @@ public class Builder<T> {
}
}
public int getTotStateCount() {
public long getTotStateCount() {
return fst.nodeCount;
}
@ -182,12 +181,12 @@ public class Builder<T> {
return frontier[0].inputCount;
}
public int getMappedStateCount() {
public long getMappedStateCount() {
return dedupHash == null ? 0 : fst.nodeCount;
}
private CompiledNode compileNode(UnCompiledNode<T> nodeIn, int tailLength) throws IOException {
final int node;
final long node;
if (dedupHash != null && (doShareNonSingletonNodes || nodeIn.numArcs <= 1) && tailLength <= shareMaxTailLength) {
if (nodeIn.numArcs == 0) {
node = fst.addNode(nodeIn);
@ -475,7 +474,7 @@ public class Builder<T> {
fst.finish(compileNode(root, lastInput.length).node);
if (doPackFST) {
return fst.pack(3, Math.max(10, fst.getNodeCount()/4), acceptableOverheadRatio);
return fst.pack(3, Math.max(10, (int) (fst.getNodeCount()/4)), acceptableOverheadRatio);
} else {
return fst;
}
@ -513,8 +512,12 @@ public class Builder<T> {
boolean isCompiled();
}
public long fstSizeInBytes() {
return fst.sizeInBytes();
}
static final class CompiledNode implements Node {
int node;
long node;
@Override
public boolean isCompiled() {
return true;
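
For reference, a minimal sketch of driving the reworked Builder constructor: the 12-argument signature, PackedInts.COMPACT and the trailing bytesPageBits=15 (32 KB BytesStore pages) follow the hunks above, while the surrounding helper calls (PositiveIntOutputs.getSingleton, Util.toIntsRef, Util.get) are assumed from the Lucene 4.x FST API and may need adjusting:

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;
import org.apache.lucene.util.packed.PackedInts;

// Sketch, not part of the patch: builds a tiny FST with the new constructor.
public class BuilderPageBitsSketch {
  public static void main(String[] args) throws Exception {
    PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
    Builder<Long> builder = new Builder<Long>(
        FST.INPUT_TYPE.BYTE1,
        0, 0,                // minSuffixCount1, minSuffixCount2: no pruning
        true, true,          // doShareSuffix, doShareNonSingletonNodes
        Integer.MAX_VALUE,   // shareMaxTailLength
        outputs,
        null,                // freezeTail
        false,               // doPackFST
        PackedInts.COMPACT,  // acceptableOverheadRatio
        true,                // allowArrayArcs
        15);                 // bytesPageBits: 1 << 15 = 32768-byte BytesStore pages
    IntsRef scratch = new IntsRef();
    builder.add(Util.toIntsRef(new BytesRef("cat"), scratch), 5L);  // inputs must be sorted
    builder.add(Util.toIntsRef(new BytesRef("dog"), scratch), 7L);
    FST<Long> fst = builder.finish();
    System.out.println(Util.get(fst, new BytesRef("dog")));         // prints 7
  }
}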

View File

@ -0,0 +1,468 @@
package org.apache.lucene.util.fst;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
// TODO: merge with PagedBytes, except PagedBytes doesn't
// let you read while writing which FST needs
class BytesStore extends DataOutput {
private final List<byte[]> blocks = new ArrayList<byte[]>();
private final int blockSize;
private final int blockBits;
private final int blockMask;
private byte[] current;
private int nextWrite;
public BytesStore(int blockBits) {
this.blockBits = blockBits;
blockSize = 1 << blockBits;
blockMask = blockSize-1;
nextWrite = blockSize;
}
/** Pulls bytes from the provided IndexInput. */
public BytesStore(DataInput in, int numBytes, int maxBlockSize) throws IOException {
int blockSize = 2;
int blockBits = 1;
while(blockSize < numBytes && blockSize < maxBlockSize) {
blockSize *= 2;
blockBits++;
}
this.blockBits = blockBits;
this.blockSize = blockSize;
this.blockMask = blockSize-1;
int left = numBytes;
while(left > 0) {
final int chunk = Math.min(blockSize, left);
byte[] block = new byte[chunk];
in.readBytes(block, 0, block.length);
blocks.add(block);
left -= chunk;
}
// So .getPosition still works
nextWrite = blocks.get(blocks.size()-1).length;
}
/** Absolute write byte; you must ensure dest is < max
* position written so far. */
public void writeByte(int dest, byte b) {
int blockIndex = dest >> blockBits;
byte[] block = blocks.get(blockIndex);
block[dest & blockMask] = b;
}
@Override
public void writeByte(byte b) {
if (nextWrite == blockSize) {
current = new byte[blockSize];
blocks.add(current);
nextWrite = 0;
}
current[nextWrite++] = b;
}
@Override
public void writeBytes(byte[] b, int offset, int len) {
while (len > 0) {
int chunk = blockSize - nextWrite;
if (len <= chunk) {
System.arraycopy(b, offset, current, nextWrite, len);
nextWrite += len;
break;
} else {
if (chunk > 0) {
System.arraycopy(b, offset, current, nextWrite, chunk);
offset += chunk;
len -= chunk;
}
current = new byte[blockSize];
blocks.add(current);
nextWrite = 0;
}
}
}
int getBlockBits() {
return blockBits;
}
/** Absolute writeBytes without changing the current
* position. Note: this cannot "grow" the bytes, so you
* must only call it on already written parts. */
void writeBytes(long dest, byte[] b, int offset, int len) {
//System.out.println(" BS.writeBytes dest=" + dest + " offset=" + offset + " len=" + len);
assert dest + len <= getPosition(): "dest=" + dest + " pos=" + getPosition() + " len=" + len;
// Note: weird: must go "backwards" because copyBytes
// calls us with overlapping src/dest. If we
// go forwards then we overwrite bytes before we can
// copy them:
/*
int blockIndex = dest >> blockBits;
int upto = dest & blockMask;
byte[] block = blocks.get(blockIndex);
while (len > 0) {
int chunk = blockSize - upto;
System.out.println(" cycle chunk=" + chunk + " len=" + len);
if (len <= chunk) {
System.arraycopy(b, offset, block, upto, len);
break;
} else {
System.arraycopy(b, offset, block, upto, chunk);
offset += chunk;
len -= chunk;
blockIndex++;
block = blocks.get(blockIndex);
upto = 0;
}
}
*/
final long end = dest + len;
int blockIndex = (int) (end >> blockBits);
int downTo = (int) (end & blockMask);
if (downTo == 0) {
blockIndex--;
downTo = blockSize;
}
byte[] block = blocks.get(blockIndex);
while (len > 0) {
//System.out.println(" cycle downTo=" + downTo + " len=" + len);
if (len <= downTo) {
//System.out.println(" final: offset=" + offset + " len=" + len + " dest=" + (downTo-len));
System.arraycopy(b, offset, block, downTo-len, len);
break;
} else {
len -= downTo;
//System.out.println(" partial: offset=" + (offset + len) + " len=" + downTo + " dest=0");
System.arraycopy(b, offset + len, block, 0, downTo);
blockIndex--;
block = blocks.get(blockIndex);
downTo = blockSize;
}
}
}
/** Absolute copy bytes self to self, without changing the
* position. Note: this cannot "grow" the bytes, so must
* only call it on already written parts. */
public void copyBytes(long src, long dest, int len) {
//System.out.println("BS.copyBytes src=" + src + " dest=" + dest + " len=" + len);
assert src < dest;
// Note: weird: must go "backwards" because copyBytes
// calls us with overlapping src/dest. If we
// go forwards then we overwrite bytes before we can
// copy them:
/*
int blockIndex = src >> blockBits;
int upto = src & blockMask;
byte[] block = blocks.get(blockIndex);
while (len > 0) {
int chunk = blockSize - upto;
System.out.println(" cycle: chunk=" + chunk + " len=" + len);
if (len <= chunk) {
writeBytes(dest, block, upto, len);
break;
} else {
writeBytes(dest, block, upto, chunk);
blockIndex++;
block = blocks.get(blockIndex);
upto = 0;
len -= chunk;
dest += chunk;
}
}
*/
long end = src + len;
int blockIndex = (int) (end >> blockBits);
int downTo = (int) (end & blockMask);
if (downTo == 0) {
blockIndex--;
downTo = blockSize;
}
byte[] block = blocks.get(blockIndex);
while (len > 0) {
//System.out.println(" cycle downTo=" + downTo);
if (len <= downTo) {
//System.out.println(" finish");
writeBytes(dest, block, downTo-len, len);
break;
} else {
//System.out.println(" partial");
len -= downTo;
writeBytes(dest + len, block, 0, downTo);
blockIndex--;
block = blocks.get(blockIndex);
downTo = blockSize;
}
}
}
/** Writes an int at the absolute position without
* changing the current pointer. */
public void writeInt(long pos, int value) {
int blockIndex = (int) (pos >> blockBits);
int upto = (int) (pos & blockMask);
byte[] block = blocks.get(blockIndex);
int shift = 24;
for(int i=0;i<4;i++) {
block[upto++] = (byte) (value >> shift);
shift -= 8;
if (upto == blockSize) {
upto = 0;
blockIndex++;
block = blocks.get(blockIndex);
}
}
}
/** Reverse from srcPos, inclusive, to destPos, inclusive. */
public void reverse(long srcPos, long destPos) {
assert srcPos < destPos;
assert destPos < getPosition();
//System.out.println("reverse src=" + srcPos + " dest=" + destPos);
int srcBlockIndex = (int) (srcPos >> blockBits);
int src = (int) (srcPos & blockMask);
byte[] srcBlock = blocks.get(srcBlockIndex);
int destBlockIndex = (int) (destPos >> blockBits);
int dest = (int) (destPos & blockMask);
byte[] destBlock = blocks.get(destBlockIndex);
//System.out.println(" srcBlock=" + srcBlockIndex + " destBlock=" + destBlockIndex);
int limit = (int) (destPos - srcPos + 1)/2;
for(int i=0;i<limit;i++) {
//System.out.println(" cycle src=" + src + " dest=" + dest);
byte b = srcBlock[src];
srcBlock[src] = destBlock[dest];
destBlock[dest] = b;
src++;
if (src == blockSize) {
srcBlockIndex++;
srcBlock = blocks.get(srcBlockIndex);
//System.out.println(" set destBlock=" + destBlock + " srcBlock=" + srcBlock);
src = 0;
}
dest--;
if (dest == -1) {
destBlockIndex--;
destBlock = blocks.get(destBlockIndex);
//System.out.println(" set destBlock=" + destBlock + " srcBlock=" + srcBlock);
dest = blockSize-1;
}
}
}
public void skipBytes(int len) {
while (len > 0) {
int chunk = blockSize - nextWrite;
if (len <= chunk) {
nextWrite += len;
break;
} else {
len -= chunk;
current = new byte[blockSize];
blocks.add(current);
nextWrite = 0;
}
}
}
public long getPosition() {
return ((long) blocks.size()-1) * blockSize + nextWrite;
}
/** Pos must be less than the max position written so far!
* Ie, you cannot "grow" the file with this! */
public void truncate(long newLen) {
assert newLen <= getPosition();
assert newLen >= 0;
int blockIndex = (int) (newLen >> blockBits);
nextWrite = (int) (newLen & blockMask);
if (nextWrite == 0) {
blockIndex--;
nextWrite = blockSize;
}
blocks.subList(blockIndex+1, blocks.size()).clear();
if (newLen == 0) {
current = null;
} else {
current = blocks.get(blockIndex);
}
assert newLen == getPosition();
}
public void finish() {
if (current != null) {
byte[] lastBuffer = new byte[nextWrite];
System.arraycopy(current, 0, lastBuffer, 0, nextWrite);
blocks.set(blocks.size()-1, lastBuffer);
current = null;
}
}
/** Writes all of our bytes to the target {@link DataOutput}. */
public void writeTo(DataOutput out) throws IOException {
for(byte[] block : blocks) {
out.writeBytes(block, 0, block.length);
}
}
public FST.BytesReader getForwardReader() {
if (blocks.size() == 1) {
return new ForwardBytesReader(blocks.get(0));
}
return new FST.BytesReader() {
private byte[] current;
private int nextBuffer;
private int nextRead = blockSize;
@Override
public byte readByte() {
if (nextRead == blockSize) {
current = blocks.get(nextBuffer++);
nextRead = 0;
}
return current[nextRead++];
}
@Override
public void skipBytes(int count) {
setPosition(getPosition() + count);
}
@Override
public void readBytes(byte[] b, int offset, int len) {
while(len > 0) {
int chunkLeft = blockSize - nextRead;
if (len <= chunkLeft) {
System.arraycopy(current, nextRead, b, offset, len);
nextRead += len;
break;
} else {
if (chunkLeft > 0) {
System.arraycopy(current, nextRead, b, offset, chunkLeft);
offset += chunkLeft;
len -= chunkLeft;
}
current = blocks.get(nextBuffer++);
nextRead = 0;
}
}
}
@Override
public long getPosition() {
return ((long) nextBuffer-1)*blockSize + nextRead;
}
@Override
public void setPosition(long pos) {
int bufferIndex = (int) (pos >> blockBits);
nextBuffer = bufferIndex+1;
current = blocks.get(bufferIndex);
nextRead = (int) (pos & blockMask);
assert getPosition() == pos;
}
@Override
public boolean reversed() {
return false;
}
};
}
public FST.BytesReader getReverseReader() {
return getReverseReader(true);
}
FST.BytesReader getReverseReader(boolean allowSingle) {
if (allowSingle && blocks.size() == 1) {
return new ReverseBytesReader(blocks.get(0));
}
return new FST.BytesReader() {
private byte[] current = blocks.size() == 0 ? null : blocks.get(0);
private int nextBuffer = -1;
private int nextRead = 0;
@Override
public byte readByte() {
if (nextRead == -1) {
current = blocks.get(nextBuffer--);
nextRead = blockSize-1;
}
return current[nextRead--];
}
@Override
public void skipBytes(int count) {
setPosition(getPosition() - count);
}
@Override
public void readBytes(byte[] b, int offset, int len) {
for(int i=0;i<len;i++) {
b[offset+i] = readByte();
}
}
@Override
public long getPosition() {
return ((long) nextBuffer+1)*blockSize + nextRead;
}
@Override
public void setPosition(long pos) {
// NOTE: a little weird because if you
// setPosition(0), the next byte you read is
// bytes[0] ... but I would expect bytes[-1] (ie,
// EOF)...?
int bufferIndex = (int) (pos >> blockBits);
nextBuffer = bufferIndex-1;
current = blocks.get(bufferIndex);
nextRead = (int) (pos & blockMask);
assert getPosition() == pos: "pos=" + pos + " getPos()=" + getPosition();
}
@Override
public boolean reversed() {
return true;
}
};
}
}
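
The core idea of the new BytesStore is that an absolute position splits into a block index (pos >> blockBits) and an offset within the block (pos & blockMask), so the buffer can grow block by block instead of reallocating one huge byte[] and is no longer limited to 2 GB. A toy, self-contained illustration of that addressing (not the real class):

import java.util.ArrayList;
import java.util.List;

// Toy paged buffer demonstrating the block addressing used by BytesStore.
public class PagedBytesSketch {
  private final int blockBits = 4;              // tiny 16-byte pages for the demo
  private final int blockSize = 1 << blockBits;
  private final int blockMask = blockSize - 1;
  private final List<byte[]> blocks = new ArrayList<byte[]>();
  private byte[] current;
  private int nextWrite = blockSize;            // forces allocation on first write

  public void writeByte(byte b) {
    if (nextWrite == blockSize) {               // current page full: start a new one
      current = new byte[blockSize];
      blocks.add(current);
      nextWrite = 0;
    }
    current[nextWrite++] = b;
  }

  public byte readByte(long pos) {              // absolute read, position unchanged
    byte[] block = blocks.get((int) (pos >> blockBits));
    return block[(int) (pos & blockMask)];
  }

  public static void main(String[] args) {
    PagedBytesSketch store = new PagedBytesSketch();
    for (int i = 0; i < 40; i++) {              // spans three 16-byte pages
      store.writeByte((byte) i);
    }
    System.out.println(store.readByte(0) + " " + store.readByte(17) + " " + store.readByte(39));
  }
}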

File diff suppressed because it is too large

View File

@ -17,11 +17,11 @@ package org.apache.lucene.util.fst;
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;
import java.io.IOException;
/** Can next() and advance() through the terms in an FST
*
* @lucene.experimental
@ -153,8 +153,8 @@ abstract class FSTEnum<T> {
boolean found = false;
while (low <= high) {
mid = (low + high) >>> 1;
in.pos = arc.posArcsStart;
in.skip(arc.bytesPerArc*mid+1);
in.setPosition(arc.posArcsStart);
in.skipBytes(arc.bytesPerArc*mid+1);
final int midLabel = fst.readLabel(in);
final int cmp = midLabel - targetLabel;
//System.out.println(" cycle low=" + low + " high=" + high + " mid=" + mid + " midLabel=" + midLabel + " cmp=" + cmp);
@ -292,8 +292,8 @@ abstract class FSTEnum<T> {
boolean found = false;
while (low <= high) {
mid = (low + high) >>> 1;
in.pos = arc.posArcsStart;
in.skip(arc.bytesPerArc*mid+1);
in.setPosition(arc.posArcsStart);
in.skipBytes(arc.bytesPerArc*mid+1);
final int midLabel = fst.readLabel(in);
final int cmp = midLabel - targetLabel;
//System.out.println(" cycle low=" + low + " high=" + high + " mid=" + mid + " midLabel=" + midLabel + " cmp=" + cmp);
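
The hunks above replace direct pokes at the reader's public pos field with setPosition()/skipBytes() while binary-searching the fixed-width arc array. A standalone sketch of that seek-then-compare loop, using an invented 4-byte record layout rather than the real arc encoding:

// Sketch: binary search over fixed-width records via setPosition/skipBytes.
public class FixedWidthBinarySearchSketch {
  static final int BYTES_PER_RECORD = 4;        // assumed record width
  private final byte[] bytes;
  private int pos;

  FixedWidthBinarySearchSketch(byte[] bytes) { this.bytes = bytes; }

  void setPosition(int pos) { this.pos = pos; }
  void skipBytes(int count) { pos += count; }
  byte readByte() { return bytes[pos++]; }

  /** Returns the record index whose first byte equals label, or -1. */
  int find(int recordCount, byte label) {
    int low = 0, high = recordCount - 1;
    while (low <= high) {
      int mid = (low + high) >>> 1;
      setPosition(0);                           // start of the record region
      skipBytes(BYTES_PER_RECORD * mid);        // jump straight to record 'mid'
      int cmp = readByte() - label;
      if (cmp < 0) low = mid + 1;
      else if (cmp > 0) high = mid - 1;
      else return mid;
    }
    return -1;
  }

  public static void main(String[] args) {
    byte[] data = new byte[5 * BYTES_PER_RECORD];
    byte[] labels = {2, 5, 9, 11, 20};          // sorted first byte of each record
    for (int i = 0; i < labels.length; i++) data[i * BYTES_PER_RECORD] = labels[i];
    FixedWidthBinarySearchSketch r = new FixedWidthBinarySearchSketch(data);
    System.out.println(r.find(5, (byte) 9));    // prints 2
  }
}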

View File

@ -0,0 +1,62 @@
package org.apache.lucene.util.fst;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// TODO: can we use just ByteArrayDataInput...? need to
// add a .skipBytes to DataInput.. hmm and .setPosition
/** Reads from a single byte[]. */
final class ForwardBytesReader extends FST.BytesReader {
private final byte[] bytes;
private int pos;
public ForwardBytesReader(byte[] bytes) {
this.bytes = bytes;
}
@Override
public byte readByte() {
return bytes[pos++];
}
@Override
public void readBytes(byte[] b, int offset, int len) {
System.arraycopy(bytes, pos, b, offset, len);
pos += len;
}
@Override
public void skipBytes(int count) {
pos += count;
}
@Override
public long getPosition() {
return pos;
}
@Override
public void setPosition(long pos) {
this.pos = (int) pos;
}
@Override
public boolean reversed() {
return false;
}
}

View File

@ -19,22 +19,27 @@ package org.apache.lucene.util.fst;
import java.io.IOException;
import org.apache.lucene.util.packed.GrowableWriter;
import org.apache.lucene.util.packed.PackedInts;
// Used to dedup states (lookup already-frozen states)
final class NodeHash<T> {
private int[] table;
private GrowableWriter table;
private int count;
private int mask;
private final FST<T> fst;
private final FST.Arc<T> scratchArc = new FST.Arc<T>();
private final FST.BytesReader in;
public NodeHash(FST<T> fst) {
table = new int[16];
public NodeHash(FST<T> fst, FST.BytesReader in) {
table = new GrowableWriter(8, 16, PackedInts.COMPACT);
mask = 15;
this.fst = fst;
this.in = in;
}
private boolean nodesEqual(Builder.UnCompiledNode<T> node, int address, FST.BytesReader in) throws IOException {
private boolean nodesEqual(Builder.UnCompiledNode<T> node, long address) throws IOException {
fst.readFirstRealTargetArc(address, scratchArc, in);
if (scratchArc.bytesPerArc != 0 && node.numArcs != scratchArc.numArcs) {
return false;
@ -73,7 +78,8 @@ final class NodeHash<T> {
final Builder.Arc<T> arc = node.arcs[arcIdx];
//System.out.println(" label=" + arc.label + " target=" + ((Builder.CompiledNode) arc.target).node + " h=" + h + " output=" + fst.outputs.outputToString(arc.output) + " isFinal?=" + arc.isFinal);
h = PRIME * h + arc.label;
h = PRIME * h + ((Builder.CompiledNode) arc.target).node;
long n = ((Builder.CompiledNode) arc.target).node;
h = PRIME * h + (int) (n^(n>>32));
h = PRIME * h + arc.output.hashCode();
h = PRIME * h + arc.nextFinalOutput.hashCode();
if (arc.isFinal) {
@ -85,16 +91,15 @@ final class NodeHash<T> {
}
// hash code for a frozen node
private int hash(int node) throws IOException {
private int hash(long node) throws IOException {
final int PRIME = 31;
final FST.BytesReader in = fst.getBytesReader(0);
//System.out.println("hash frozen node=" + node);
int h = 0;
fst.readFirstRealTargetArc(node, scratchArc, in);
while(true) {
//System.out.println(" label=" + scratchArc.label + " target=" + scratchArc.target + " h=" + h + " output=" + fst.outputs.outputToString(scratchArc.output) + " next?=" + scratchArc.flag(4) + " final?=" + scratchArc.isFinal());
//System.out.println(" label=" + scratchArc.label + " target=" + scratchArc.target + " h=" + h + " output=" + fst.outputs.outputToString(scratchArc.output) + " next?=" + scratchArc.flag(4) + " final?=" + scratchArc.isFinal() + " pos=" + in.getPosition());
h = PRIME * h + scratchArc.label;
h = PRIME * h + scratchArc.target;
h = PRIME * h + (int) (scratchArc.target^(scratchArc.target>>32));
h = PRIME * h + scratchArc.output.hashCode();
h = PRIME * h + scratchArc.nextFinalOutput.hashCode();
if (scratchArc.isFinal()) {
@ -109,26 +114,25 @@ final class NodeHash<T> {
return h & Integer.MAX_VALUE;
}
public int add(Builder.UnCompiledNode<T> nodeIn) throws IOException {
// System.out.println("hash: add count=" + count + " vs " + table.length);
final FST.BytesReader in = fst.getBytesReader(0);
public long add(Builder.UnCompiledNode<T> nodeIn) throws IOException {
// System.out.println("hash: add count=" + count + " vs " + table.size());
final int h = hash(nodeIn);
int pos = h & mask;
int c = 0;
while(true) {
final int v = table[pos];
final long v = table.get(pos);
if (v == 0) {
// freeze & add
final int node = fst.addNode(nodeIn);
final long node = fst.addNode(nodeIn);
//System.out.println(" now freeze node=" + node);
assert hash(node) == h : "frozenHash=" + hash(node) + " vs h=" + h;
count++;
table[pos] = node;
if (table.length < 2*count) {
table.set(pos, node);
if (table.size() < 2*count) {
rehash();
}
return node;
} else if (nodesEqual(nodeIn, v, in)) {
} else if (nodesEqual(nodeIn, v)) {
// same node is already here
return v;
}
@ -139,12 +143,12 @@ final class NodeHash<T> {
}
// called only by rehash
private void addNew(int address) throws IOException {
private void addNew(long address) throws IOException {
int pos = hash(address) & mask;
int c = 0;
while(true) {
if (table[pos] == 0) {
table[pos] = address;
if (table.get(pos) == 0) {
table.set(pos, address);
break;
}
@ -154,16 +158,16 @@ final class NodeHash<T> {
}
private void rehash() throws IOException {
final int[] oldTable = table;
final GrowableWriter oldTable = table;
if (oldTable.length >= Integer.MAX_VALUE/2) {
if (oldTable.size() >= Integer.MAX_VALUE/2) {
throw new IllegalStateException("FST too large (> 2.1 GB)");
}
table = new int[2*table.length];
mask = table.length-1;
for(int idx=0;idx<oldTable.length;idx++) {
final int address = oldTable[idx];
table = new GrowableWriter(oldTable.getBitsPerValue(), 2*oldTable.size(), PackedInts.COMPACT);
mask = table.size()-1;
for(int idx=0;idx<oldTable.size();idx++) {
final long address = oldTable.get(idx);
if (address != 0) {
addNew(address);
}
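
Because frozen node addresses are now longs, NodeHash folds each address into the running int hash with (int)(n ^ (n >> 32)) and keeps the table in a packed GrowableWriter instead of an int[]. A tiny standalone demo of the folding step (table handling omitted; an unsigned shift is used here, equivalent for non-negative addresses):

// Demo: folding 64-bit node addresses into a 31-bit hash bucket.
public class LongAddressHashSketch {
  static final int PRIME = 31;

  static int hash(long[] nodeAddresses) {
    int h = 0;
    for (long n : nodeAddresses) {
      h = PRIME * h + (int) (n ^ (n >>> 32));   // mix high bits into the low bits
    }
    return h & Integer.MAX_VALUE;               // keep the hash non-negative
  }

  public static void main(String[] args) {
    long[] small = {17L, 42L};
    long[] large = {17L + (1L << 33), 42L};     // differs only above bit 32
    int mask = 15;                              // table of size 16
    System.out.println((hash(small) & mask) + " vs " + (hash(large) & mask));
  }
}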

View File

@ -0,0 +1,61 @@
package org.apache.lucene.util.fst;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/** Reads in reverse from a single byte[]. */
final class ReverseBytesReader extends FST.BytesReader {
private final byte[] bytes;
private int pos;
public ReverseBytesReader(byte[] bytes) {
this.bytes = bytes;
}
@Override
public byte readByte() {
return bytes[pos--];
}
@Override
public void readBytes(byte[] b, int offset, int len) {
for(int i=0;i<len;i++) {
b[offset+i] = bytes[pos--];
}
}
@Override
public void skipBytes(int count) {
pos -= count;
}
@Override
public long getPosition() {
return pos;
}
@Override
public void setPosition(long pos) {
this.pos = (int) pos;
}
@Override
public boolean reversed() {
return true;
}
}
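
ForwardBytesReader and this class differ only in the direction pos moves; the FST serializes arcs backwards, so lookups walk them through the reverse reader. A trivial standalone contrast of the two directions:

// Demo only: forward reading post-increments pos, reverse reading post-decrements.
public class ReaderDirectionSketch {
  public static void main(String[] args) {
    byte[] bytes = {'a', 'b', 'c', 'd'};

    StringBuilder fwd = new StringBuilder();
    for (int pos = 0; pos < bytes.length; pos++) {
      fwd.append((char) bytes[pos]);
    }

    StringBuilder rev = new StringBuilder();
    for (int pos = bytes.length - 1; pos >= 0; pos--) {
      rev.append((char) bytes[pos]);
    }

    System.out.println(fwd + " / " + rev);      // abcd / dcba
  }
}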

View File

@ -39,7 +39,7 @@ public final class Util {
// TODO: would be nice not to alloc this on every lookup
final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>());
final FST.BytesReader fstReader = fst.getBytesReader(0);
final BytesReader fstReader = fst.getBytesReader(0);
// Accumulate output as we go
T output = fst.outputs.getNoOutput();
@ -64,7 +64,7 @@ public final class Util {
public static<T> T get(FST<T> fst, BytesRef input) throws IOException {
assert fst.inputType == FST.INPUT_TYPE.BYTE1;
final FST.BytesReader fstReader = fst.getBytesReader(0);
final BytesReader fstReader = fst.getBytesReader(0);
// TODO: would be nice not to alloc this on every lookup
final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>());
@ -101,7 +101,7 @@ public final class Util {
* fit this. */
public static IntsRef getByOutput(FST<Long> fst, long targetOutput) throws IOException {
final FST.BytesReader in = fst.getBytesReader(0);
final BytesReader in = fst.getBytesReader(0);
// TODO: would be nice not to alloc this on every lookup
FST.Arc<Long> arc = fst.getFirstArc(new FST.Arc<Long>());
@ -147,8 +147,8 @@ public final class Util {
boolean exact = false;
while (low <= high) {
mid = (low + high) >>> 1;
in.pos = arc.posArcsStart;
in.skip(arc.bytesPerArc*mid);
in.setPosition(arc.posArcsStart);
in.skipBytes(arc.bytesPerArc*mid);
final byte flags = in.readByte();
fst.readLabel(in);
final long minArcOutput;
@ -273,7 +273,7 @@ public final class Util {
public static class TopNSearcher<T> {
private final FST<T> fst;
private final FST.BytesReader bytesReader;
private final BytesReader bytesReader;
private final int topN;
private final int maxQueueDepth;
@ -374,7 +374,7 @@ public final class Util {
//System.out.println("search topN=" + topN);
final FST.BytesReader fstReader = fst.getBytesReader(0);
final BytesReader fstReader = fst.getBytesReader(0);
final T NO_OUTPUT = fst.outputs.getNoOutput();
// TODO: we could enable FST to sorting arcs by weight
@ -544,7 +544,9 @@ public final class Util {
* </pre>
*
* <p>
* Note: larger FSTs (a few thousand nodes) won't even render, don't bother.
* Note: larger FSTs (a few thousand nodes) won't even
* render, don't bother. If the FST is > 2.1 GB in size
* then this method will throw strange exceptions.
*
* @param sameRank
* If <code>true</code>, the resulting <code>dot</code> file will try
@ -578,7 +580,7 @@ public final class Util {
// A bitset of already seen states (target offset).
final BitSet seen = new BitSet();
seen.set(startArc.target);
seen.set((int) startArc.target);
// Shape for states.
final String stateShape = "circle";
@ -595,7 +597,7 @@ public final class Util {
emitDotState(out, "initial", "point", "white", "");
final T NO_OUTPUT = fst.outputs.getNoOutput();
final FST.BytesReader r = fst.getBytesReader(0);
final BytesReader r = fst.getBytesReader(0);
// final FST.Arc<T> scratchArc = new FST.Arc<T>();
@ -617,7 +619,7 @@ public final class Util {
finalOutput = null;
}
emitDotState(out, Integer.toString(startArc.target), isFinal ? finalStateShape : stateShape, stateColor, finalOutput == null ? "" : fst.outputs.outputToString(finalOutput));
emitDotState(out, Long.toString(startArc.target), isFinal ? finalStateShape : stateShape, stateColor, finalOutput == null ? "" : fst.outputs.outputToString(finalOutput));
}
out.write(" initial -> " + startArc.target + "\n");
@ -638,7 +640,8 @@ public final class Util {
if (FST.targetHasArcs(arc)) {
// scan all target arcs
//System.out.println(" readFirstTarget...");
final int node = arc.target;
final long node = arc.target;
fst.readFirstRealTargetArc(arc.target, arc, r);
@ -648,7 +651,7 @@ public final class Util {
//System.out.println(" cycle arc=" + arc);
// Emit the unseen state and add it to the queue for the next level.
if (arc.target >= 0 && !seen.get(arc.target)) {
if (arc.target >= 0 && !seen.get((int) arc.target)) {
/*
boolean isFinal = false;
@ -675,12 +678,12 @@ public final class Util {
finalOutput = "";
}
emitDotState(out, Integer.toString(arc.target), stateShape, stateColor, finalOutput);
emitDotState(out, Long.toString(arc.target), stateShape, stateColor, finalOutput);
// To see the node address, use this instead:
//emitDotState(out, Integer.toString(arc.target), stateShape, stateColor, String.valueOf(arc.target));
seen.set(arc.target);
seen.set((int) arc.target);
nextLevelQueue.add(new FST.Arc<T>().copyFrom(arc));
sameLevelStates.add(arc.target);
sameLevelStates.add((int) arc.target);
}
String outs;
@ -893,8 +896,8 @@ public final class Util {
// " targetLabel=" + targetLabel);
while (low <= high) {
mid = (low + high) >>> 1;
in.pos = arc.posArcsStart;
in.skip(arc.bytesPerArc * mid + 1);
in.setPosition(arc.posArcsStart);
in.skipBytes(arc.bytesPerArc * mid + 1);
final int midLabel = fst.readLabel(in);
final int cmp = midLabel - label;
// System.out.println(" cycle low=" + low + " high=" + high + " mid=" +

View File

@ -99,7 +99,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
createIndex("index.nocfs", false, false);
}
*/
/*
// These are only needed for the special upgrade test to verify
// that also single-segment indexes are correctly upgraded by IndexUpgrader.
@ -115,8 +115,40 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
}
*/
/*
public void testCreateMoreTermsIndex() throws Exception {
// we use a real directory name that is not cleaned up,
// because this method is only used to create backwards
// indexes:
File indexDir = new File("moreterms");
_TestUtil.rmDir(indexDir);
Directory dir = newFSDirectory(indexDir);
LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
mp.setUseCompoundFile(false);
mp.setNoCFSRatio(1.0);
mp.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);
// TODO: remove randomness
IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
.setMergePolicy(mp);
conf.setCodec(Codec.forName("Lucene40"));
IndexWriter writer = new IndexWriter(dir, conf);
LineFileDocs docs = new LineFileDocs(null, true);
for(int i=0;i<50;i++) {
writer.addDocument(docs.nextDoc());
}
writer.close();
dir.close();
// Gives you time to copy the index out!: (there is also
// a test option to not remove temp dir...):
Thread.sleep(100000);
}
*/
final static String[] oldNames = {"40.cfs",
"40.nocfs",
"40.nocfs",
};
final String[] unsupportedNames = {"19.cfs",
@ -144,7 +176,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
};
final static String[] oldSingleSegmentNames = {"40.optimized.cfs",
"40.optimized.nocfs",
"40.optimized.nocfs",
};
static Map<String,Directory> oldIndexDirs;
@ -916,4 +948,15 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
dir.close();
}
}
public static final String moreTermsIndex = "moreterms.40.zip";
public void testMoreTerms() throws Exception {
File oldIndexDir = _TestUtil.getTempDir("moreterms");
_TestUtil.unzip(getDataFile(moreTermsIndex), oldIndexDir);
Directory dir = newFSDirectory(oldIndexDir);
// TODO: more tests
_TestUtil.checkIndex(dir);
dir.close();
}
}

View File

@ -22,7 +22,6 @@ import java.util.Random;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.DocValues.Type;
@ -31,12 +30,14 @@ import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.ExactSimScorer;
import org.apache.lucene.search.similarities.Similarity.SimWeight;
import org.apache.lucene.search.similarities.Similarity.SloppySimScorer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
/**
*
@ -86,39 +87,6 @@ public class TestCustomNorms extends LuceneTestCase {
dir.close();
docs.close();
}
public void testPackedNorms() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
config.setSimilarity(new PackedNormSimilarity());
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);
int num = _TestUtil.nextInt(random(), 1, 1000);
for (int i = 0; i < num; i++) {
Document doc = new Document();
doc.add(new StringField("len", Integer.toString(i), Field.Store.YES));
StringBuilder sb = new StringBuilder();
for (int j = 0; j < i; j++) {
sb.append(" token");
}
doc.add(new TextField("content", sb.toString(), Field.Store.NO));
writer.addDocument(doc);
}
DirectoryReader ir = writer.getReader();
writer.close();
for (AtomicReaderContext context : ir.leaves()) {
AtomicReader reader = context.reader();
DocValues norms = reader.normValues("content");
assertNotNull(norms);
Source source = norms.getSource();
assertEquals(Type.VAR_INTS, source.getType());
for (int i = 0; i < reader.maxDoc(); i++) {
assertEquals(source.getInt(i), Long.parseLong(reader.document(i).get("len")));
}
}
ir.close();
dir.close();
}
public void testExceptionOnRandomType() throws IOException {
Directory dir = newDirectory();
@ -334,28 +302,5 @@ public class TestCustomNorms extends LuceneTestCase {
throw new UnsupportedOperationException();
}
}
class PackedNormSimilarity extends Similarity {
@Override
public void computeNorm(FieldInvertState state, Norm norm) {
norm.setPackedLong(state.getLength());
}
@Override
public SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
throw new UnsupportedOperationException();
}
@Override
public ExactSimScorer exactSimScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public SloppySimScorer sloppySimScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
throw new UnsupportedOperationException();
}
}
}

View File

@ -0,0 +1,261 @@
package org.apache.lucene.util.fst;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.Arrays;
import java.util.Random;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TimeUnits;
import org.apache.lucene.util.packed.PackedInts;
import org.junit.Ignore;
import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
@Ignore("Requires tons of heap to run (10G works)")
@TimeoutSuite(millis = 100 * TimeUnits.HOUR)
public class Test2BFST extends LuceneTestCase {
private static long LIMIT = 3L*1024*1024*1024;
public void test() throws Exception {
int[] ints = new int[7];
IntsRef input = new IntsRef(ints, 0, ints.length);
long seed = random().nextLong();
for(int doPackIter=0;doPackIter<2;doPackIter++) {
boolean doPack = doPackIter == 1;
// Build FST w/ NoOutputs and stop when nodeCount > 3B
if (!doPack) {
System.out.println("\nTEST: 3B nodes; doPack=false output=NO_OUTPUTS");
Outputs<Object> outputs = NoOutputs.getSingleton();
Object NO_OUTPUT = outputs.getNoOutput();
final Builder<Object> b = new Builder<Object>(FST.INPUT_TYPE.BYTE1, 0, 0, false, false, Integer.MAX_VALUE, outputs,
null, doPack, PackedInts.COMPACT, true, 15);
int count = 0;
Random r = new Random(seed);
int[] ints2 = new int[200];
IntsRef input2 = new IntsRef(ints2, 0, ints2.length);
while(true) {
//System.out.println("add: " + input + " -> " + output);
for(int i=10;i<ints2.length;i++) {
ints2[i] = r.nextInt(256);
}
b.add(input2, NO_OUTPUT);
count++;
if (count % 100000 == 0) {
System.out.println(count + ": " + b.fstSizeInBytes() + " bytes; " + b.getTotStateCount() + " nodes");
}
if (b.getTotStateCount() > LIMIT) {
break;
}
nextInput(r, ints2);
}
FST<Object> fst = b.finish();
System.out.println("\nTEST: now verify [fst size=" + fst.sizeInBytes() + "; nodeCount=" + fst.getNodeCount() + "; arcCount=" + fst.getArcCount() + "]");
Arrays.fill(ints2, 0);
r = new Random(seed);
for(int i=0;i<count;i++) {
if (i % 1000000 == 0) {
System.out.println(i + "...: ");
}
for(int j=10;j<ints2.length;j++) {
ints2[j] = r.nextInt(256);
}
assertEquals(NO_OUTPUT, Util.get(fst, input2));
nextInput(r, ints2);
}
System.out.println("\nTEST: enum all input/outputs");
IntsRefFSTEnum<Object> fstEnum = new IntsRefFSTEnum<Object>(fst);
Arrays.fill(ints2, 0);
r = new Random(seed);
int upto = 0;
while(true) {
IntsRefFSTEnum.InputOutput<Object> pair = fstEnum.next();
if (pair == null) {
break;
}
for(int j=10;j<ints2.length;j++) {
ints2[j] = r.nextInt(256);
}
assertEquals(input2, pair.input);
assertEquals(NO_OUTPUT, pair.output);
upto++;
nextInput(r, ints2);
}
assertEquals(count, upto);
}
// Build FST w/ ByteSequenceOutputs and stop when FST
// size = 3GB
{
System.out.println("\nTEST: 3 GB size; doPack=" + doPack + " outputs=bytes");
Outputs<BytesRef> outputs = ByteSequenceOutputs.getSingleton();
final Builder<BytesRef> b = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs,
null, doPack, PackedInts.COMPACT, true, 15);
byte[] outputBytes = new byte[20];
BytesRef output = new BytesRef(outputBytes);
Arrays.fill(ints, 0);
int count = 0;
Random r = new Random(seed);
while(true) {
r.nextBytes(outputBytes);
//System.out.println("add: " + input + " -> " + output);
b.add(input, BytesRef.deepCopyOf(output));
count++;
if (count % 1000000 == 0) {
System.out.println(count + "...: " + b.fstSizeInBytes() + " bytes");
}
if (b.fstSizeInBytes() > LIMIT) {
break;
}
nextInput(r, ints);
}
FST<BytesRef> fst = b.finish();
System.out.println("\nTEST: now verify [fst size=" + fst.sizeInBytes() + "; nodeCount=" + fst.getNodeCount() + "; arcCount=" + fst.getArcCount() + "]");
r = new Random(seed);
Arrays.fill(ints, 0);
for(int i=0;i<count;i++) {
if (i % 1000000 == 0) {
System.out.println(i + "...: ");
}
r.nextBytes(outputBytes);
assertEquals(output, Util.get(fst, input));
nextInput(r, ints);
}
System.out.println("\nTEST: enum all input/outputs");
IntsRefFSTEnum<BytesRef> fstEnum = new IntsRefFSTEnum<BytesRef>(fst);
Arrays.fill(ints, 0);
r = new Random(seed);
int upto = 0;
while(true) {
IntsRefFSTEnum.InputOutput<BytesRef> pair = fstEnum.next();
if (pair == null) {
break;
}
assertEquals(input, pair.input);
r.nextBytes(outputBytes);
assertEquals(output, pair.output);
upto++;
nextInput(r, ints);
}
assertEquals(count, upto);
}
// Build FST w/ PositiveIntOutputs and stop when FST
// size = 3GB
{
System.out.println("\nTEST: 3 GB size; doPack=" + doPack + " outputs=long");
Outputs<Long> outputs = PositiveIntOutputs.getSingleton();
final Builder<Long> b = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs,
null, doPack, PackedInts.COMPACT, true, 15);
long output = 1;
Arrays.fill(ints, 0);
int count = 0;
Random r = new Random(seed);
while(true) {
//System.out.println("add: " + input + " -> " + output);
b.add(input, output);
output += 1+r.nextInt(10);
count++;
if (count % 1000000 == 0) {
System.out.println(count + "...: " + b.fstSizeInBytes() + " bytes");
}
if (b.fstSizeInBytes() > LIMIT) {
break;
}
nextInput(r, ints);
}
FST<Long> fst = b.finish();
System.out.println("\nTEST: now verify [fst size=" + fst.sizeInBytes() + "; nodeCount=" + fst.getNodeCount() + "; arcCount=" + fst.getArcCount() + "]");
Arrays.fill(ints, 0);
output = 1;
r = new Random(seed);
for(int i=0;i<count;i++) {
if (i % 1000000 == 0) {
System.out.println(i + "...: ");
}
// forward lookup:
assertEquals(output, Util.get(fst, input).longValue());
// reverse lookup:
assertEquals(input, Util.getByOutput(fst, output));
output += 1 + r.nextInt(10);
nextInput(r, ints);
}
System.out.println("\nTEST: enum all input/outputs");
IntsRefFSTEnum<Long> fstEnum = new IntsRefFSTEnum<Long>(fst);
Arrays.fill(ints, 0);
r = new Random(seed);
int upto = 0;
output = 1;
while(true) {
IntsRefFSTEnum.InputOutput<Long> pair = fstEnum.next();
if (pair == null) {
break;
}
assertEquals(input, pair.input);
assertEquals(output, pair.output.longValue());
output += 1 + r.nextInt(10);
upto++;
nextInput(r, ints);
}
assertEquals(count, upto);
}
}
}
private void nextInput(Random r, int[] ints) {
int downTo = 6;
while(downTo >= 0) {
// Must add random amounts (and not just 1) because
// otherwise FST outsmarts us and remains tiny:
ints[downTo] += 1+r.nextInt(10);
if (ints[downTo] < 256) {
break;
} else {
ints[downTo] = 0;
downTo--;
}
}
}
}

View File

@ -0,0 +1,360 @@
package org.apache.lucene.util.fst;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.Arrays;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
public class TestBytesStore extends LuceneTestCase {
public void testRandom() throws Exception {
final int iters = atLeast(10);
for(int iter=0;iter<iters;iter++) {
final int numBytes = _TestUtil.nextInt(random(), 1, 200000);
final byte[] expected = new byte[numBytes];
final int blockBits = _TestUtil.nextInt(random(), 8, 15);
final BytesStore bytes = new BytesStore(blockBits);
if (VERBOSE) {
System.out.println("TEST: iter=" + iter + " numBytes=" + numBytes + " blockBits=" + blockBits);
}
int pos = 0;
while(pos < numBytes) {
int op = random().nextInt(8);
if (VERBOSE) {
System.out.println(" cycle pos=" + pos);
}
switch(op) {
case 0:
{
// write random byte
byte b = (byte) random().nextInt(256);
if (VERBOSE) {
System.out.println(" writeByte b=" + b);
}
expected[pos++] = b;
bytes.writeByte(b);
}
break;
case 1:
{
// write random byte[]
int len = random().nextInt(Math.min(numBytes - pos, 100));
byte[] temp = new byte[len];
random().nextBytes(temp);
if (VERBOSE) {
System.out.println(" writeBytes len=" + len + " bytes=" + Arrays.toString(temp));
}
System.arraycopy(temp, 0, expected, pos, temp.length);
bytes.writeBytes(temp, 0, temp.length);
pos += len;
}
break;
case 2:
{
// write int @ absolute pos
if (pos > 4) {
int x = random().nextInt();
int randomPos = random().nextInt(pos-4);
if (VERBOSE) {
System.out.println(" abs writeInt pos=" + randomPos + " x=" + x);
}
bytes.writeInt(randomPos, x);
expected[randomPos++] = (byte) (x >> 24);
expected[randomPos++] = (byte) (x >> 16);
expected[randomPos++] = (byte) (x >> 8);
expected[randomPos++] = (byte) x;
}
}
break;
case 3:
{
// reverse bytes
if (pos > 1) {
int len = _TestUtil.nextInt(random(), 2, Math.min(100, pos));
int start;
if (len == pos) {
start = 0;
} else {
start = random().nextInt(pos - len);
}
int end = start + len - 1;
if (VERBOSE) {
System.out.println(" reverse start=" + start + " end=" + end + " len=" + len + " pos=" + pos);
}
bytes.reverse(start, end);
while(start <= end) {
byte b = expected[end];
expected[end] = expected[start];
expected[start] = b;
start++;
end--;
}
}
}
break;
case 4:
{
// abs write random byte[]
if (pos > 2) {
int randomPos = random().nextInt(pos-1);
int len = _TestUtil.nextInt(random(), 1, Math.min(pos - randomPos - 1, 100));
byte[] temp = new byte[len];
random().nextBytes(temp);
if (VERBOSE) {
System.out.println(" abs writeBytes pos=" + randomPos + " len=" + len + " bytes=" + Arrays.toString(temp));
}
System.arraycopy(temp, 0, expected, randomPos, temp.length);
bytes.writeBytes(randomPos, temp, 0, temp.length);
}
}
break;
case 5:
{
// copyBytes
if (pos > 1) {
int src = random().nextInt(pos-1);
int dest = _TestUtil.nextInt(random(), src+1, pos-1);
int len = _TestUtil.nextInt(random(), 1, Math.min(300, pos - dest));
if (VERBOSE) {
System.out.println(" copyBytes src=" + src + " dest=" + dest + " len=" + len);
}
System.arraycopy(expected, src, expected, dest, len);
bytes.copyBytes(src, dest, len);
}
}
break;
case 6:
{
// skip
int len = random().nextInt(Math.min(100, numBytes - pos));
if (VERBOSE) {
System.out.println(" skip len=" + len);
}
pos += len;
bytes.skipBytes(len);
// NOTE: must fill in zeros in case truncate was
// used, else we get false fails:
if (len > 0) {
byte[] zeros = new byte[len];
bytes.writeBytes(pos-len, zeros, 0, len);
}
}
break;
case 7:
{
// absWriteByte
if (pos > 0) {
int dest = random().nextInt(pos);
byte b = (byte) random().nextInt(256);
expected[dest] = b;
bytes.writeByte(dest, b);
}
break;
}
}
assertEquals(pos, bytes.getPosition());
if (pos > 0 && random().nextInt(50) == 17) {
// truncate
int len = _TestUtil.nextInt(random(), 1, Math.min(pos, 100));
bytes.truncate(pos - len);
pos -= len;
Arrays.fill(expected, pos, pos+len, (byte) 0);
if (VERBOSE) {
System.out.println(" truncate len=" + len + " newPos=" + pos);
}
}
if ((pos > 0 && random().nextInt(200) == 17)) {
verify(bytes, expected, pos);
}
}
BytesStore bytesToVerify;
if (random().nextBoolean()) {
if (VERBOSE) {
System.out.println("TEST: save/load final bytes");
}
Directory dir = newDirectory();
IndexOutput out = dir.createOutput("bytes", IOContext.DEFAULT);
bytes.writeTo(out);
out.close();
IndexInput in = dir.openInput("bytes", IOContext.DEFAULT);
bytesToVerify = new BytesStore(in, numBytes, _TestUtil.nextInt(random(), 256, Integer.MAX_VALUE));
in.close();
dir.close();
} else {
bytesToVerify = bytes;
}
verify(bytesToVerify, expected, numBytes);
}
}
private void verify(BytesStore bytes, byte[] expected, int totalLength) throws Exception {
assertEquals(totalLength, bytes.getPosition());
if (totalLength == 0) {
return;
}
if (VERBOSE) {
System.out.println(" verify...");
}
// First verify whole thing in one blast:
byte[] actual = new byte[totalLength];
if (random().nextBoolean()) {
if (VERBOSE) {
System.out.println(" bulk: reversed");
}
// reversed
FST.BytesReader r = bytes.getReverseReader();
assertTrue(r.reversed());
r.setPosition(totalLength-1);
r.readBytes(actual, 0, actual.length);
int start = 0;
int end = totalLength - 1;
while(start < end) {
byte b = actual[start];
actual[start] = actual[end];
actual[end] = b;
start++;
end--;
}
} else {
// forward
if (VERBOSE) {
System.out.println(" bulk: forward");
}
FST.BytesReader r = bytes.getForwardReader();
assertFalse(r.reversed());
r.readBytes(actual, 0, actual.length);
}
for(int i=0;i<totalLength;i++) {
assertEquals("byte @ index=" + i, expected[i], actual[i]);
}
FST.BytesReader r;
// Then verify ops:
boolean reversed = random().nextBoolean();
if (reversed) {
if (VERBOSE) {
System.out.println(" ops: reversed");
}
r = bytes.getReverseReader();
} else {
if (VERBOSE) {
System.out.println(" ops: forward");
}
r = bytes.getForwardReader();
}
if (totalLength > 1) {
int numOps = _TestUtil.nextInt(random(), 100, 200);
for(int op=0;op<numOps;op++) {
int numBytes = random().nextInt(Math.min(1000, totalLength-1));
int pos;
if (reversed) {
pos = _TestUtil.nextInt(random(), numBytes, totalLength-1);
} else {
pos = random().nextInt(totalLength-numBytes);
}
if (VERBOSE) {
System.out.println(" op iter=" + op + " reversed=" + reversed + " numBytes=" + numBytes + " pos=" + pos);
}
byte[] temp = new byte[numBytes];
r.setPosition(pos);
assertEquals(pos, r.getPosition());
r.readBytes(temp, 0, temp.length);
for(int i=0;i<numBytes;i++) {
byte expectedByte;
if (reversed) {
expectedByte = expected[pos - i];
} else {
expectedByte = expected[pos + i];
}
assertEquals("byte @ index=" + i, expectedByte, temp[i]);
}
int left;
int expectedPos;
if (reversed) {
expectedPos = pos-numBytes;
left = (int) r.getPosition();
} else {
expectedPos = pos+numBytes;
left = (int) (totalLength - r.getPosition());
}
assertEquals(expectedPos, r.getPosition());
if (left > 4) {
int skipBytes = random().nextInt(left-4);
int expectedInt = 0;
if (reversed) {
expectedPos -= skipBytes;
expectedInt |= (expected[expectedPos--]&0xFF)<<24;
expectedInt |= (expected[expectedPos--]&0xFF)<<16;
expectedInt |= (expected[expectedPos--]&0xFF)<<8;
expectedInt |= (expected[expectedPos--]&0xFF);
} else {
expectedPos += skipBytes;
expectedInt |= (expected[expectedPos++]&0xFF)<<24;
expectedInt |= (expected[expectedPos++]&0xFF)<<16;
expectedInt |= (expected[expectedPos++]&0xFF)<<8;
expectedInt |= (expected[expectedPos++]&0xFF);
}
if (VERBOSE) {
System.out.println(" skip numBytes=" + skipBytes);
System.out.println(" readInt");
}
r.skipBytes(skipBytes);
assertEquals(expectedInt, r.readInt());
}
}
}
}
}

View File

@ -310,7 +310,7 @@ public class TestFSTs extends LuceneTestCase {
final boolean doRewrite = random().nextBoolean();
Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, doRewrite, true);
Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, doRewrite, PackedInts.DEFAULT, true, 15);
boolean storeOrd = random().nextBoolean();
if (VERBOSE) {
@ -453,7 +453,7 @@ public class TestFSTs extends LuceneTestCase {
this.outputs = outputs;
this.doPack = doPack;
builder = new Builder<T>(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4, 0, prune, prune == 0, true, Integer.MAX_VALUE, outputs, null, doPack, !noArcArrays);
builder = new Builder<T>(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4, 0, prune, prune == 0, true, Integer.MAX_VALUE, outputs, null, doPack, PackedInts.DEFAULT, !noArcArrays, 15);
}
protected abstract T getOutput(IntsRef input, int ord) throws IOException;
@ -484,8 +484,13 @@ public class TestFSTs extends LuceneTestCase {
}
}
long tMid = System.currentTimeMillis();
System.out.println(((tMid-tStart) / 1000.0) + " sec to add all terms");
assert builder.getTermCount() == ord;
FST<T> fst = builder.finish();
long tEnd = System.currentTimeMillis();
System.out.println(((tEnd-tMid) / 1000.0) + " sec to finish/pack");
if (fst == null) {
System.out.println("FST was fully pruned!");
System.exit(0);
@ -513,6 +518,12 @@ public class TestFSTs extends LuceneTestCase {
return;
}
/*
IndexInput in = dir.openInput("fst.bin", IOContext.DEFAULT);
fst = new FST<T>(in, outputs);
in.close();
*/
System.out.println("\nNow verify...");
while(true) {
@ -576,7 +587,7 @@ public class TestFSTs extends LuceneTestCase {
}
}
// java -cp build/classes/test:build/classes/test-framework:build/classes/java:lib/junit-4.10.jar org.apache.lucene.util.fst.TestFSTs /x/tmp/allTerms3.txt out
// java -cp ../build/codecs/classes/java:../test-framework/lib/randomizedtesting-runner-2.0.8.jar:../build/core/classes/test:../build/core/classes/test-framework:../build/core/classes/java:../build/test-framework/classes/java:../test-framework/lib/junit-4.10.jar org.apache.lucene.util.fst.TestFSTs /xold/tmp/allTerms3.txt out
public static void main(String[] args) throws IOException {
int prune = 0;
int limit = Integer.MAX_VALUE;
@ -1022,7 +1033,7 @@ public class TestFSTs extends LuceneTestCase {
throws IOException {
if (FST.targetHasArcs(arc)) {
int childCount = 0;
FST.BytesReader fstReader = fst.getBytesReader(0);
BytesReader fstReader = fst.getBytesReader(0);
for (arc = fst.readFirstTargetArc(arc, arc, fstReader);;
arc = fst.readNextArc(arc, fstReader), childCount++)
{
@ -1062,7 +1073,7 @@ public class TestFSTs extends LuceneTestCase {
public void testFinalOutputOnEndState() throws Exception {
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE4, 2, 0, true, true, Integer.MAX_VALUE, outputs, null, random().nextBoolean(), true);
final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE4, 2, 0, true, true, Integer.MAX_VALUE, outputs, null, random().nextBoolean(), PackedInts.DEFAULT, true, 15);
builder.add(Util.toUTF32("stat", new IntsRef()), 17L);
builder.add(Util.toUTF32("station", new IntsRef()), 10L);
final FST<Long> fst = builder.finish();
@ -1077,7 +1088,7 @@ public class TestFSTs extends LuceneTestCase {
public void testInternalFinalState() throws Exception {
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
final boolean willRewrite = random().nextBoolean();
final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, willRewrite, true);
final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, willRewrite, PackedInts.DEFAULT, true, 15);
builder.add(Util.toIntsRef(new BytesRef("stat"), new IntsRef()), outputs.getNoOutput());
builder.add(Util.toIntsRef(new BytesRef("station"), new IntsRef()), outputs.getNoOutput());
final FST<Long> fst = builder.finish();
@ -1100,7 +1111,7 @@ public class TestFSTs extends LuceneTestCase {
final Long nothing = outputs.getNoOutput();
final Builder<Long> b = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
final FST<Long> fst = new FST<Long>(FST.INPUT_TYPE.BYTE1, outputs, false, PackedInts.COMPACT, true);
final FST<Long> fst = new FST<Long>(FST.INPUT_TYPE.BYTE1, outputs, false, PackedInts.COMPACT, true, 15);
final Builder.UnCompiledNode<Long> rootNode = new Builder.UnCompiledNode<Long>(b, 0);

View File

@ -46,7 +46,7 @@ public class SearchFiles {
/** Simple command-line based search demo. */
public static void main(String[] args) throws Exception {
String usage =
"Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/java/4_0/demo.html for details.";
"Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details.";
if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) {
System.out.println(usage);
System.exit(0);

View File

@ -3,7 +3,7 @@ package org.apache.lucene.facet.associations;
import java.io.IOException;
import org.apache.lucene.facet.search.PayloadIterator;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.util.BytesRef;
@ -46,12 +46,21 @@ public abstract class AssociationsPayloadIterator<T extends CategoryAssociation>
* It is assumed that all association values can be deserialized with the
* given {@link CategoryAssociation}.
*/
public AssociationsPayloadIterator(IndexReader reader, String field, T association) throws IOException {
pi = new PayloadIterator(reader, new Term(field, association.getCategoryListID()));
hasAssociations = pi.init();
public AssociationsPayloadIterator(String field, T association) throws IOException {
pi = new PayloadIterator(new Term(field, association.getCategoryListID()));
this.association = association;
}
/**
* Sets the {@link AtomicReaderContext} for which {@link #setNextDoc(int)}
* calls will be made. Returns true iff this reader has associations for any
* of the documents belonging to the association given to the constructor.
*/
public final boolean setNextReader(AtomicReaderContext context) throws IOException {
hasAssociations = pi.setNextReader(context);
return hasAssociations;
}
/**
* Skip to the requested document. Returns true iff the document has category
* association values and they were read successfully. Associations are

View File

@ -2,7 +2,6 @@ package org.apache.lucene.facet.associations;
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.collections.IntToFloatMap;
/*
@ -31,9 +30,8 @@ public class FloatAssociationsPayloadIterator extends AssociationsPayloadIterato
private final IntToFloatMap ordinalAssociations = new IntToFloatMap();
public FloatAssociationsPayloadIterator(IndexReader reader, String field, CategoryFloatAssociation association)
throws IOException {
super(reader, field, association);
public FloatAssociationsPayloadIterator(String field, CategoryFloatAssociation association) throws IOException {
super(field, association);
}
@Override

View File

@ -2,7 +2,6 @@ package org.apache.lucene.facet.associations;
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.collections.IntToIntMap;
/*
@ -31,9 +30,8 @@ public class IntAssociationsPayloadIterator extends AssociationsPayloadIterator<
private final IntToIntMap ordinalAssociations = new IntToIntMap();
public IntAssociationsPayloadIterator(IndexReader reader, String field, CategoryIntAssociation association)
throws IOException {
super(reader, field, association);
public IntAssociationsPayloadIterator(String field, CategoryIntAssociation association) throws IOException {
super(field, association);
}
@Override

View File

@ -3,13 +3,10 @@ package org.apache.lucene.facet.index.params;
import java.io.IOException;
import java.io.Serializable;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.facet.search.CategoryListIterator;
import org.apache.lucene.facet.search.PayloadCategoryListIteraor;
import org.apache.lucene.facet.search.TotalFacetCounts;
import org.apache.lucene.facet.util.PartitionsUtils;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.encoding.DGapIntEncoder;
import org.apache.lucene.util.encoding.IntDecoder;
import org.apache.lucene.util.encoding.IntEncoder;
@ -98,11 +95,6 @@ public class CategoryListParams implements Serializable {
return new SortingIntEncoder(new UniqueValuesIntEncoder(new DGapIntEncoder(new VInt8IntEncoder())));
}
/**
* Equality is defined by the 'term' that defines this category list.
* Sub-classes should override this method if a more complex calculation
* is needed to ensure equality.
*/
@Override
public boolean equals(Object o) {
if (o == this) {
@ -121,29 +113,16 @@ public class CategoryListParams implements Serializable {
return this.term.equals(other.term);
}
/**
* Hashcode is similar to {@link #equals(Object)}, in that it uses
* the term that defines this category list to derive the hashcode.
* Subclasses need to ensure that equality/hashcode is correctly defined,
* or there could be side-effects in the {@link TotalFacetCounts} caching
* mechanism (as the filename for a Total Facet Counts array cache
* is dependent on the hashCode, so it should consistently return the same
* hash for identity).
*/
@Override
public int hashCode() {
return this.hashCode;
}
/**
* Create the category list iterator for the specified partition.
*/
public CategoryListIterator createCategoryListIterator(IndexReader reader,
int partition) throws IOException {
/** Create the {@link CategoryListIterator} for the specified partition. */
public CategoryListIterator createCategoryListIterator(int partition) throws IOException {
String categoryListTermStr = PartitionsUtils.partitionName(this, partition);
Term payloadTerm = new Term(term.field(), categoryListTermStr);
return new PayloadCategoryListIteraor(reader, payloadTerm,
createEncoder().createMatchingDecoder());
return new PayloadCategoryListIteraor(payloadTerm, createEncoder().createMatchingDecoder());
}
}

View File

@ -50,7 +50,7 @@ public final class AdaptiveFacetsAccumulator extends StandardFacetsAccumulator {
* Create an {@link AdaptiveFacetsAccumulator}
* @see StandardFacetsAccumulator#StandardFacetsAccumulator(FacetSearchParams, IndexReader, TaxonomyReader)
*/
public AdaptiveFacetsAccumulator(FacetSearchParams searchParams, IndexReader indexReader,
public AdaptiveFacetsAccumulator(FacetSearchParams searchParams, IndexReader indexReader,
TaxonomyReader taxonomyReader) {
super(searchParams, indexReader, taxonomyReader);
}

View File

@ -2,6 +2,7 @@ package org.apache.lucene.facet.search;
import java.io.IOException;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.IntsRef;
/*
@ -23,6 +24,8 @@ import org.apache.lucene.util.IntsRef;
/**
* An interface for obtaining the category ordinals of documents.
* {@link #getOrdinals(int, IntsRef)} calls are done with document IDs that are
* local to the reader given to {@link #setNextReader(AtomicReaderContext)}.
* <p>
* <b>NOTE:</b> this class operates as a key to a map, and therefore you should
* implement {@code equals()} and {@code hashCode()} for proper behavior.
@ -32,19 +35,20 @@ import org.apache.lucene.util.IntsRef;
public interface CategoryListIterator {
/**
* Initializes the iterator. This method must be called before any calls to
* {@link #getOrdinals(int, IntsRef)}, and its return value indicates whether there are
* any relevant documents for this iterator.
* Sets the {@link AtomicReaderContext} for which
* {@link #getOrdinals(int, IntsRef)} calls will be made. Returns true iff any
* of the documents in this reader have category ordinals. This method must be
* called before any calls to {@link #getOrdinals(int, IntsRef)}.
*/
public boolean init() throws IOException;
public boolean setNextReader(AtomicReaderContext context) throws IOException;
/**
* Stores the category ordinals of the given document ID in the given
* {@link IntsRef}, starting at position 0 upto {@link IntsRef#length}. Grows
* the {@link IntsRef} if it is not large enough.
*
* <p>
* <b>NOTE:</b> if the requested document does not category ordinals
* <b>NOTE:</b> if the requested document does not have category ordinals
* associated with it, {@link IntsRef#length} is set to zero.
*/
public void getOrdinals(int docID, IntsRef ints) throws IOException;

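As a rough illustration of the per-segment contract documented above (not part of this patch), a caller might drive the iterator as in the sketch below. The class name PrintOrdinals and method dump are hypothetical; only setNextReader and getOrdinals come from the interface itself, and the loop mirrors the segment/docBase handling used elsewhere in this change.

// Illustrative sketch, not part of this patch: walk each segment of an
// IndexReader and print every document's category ordinals using the
// per-segment CategoryListIterator contract (setNextReader + getOrdinals).
import java.io.IOException;

import org.apache.lucene.facet.search.CategoryListIterator;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.IntsRef;

public class PrintOrdinals {
  public static void dump(IndexReader reader, CategoryListIterator cli) throws IOException {
    IntsRef ordinals = new IntsRef(32);
    for (AtomicReaderContext context : reader.leaves()) {
      if (!cli.setNextReader(context)) {
        continue; // no category ordinals in this segment for this iterator
      }
      int maxDoc = context.reader().maxDoc();
      for (int doc = 0; doc < maxDoc; doc++) { // doc IDs are segment-local
        cli.getOrdinals(doc, ordinals);
        for (int i = 0; i < ordinals.length; i++) {
          System.out.println((context.docBase + doc) + " -> " + ordinals.ints[i]);
        }
      }
    }
  }
}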
View File

@ -2,7 +2,7 @@ package org.apache.lucene.facet.search;
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
@ -34,17 +34,15 @@ import org.apache.lucene.util.encoding.IntDecoder;
public class PayloadCategoryListIteraor implements CategoryListIterator {
private final IntDecoder decoder;
private final IndexReader indexReader;
private final Term term;
private final PayloadIterator pi;
private final int hashCode;
public PayloadCategoryListIteraor(IndexReader indexReader, Term term, IntDecoder decoder) throws IOException {
pi = new PayloadIterator(indexReader, term);
public PayloadCategoryListIteraor(Term term, IntDecoder decoder) throws IOException {
pi = new PayloadIterator(term);
this.decoder = decoder;
hashCode = indexReader.hashCode() ^ term.hashCode();
hashCode = term.hashCode();
this.term = term;
this.indexReader = indexReader;
}
@Override
@ -58,7 +56,7 @@ public class PayloadCategoryListIteraor implements CategoryListIterator {
}
// Hash codes are the same, check equals() to avoid cases of hash-collisions.
return indexReader.equals(that.indexReader) && term.equals(that.term);
return term.equals(that.term);
}
@Override
@ -67,8 +65,8 @@ public class PayloadCategoryListIteraor implements CategoryListIterator {
}
@Override
public boolean init() throws IOException {
return pi.init();
public boolean setNextReader(AtomicReaderContext context) throws IOException {
return pi.setNextReader(context);
}
@Override

View File

@ -1,12 +1,10 @@
package org.apache.lucene.facet.search;
import java.io.IOException;
import java.util.Iterator;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
@ -42,99 +40,75 @@ import org.apache.lucene.util.BytesRef;
*/
public class PayloadIterator {
protected BytesRef data;
private TermsEnum reuseTE;
private DocsAndPositionsEnum currentDPE;
private DocsAndPositionsEnum dpe;
private boolean hasMore;
private int curDocID, curDocBase;
private int curDocID;
private final Iterator<AtomicReaderContext> leaves;
private final Term term;
public PayloadIterator(IndexReader indexReader, Term term) throws IOException {
leaves = indexReader.leaves().iterator();
public PayloadIterator(Term term) throws IOException {
this.term = term;
}
private void nextSegment() throws IOException {
/**
* Sets the {@link AtomicReaderContext} for which {@link #getPayload(int)}
* calls will be made. Returns true iff this reader has payload for any of the
* documents belonging to the {@link Term} given to the constructor.
*/
public boolean setNextReader(AtomicReaderContext context) throws IOException {
hasMore = false;
while (leaves.hasNext()) {
AtomicReaderContext ctx = leaves.next();
curDocBase = ctx.docBase;
Fields fields = ctx.reader().fields();
if (fields != null) {
Terms terms = fields.terms(term.field());
if (terms != null) {
reuseTE = terms.iterator(reuseTE);
if (reuseTE.seekExact(term.bytes(), true)) {
// this class is usually used to iterate on whatever a Query matched
// if it didn't match deleted documents, we won't receive them. if it
// did, we should iterate on them too, therefore we pass liveDocs=null
currentDPE = reuseTE.docsAndPositions(null, currentDPE, DocsAndPositionsEnum.FLAG_PAYLOADS);
if (currentDPE != null && (curDocID = currentDPE.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
hasMore = true;
break;
}
Fields fields = context.reader().fields();
if (fields != null) {
Terms terms = fields.terms(term.field());
if (terms != null) {
reuseTE = terms.iterator(reuseTE);
if (reuseTE.seekExact(term.bytes(), true)) {
// this class is usually used to iterate on whatever a Query matched
// if it didn't match deleted documents, we won't receive them. if it
// did, we should iterate on them too, therefore we pass liveDocs=null
dpe = reuseTE.docsAndPositions(null, dpe, DocsAndPositionsEnum.FLAG_PAYLOADS);
if (dpe != null && (curDocID = dpe.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
hasMore = true;
}
}
}
}
return hasMore;
}
/**
* Initialize the iterator. Should be done before the first call to
* {@link #getPayload(int)}. Returns {@code false} if no category list is
* found, or the category list has no documents.
*/
public boolean init() throws IOException {
nextSegment();
return hasMore;
}
/**
* Returns the {@link BytesRef payload} of the given document, or {@code null}
* if the document does not exist, there are no more documents in the posting
* list, or the document exists but has no payload. You should call
* {@link #init()} before the first call to this method.
* list, or the document exists but has no payload. The given document IDs
* are treated as local to the reader given to
* {@link #setNextReader(AtomicReaderContext)}.
*/
public BytesRef getPayload(int docID) throws IOException {
if (!hasMore) {
return null;
}
// re-basing docId->localDocID is done fewer times than currentDoc->globalDoc
int localDocID = docID - curDocBase;
if (curDocID > localDocID) {
if (curDocID > docID) {
// document does not exist
return null;
}
if (curDocID < localDocID) {
// look for the document either in that segment, or others
while (hasMore && (curDocID = currentDPE.advance(localDocID)) == DocIdSetIterator.NO_MORE_DOCS) {
nextSegment(); // also updates curDocID
localDocID = docID - curDocBase;
// nextSegment advances to nextDoc, so check if we still need to advance
if (curDocID >= localDocID) {
break;
if (curDocID < docID) {
curDocID = dpe.advance(docID);
if (curDocID != docID) { // requested document does not have a payload
if (curDocID == DocIdSetIterator.NO_MORE_DOCS) { // no more docs in this reader
hasMore = false;
}
}
// we break from the above loop when:
// 1. we iterated over all segments (hasMore=false)
// 2. current segment advanced to a doc, either requested or higher
if (!hasMore || curDocID != localDocID) {
return null;
}
}
// we're on the document
assert currentDPE.freq() == 1 : "expecting freq=1 (got " + currentDPE.freq() + ") term=" + term + " doc=" + (curDocID + curDocBase);
int pos = currentDPE.nextPosition();
assert pos != -1 : "no positions for term=" + term + " doc=" + (curDocID + curDocBase);
return currentDPE.getPayload();
assert dpe.freq() == 1 : "expecting freq=1 (got " + dpe.freq() + ") term=" + term + " doc=" + curDocID;
int pos = dpe.nextPosition();
assert pos != -1 : "no positions for term=" + term + " doc=" + curDocID;
return dpe.getPayload();
}
}

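A rough, hypothetical sketch (not part of this patch) of how the per-segment getPayload contract might be used with a global document ID: PayloadLookup and payloadForGlobalDoc are invented names, and only setNextReader and getPayload come from the class above; the rest assumes standard Lucene 4.x APIs (IndexReader.leaves, ReaderUtil.subIndex).

// Illustrative sketch, not part of this patch: resolve the payload of a global
// doc ID by locating its segment, then rebasing to a segment-local doc ID
// before calling getPayload (which expects reader-local IDs).
import java.io.IOException;
import java.util.List;

import org.apache.lucene.facet.search.PayloadIterator;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.util.BytesRef;

public class PayloadLookup {
  public static BytesRef payloadForGlobalDoc(IndexReader reader, PayloadIterator pi, int globalDocID)
      throws IOException {
    List<AtomicReaderContext> leaves = reader.leaves();
    AtomicReaderContext context = leaves.get(ReaderUtil.subIndex(globalDocID, leaves));
    if (!pi.setNextReader(context)) {
      return null; // this segment has no payloads for the iterator's term
    }
    return pi.getPayload(globalDocID - context.docBase); // rebase to a segment-local ID
  }
}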
View File

@ -62,7 +62,7 @@ public abstract class ScoredDocIdCollector extends Collector {
}
@Override
public ScoredDocIDsIterator scoredDocIdsIterator() {
protected ScoredDocIDsIterator scoredDocIdsIterator() {
return new ScoredDocIDsIterator() {
private DocIdSetIterator docIdsIter = docIds.iterator();
@ -129,7 +129,7 @@ public abstract class ScoredDocIdCollector extends Collector {
}
@Override
public ScoredDocIDsIterator scoredDocIdsIterator() {
protected ScoredDocIDsIterator scoredDocIdsIterator() {
return new ScoredDocIDsIterator() {
private DocIdSetIterator docIdsIter = docIds.iterator();
@ -189,8 +189,7 @@ public abstract class ScoredDocIdCollector extends Collector {
* do not require scoring, it is better to set it to <i>false</i>.
*/
public static ScoredDocIdCollector create(int maxDoc, boolean enableScoring) {
return enableScoring ? new ScoringDocIdCollector(maxDoc)
: new NonScoringDocIdCollector(maxDoc);
return enableScoring ? new ScoringDocIdCollector(maxDoc) : new NonScoringDocIdCollector(maxDoc);
}
private ScoredDocIdCollector(int maxDoc) {
@ -198,13 +197,14 @@ public abstract class ScoredDocIdCollector extends Collector {
docIds = new FixedBitSet(maxDoc);
}
protected abstract ScoredDocIDsIterator scoredDocIdsIterator() throws IOException;
/** Returns the default score used when scoring is disabled. */
public abstract float getDefaultScore();
/** Set the default score. Only applicable if scoring is disabled. */
public abstract void setDefaultScore(float defaultScore);
public abstract ScoredDocIDsIterator scoredDocIdsIterator() throws IOException;
public ScoredDocIDs getScoredDocIDs() {
return new ScoredDocIDs() {

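Since scoredDocIdsIterator() becomes protected here, outside callers would presumably reach the collected IDs through the public getScoredDocIDs() method instead. A minimal, hypothetical usage sketch follows; CollectScoredDocs and collect are invented names, the package of ScoredDocIdCollector/ScoredDocIDs is assumed to be org.apache.lucene.facet.search, and MatchAllDocsQuery is just a stand-in query.

// Illustrative sketch, not part of this patch: run a query, collect the matching
// doc IDs with scores, and expose them through the public getScoredDocIDs() API.
import org.apache.lucene.facet.search.ScoredDocIDs;
import org.apache.lucene.facet.search.ScoredDocIdCollector;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;

public class CollectScoredDocs {
  public static ScoredDocIDs collect(IndexReader reader) throws Exception {
    IndexSearcher searcher = new IndexSearcher(reader);
    // enableScoring=true so each collected doc keeps its score
    ScoredDocIdCollector collector = ScoredDocIdCollector.create(reader.maxDoc(), true);
    searcher.search(new MatchAllDocsQuery(), collector);
    return collector.getScoredDocIDs();
  }
}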
View File

@ -4,22 +4,23 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map.Entry;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.facet.search.aggregator.Aggregator;
import org.apache.lucene.facet.search.params.FacetSearchParams;
import org.apache.lucene.facet.search.params.FacetRequest;
import org.apache.lucene.facet.search.params.FacetSearchParams;
import org.apache.lucene.facet.search.results.FacetResult;
import org.apache.lucene.facet.search.results.IntermediateFacetResult;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.util.PartitionsUtils;
import org.apache.lucene.facet.util.ScoredDocIdsUtils;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.IntsRef;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -179,11 +180,11 @@ public class StandardFacetsAccumulator extends FacetsAccumulator {
List<FacetResult> res = new ArrayList<FacetResult>();
for (FacetRequest fr : searchParams.getFacetRequests()) {
FacetResultsHandler frHndlr = fr.createFacetResultsHandler(taxonomyReader);
IntermediateFacetResult tmpResult = fr2tmpRes.get(fr);
IntermediateFacetResult tmpResult = fr2tmpRes.get(fr);
if (tmpResult == null) {
continue; // do not add a null to the list.
}
FacetResult facetRes = frHndlr.renderFacetResult(tmpResult);
FacetResult facetRes = frHndlr.renderFacetResult(tmpResult);
// final labeling if allowed (because labeling is a costly operation)
if (isAllowLabeling()) {
frHndlr.labelResult(facetRes);
@ -213,18 +214,15 @@ public class StandardFacetsAccumulator extends FacetsAccumulator {
/** Check if it is worth to use complements */
protected boolean shouldComplement(ScoredDocIDs docids) {
return
mayComplement() &&
(docids.size() > indexReader.numDocs() * getComplementThreshold()) ;
return mayComplement() && (docids.size() > indexReader.numDocs() * getComplementThreshold()) ;
}
/**
* Iterate over the documents for this partition and fill the facet arrays with the correct
* count/complement count/value.
* @throws IOException If there is a low-level I/O error.
*/
private final void fillArraysForPartition(ScoredDocIDs docids,
FacetArrays facetArrays, int partition) throws IOException {
private final void fillArraysForPartition(ScoredDocIDs docids, FacetArrays facetArrays, int partition)
throws IOException {
if (isUsingComplements) {
initArraysByTotalCounts(facetArrays, partition, docids.size());
@ -236,27 +234,41 @@ public class StandardFacetsAccumulator extends FacetsAccumulator {
IntsRef ordinals = new IntsRef(32); // a reasonable start capacity for most common apps
for (Entry<CategoryListIterator, Aggregator> entry : categoryLists.entrySet()) {
CategoryListIterator categoryList = entry.getKey();
if (!categoryList.init()) {
continue;
}
Aggregator categorator = entry.getValue();
ScoredDocIDsIterator iterator = docids.iterator();
final ScoredDocIDsIterator iterator = docids.iterator();
final CategoryListIterator categoryListIter = entry.getKey();
final Aggregator aggregator = entry.getValue();
Iterator<AtomicReaderContext> contexts = indexReader.leaves().iterator();
AtomicReaderContext current = null;
int maxDoc = -1;
while (iterator.next()) {
int docID = iterator.getDocID();
categoryList.getOrdinals(docID, ordinals);
if (ordinals.length == 0) {
continue;
while (docID >= maxDoc) { // find the segment which contains this document
if (!contexts.hasNext()) {
throw new RuntimeException("ScoredDocIDs contains documents outside this reader's segments !?");
}
current = contexts.next();
maxDoc = current.docBase + current.reader().maxDoc();
if (docID < maxDoc) { // segment has docs, check if it has categories
boolean validSegment = categoryListIter.setNextReader(current);
validSegment &= aggregator.setNextReader(current);
if (!validSegment) { // if categoryList or aggregator say it's an invalid segment, skip all docs
while (docID < maxDoc && iterator.next()) {
docID = iterator.getDocID();
}
}
}
}
categorator.aggregate(docID, iterator.getScore(), ordinals);
docID -= current.docBase;
categoryListIter.getOrdinals(docID, ordinals);
if (ordinals.length == 0) {
continue; // document does not have category ordinals
}
aggregator.aggregate(docID, iterator.getScore(), ordinals);
}
}
}
/**
* Init arrays for partition by total counts, optionally applying a factor
*/
/** Init arrays for partition by total counts, optionally applying a factor */
private final void initArraysByTotalCounts(FacetArrays facetArrays, int partition, int nAccumulatedDocs) {
int[] intArray = facetArrays.getIntArray();
totalFacetCounts.fillTotalCountsForPartition(intArray, partition);
@ -302,10 +314,9 @@ public class StandardFacetsAccumulator extends FacetsAccumulator {
for (FacetRequest facetRequest : searchParams.getFacetRequests()) {
Aggregator categoryAggregator = facetRequest.createAggregator(
isUsingComplements, facetArrays, indexReader, taxonomyReader);
isUsingComplements, facetArrays, taxonomyReader);
CategoryListIterator cli =
facetRequest.createCategoryListIterator(indexReader, taxonomyReader, searchParams, partition);
CategoryListIterator cli = facetRequest.createCategoryListIterator(taxonomyReader, searchParams, partition);
// get the aggregator
Aggregator old = categoryLists.put(cli, categoryAggregator);

View File

@ -170,7 +170,7 @@ public class TotalFacetCounts {
Aggregator aggregator = new CountingAggregator(counts[partition]);
HashMap<CategoryListIterator, Aggregator> map = new HashMap<CategoryListIterator, Aggregator>();
for (CategoryListParams clp: facetIndexingParams.getAllCategoryListParams()) {
final CategoryListIterator cli = clIteraor(clCache, clp, indexReader, partition);
final CategoryListIterator cli = clIteraor(clCache, clp, partition);
map.put(cli, aggregator);
}
return map;
@ -181,14 +181,14 @@ public class TotalFacetCounts {
return new TotalFacetCounts(taxonomy, facetIndexingParams, counts, CreationType.Computed);
}
static CategoryListIterator clIteraor(CategoryListCache clCache, CategoryListParams clp,
IndexReader indexReader, int partition) throws IOException {
static CategoryListIterator clIteraor(CategoryListCache clCache, CategoryListParams clp, int partition)
throws IOException {
if (clCache != null) {
CategoryListData cld = clCache.get(clp);
if (cld != null) {
return cld.iterator(partition);
}
}
return clp.createCategoryListIterator(indexReader, partition);
return clp.createCategoryListIterator(partition);
}
}

View File

@ -2,6 +2,7 @@ package org.apache.lucene.facet.search.aggregator;
import java.io.IOException;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.IntsRef;
/*
@ -22,21 +23,22 @@ import org.apache.lucene.util.IntsRef;
*/
/**
* An Aggregator is the analogue of Lucene's Collector (see
* {@link org.apache.lucene.search.Collector}), for processing the categories
* belonging to a certain document. The Aggregator is responsible for doing
* whatever it wishes with the categories it is fed, e.g., counting the number
* of times that each category appears, or performing some computation on their
* association values.
* <P>
* Much of the function of an Aggregator implementation is not described by this
* interface. This includes the constructor and getter methods to retrieve the
* results of the aggregation.
* Aggregates the categories of documents given to
* {@link #aggregate(int, float, IntsRef)}. Note that the document IDs are local
* to the reader given to {@link #setNextReader(AtomicReaderContext)}.
*
* @lucene.experimental
*/
public interface Aggregator {
/**
* Sets the {@link AtomicReaderContext} for which
* {@link #aggregate(int, float, IntsRef)} calls will be made. If this method
* returns false, {@link #aggregate(int, float, IntsRef)} should not be called
* for this reader.
*/
public boolean setNextReader(AtomicReaderContext context) throws IOException;
/**
* Aggregate the ordinals of the given document ID (and its score). The given
* ordinals offset is always zero.

View File

@ -2,6 +2,7 @@ package org.apache.lucene.facet.search.aggregator;
import java.io.IOException;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.IntsRef;
/*
@ -57,4 +58,9 @@ public class CountingAggregator implements Aggregator {
return counterArray == null ? 0 : counterArray.hashCode();
}
@Override
public boolean setNextReader(AtomicReaderContext context) throws IOException {
return true;
}
}

View File

@ -2,6 +2,7 @@ package org.apache.lucene.facet.search.aggregator;
import java.io.IOException;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.IntsRef;
/*
@ -58,4 +59,9 @@ public class ScoringAggregator implements Aggregator {
return hashCode;
}
@Override
public boolean setNextReader(AtomicReaderContext context) throws IOException {
return true;
}
}

View File

@ -6,7 +6,7 @@ import org.apache.lucene.facet.associations.CategoryFloatAssociation;
import org.apache.lucene.facet.associations.FloatAssociationsPayloadIterator;
import org.apache.lucene.facet.index.params.CategoryListParams;
import org.apache.lucene.facet.search.aggregator.Aggregator;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.collections.IntToFloatMap;
@ -39,13 +39,13 @@ public class AssociationFloatSumAggregator implements Aggregator {
protected final float[] sumArray;
protected final FloatAssociationsPayloadIterator associations;
public AssociationFloatSumAggregator(IndexReader reader, float[] sumArray) throws IOException {
this(CategoryListParams.DEFAULT_TERM.field(), reader, sumArray);
public AssociationFloatSumAggregator(float[] sumArray) throws IOException {
this(CategoryListParams.DEFAULT_TERM.field(), sumArray);
}
public AssociationFloatSumAggregator(String field, IndexReader reader, float[] sumArray) throws IOException {
public AssociationFloatSumAggregator(String field, float[] sumArray) throws IOException {
this.field = field;
associations = new FloatAssociationsPayloadIterator(reader, field, new CategoryFloatAssociation());
associations = new FloatAssociationsPayloadIterator(field, new CategoryFloatAssociation());
this.sumArray = sumArray;
}
@ -76,4 +76,9 @@ public class AssociationFloatSumAggregator implements Aggregator {
return field.hashCode();
}
@Override
public boolean setNextReader(AtomicReaderContext context) throws IOException {
return associations.setNextReader(context);
}
}

View File

@ -6,7 +6,7 @@ import org.apache.lucene.facet.associations.CategoryIntAssociation;
import org.apache.lucene.facet.associations.IntAssociationsPayloadIterator;
import org.apache.lucene.facet.index.params.CategoryListParams;
import org.apache.lucene.facet.search.aggregator.Aggregator;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.collections.IntToIntMap;
@ -39,13 +39,13 @@ public class AssociationIntSumAggregator implements Aggregator {
protected final int[] sumArray;
protected final IntAssociationsPayloadIterator associations;
public AssociationIntSumAggregator(IndexReader reader, int[] sumArray) throws IOException {
this(CategoryListParams.DEFAULT_TERM.field(), reader, sumArray);
public AssociationIntSumAggregator(int[] sumArray) throws IOException {
this(CategoryListParams.DEFAULT_TERM.field(), sumArray);
}
public AssociationIntSumAggregator(String field, IndexReader reader, int[] sumArray) throws IOException {
public AssociationIntSumAggregator(String field, int[] sumArray) throws IOException {
this.field = field;
associations = new IntAssociationsPayloadIterator(reader, field, new CategoryIntAssociation());
associations = new IntAssociationsPayloadIterator(field, new CategoryIntAssociation());
this.sumArray = sumArray;
}
@ -76,4 +76,9 @@ public class AssociationIntSumAggregator implements Aggregator {
return field.hashCode();
}
@Override
public boolean setNextReader(AtomicReaderContext context) throws IOException {
return associations.setNextReader(context);
}
}

View File

@ -6,6 +6,7 @@ import org.apache.lucene.facet.index.params.CategoryListParams;
import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.search.CategoryListIterator;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.IntsRef;
@ -56,25 +57,30 @@ public class CategoryListData {
}
/** Compute category list data for caching for faster iteration. */
CategoryListData(IndexReader reader, TaxonomyReader taxo,
FacetIndexingParams iparams, CategoryListParams clp) throws IOException {
CategoryListData(IndexReader reader, TaxonomyReader taxo, FacetIndexingParams iparams, CategoryListParams clp)
throws IOException {
final int maxDoc = reader.maxDoc();
int[][][]dpf = new int[maxDoc][][];
int[][][]dpf = new int[reader.maxDoc()][][];
int numPartitions = (int)Math.ceil(taxo.getSize()/(double)iparams.getPartitionSize());
IntsRef ordinals = new IntsRef(32);
for (int part = 0; part < numPartitions; part++) {
CategoryListIterator cli = clp.createCategoryListIterator(reader, part);
if (cli.init()) {
for (int doc = 0; doc < maxDoc; doc++) {
cli.getOrdinals(doc, ordinals);
if (ordinals.length > 0) {
if (dpf[doc] == null) {
dpf[doc] = new int[numPartitions][];
}
dpf[doc][part] = new int[ordinals.length];
for (int i = 0; i < ordinals.length; i++) {
dpf[doc][part][i] = ordinals.ints[i];
for (AtomicReaderContext context : reader.leaves()) {
CategoryListIterator cli = clp.createCategoryListIterator(part);
if (cli.setNextReader(context)) {
final int maxDoc = context.reader().maxDoc();
for (int i = 0; i < maxDoc; i++) {
cli.getOrdinals(i, ordinals);
if (ordinals.length > 0) {
int doc = i + context.docBase;
if (dpf[doc] == null) {
dpf[doc] = new int[numPartitions][];
}
if (dpf[doc][part] == null) {
dpf[doc][part] = new int[ordinals.length];
}
for (int j = 0; j < ordinals.length; j++) {
dpf[doc][part][j] = ordinals.ints[j];
}
}
}
}
@ -93,6 +99,7 @@ public class CategoryListData {
/** Internal: category list iterator over uncompressed category info in RAM */
private static class RAMCategoryListIterator implements CategoryListIterator {
private int docBase;
private final int part;
private final int[][][] dpc;
@ -102,13 +109,15 @@ public class CategoryListData {
}
@Override
public boolean init() throws IOException {
public boolean setNextReader(AtomicReaderContext context) throws IOException {
docBase = context.docBase;
return dpc != null && dpc.length > part;
}
@Override
public void getOrdinals(int docID, IntsRef ints) throws IOException {
ints.length = 0;
docID += docBase;
if (dpc.length > docID && dpc[docID] != null && dpc[docID][part] != null) {
if (ints.ints.length < dpc[docID][part].length) {
ints.grow(dpc[docID][part].length);

View File

@ -1,7 +1,5 @@
package org.apache.lucene.facet.search.params;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.facet.search.FacetArrays;
import org.apache.lucene.facet.search.aggregator.Aggregator;
import org.apache.lucene.facet.search.aggregator.ComplementCountingAggregator;
@ -47,8 +45,7 @@ public class CountFacetRequest extends FacetRequest {
}
@Override
public Aggregator createAggregator(boolean useComplements,
FacetArrays arrays, IndexReader reader, TaxonomyReader taxonomy) {
public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, TaxonomyReader taxonomy) {
// we rely on that, if needed, result is cleared by arrays!
int[] a = arrays.getIntArray();
if (useComplements) {

View File

@ -2,8 +2,6 @@ package org.apache.lucene.facet.search.params;
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.facet.index.params.CategoryListParams;
import org.apache.lucene.facet.search.CategoryListIterator;
import org.apache.lucene.facet.search.FacetArrays;
@ -11,8 +9,8 @@ import org.apache.lucene.facet.search.FacetResultsHandler;
import org.apache.lucene.facet.search.TopKFacetResultsHandler;
import org.apache.lucene.facet.search.TopKInEachNodeHandler;
import org.apache.lucene.facet.search.aggregator.Aggregator;
import org.apache.lucene.facet.search.cache.CategoryListData;
import org.apache.lucene.facet.search.cache.CategoryListCache;
import org.apache.lucene.facet.search.cache.CategoryListData;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
@ -314,33 +312,29 @@ public abstract class FacetRequest implements Cloneable {
* computation.
* @param arrays
* provider for facet arrays in use for current computation.
* @param indexReader
* index reader in effect.
* @param taxonomy
* reader of taxonomy in effect.
* @throws IOException If there is a low-level I/O error.
*/
public abstract Aggregator createAggregator(boolean useComplements,
FacetArrays arrays, IndexReader indexReader,
TaxonomyReader taxonomy) throws IOException;
public abstract Aggregator createAggregator(boolean useComplements, FacetArrays arrays, TaxonomyReader taxonomy)
throws IOException;
/**
* Create the category list iterator for the specified partition.
* If a non null cache is provided which contains the required data,
* use it for the iteration.
* Create the category list iterator for the specified partition. If a non
* null cache is provided which contains the required data, use it for the
* iteration.
*/
public CategoryListIterator createCategoryListIterator(IndexReader reader,
TaxonomyReader taxo, FacetSearchParams sParams, int partition)
public CategoryListIterator createCategoryListIterator(TaxonomyReader taxo, FacetSearchParams sParams, int partition)
throws IOException {
CategoryListCache clCache = sParams.getCategoryListCache();
CategoryListParams clParams = sParams.getFacetIndexingParams().getCategoryListParams(categoryPath);
if (clCache!=null) {
if (clCache != null) {
CategoryListData clData = clCache.get(clParams);
if (clData!=null) {
if (clData != null) {
return clData.iterator(partition);
}
}
return clParams.createCategoryListIterator(reader, partition);
return clParams.createCategoryListIterator(partition);
}
/**

View File

@ -1,7 +1,5 @@
package org.apache.lucene.facet.search.params;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.facet.search.FacetArrays;
import org.apache.lucene.facet.search.aggregator.Aggregator;
import org.apache.lucene.facet.search.aggregator.ScoringAggregator;
@ -38,9 +36,7 @@ public class ScoreFacetRequest extends FacetRequest {
}
@Override
public Aggregator createAggregator(boolean useComplements,
FacetArrays arrays, IndexReader reader,
TaxonomyReader taxonomy) {
public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, TaxonomyReader taxonomy) {
assert !useComplements : "complements are not supported by this FacetRequest";
return new ScoringAggregator(arrays.getFloatArray());
}

View File

@ -2,8 +2,6 @@ package org.apache.lucene.facet.search.params.associations;
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.facet.search.FacetArrays;
import org.apache.lucene.facet.search.aggregator.Aggregator;
import org.apache.lucene.facet.search.aggregator.associations.AssociationFloatSumAggregator;
@ -45,10 +43,10 @@ public class AssociationFloatSumFacetRequest extends FacetRequest {
}
@Override
public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, IndexReader reader,
TaxonomyReader taxonomy) throws IOException {
public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, TaxonomyReader taxonomy)
throws IOException {
assert !useComplements : "complements are not supported by this FacetRequest";
return new AssociationFloatSumAggregator(reader, arrays.getFloatArray());
return new AssociationFloatSumAggregator(arrays.getFloatArray());
}
@Override

View File

@ -2,8 +2,6 @@ package org.apache.lucene.facet.search.params.associations;
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.facet.search.FacetArrays;
import org.apache.lucene.facet.search.aggregator.Aggregator;
import org.apache.lucene.facet.search.aggregator.associations.AssociationIntSumAggregator;
@ -45,10 +43,10 @@ public class AssociationIntSumFacetRequest extends FacetRequest {
}
@Override
public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, IndexReader reader,
TaxonomyReader taxonomy) throws IOException {
public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, TaxonomyReader taxonomy)
throws IOException {
assert !useComplements : "complements are not supported by this FacetRequest";
return new AssociationIntSumAggregator(reader, arrays.getIntArray());
return new AssociationIntSumAggregator(arrays.getIntArray());
}
@Override

View File

@ -60,6 +60,7 @@ public abstract class Sampler {
/**
* Construct with certain {@link SamplingParams}
*
* @param params sampling params in effect
* @throws IllegalArgumentException if the provided SamplingParams are not valid
*/
@ -110,16 +111,15 @@ public abstract class Sampler {
* @param sampleSetSize required size of sample set
* @return sample of the input set in the required size
*/
protected abstract SampleResult createSample(ScoredDocIDs docids, int actualSize,
int sampleSetSize) throws IOException;
protected abstract SampleResult createSample(ScoredDocIDs docids, int actualSize, int sampleSetSize)
throws IOException;
/**
* Get a fixer of sample facet accumulation results. Default implementation
* returns a <code>TakmiSampleFixer</code> which is adequate only for
* counting. For any other accumulator, provide a different fixer.
*/
public SampleFixer getSampleFixer(
IndexReader indexReader, TaxonomyReader taxonomyReader,
public SampleFixer getSampleFixer(IndexReader indexReader, TaxonomyReader taxonomyReader,
FacetSearchParams searchParams) {
return new TakmiSampleFixer(indexReader, taxonomyReader, searchParams);
}
@ -161,10 +161,10 @@ public abstract class Sampler {
OverSampledFacetRequest sampledFreq = null;
try {
sampledFreq = (OverSampledFacetRequest)facetResult.getFacetRequest();
sampledFreq = (OverSampledFacetRequest) facetResult.getFacetRequest();
} catch (ClassCastException e) {
throw new IllegalArgumentException(
"It is only valid to call this method with result obtained for a" +
"It is only valid to call this method with result obtained for a " +
"facet request created through sampler.overSamlpingSearchParams()",
e);
}
@ -215,19 +215,15 @@ public abstract class Sampler {
}
@Override
public CategoryListIterator createCategoryListIterator(IndexReader reader,
TaxonomyReader taxo, FacetSearchParams sParams, int partition)
throws IOException {
return orig.createCategoryListIterator(reader, taxo, sParams, partition);
public CategoryListIterator createCategoryListIterator(TaxonomyReader taxo, FacetSearchParams sParams,
int partition) throws IOException {
return orig.createCategoryListIterator(taxo, sParams, partition);
}
@Override
public Aggregator createAggregator(boolean useComplements,
FacetArrays arrays, IndexReader indexReader,
TaxonomyReader taxonomy) throws IOException {
return orig.createAggregator(useComplements, arrays, indexReader,
taxonomy);
public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, TaxonomyReader taxonomy)
throws IOException {
return orig.createAggregator(useComplements, arrays, taxonomy);
}
@Override
@ -245,4 +241,5 @@ public abstract class Sampler {
return orig.supportsComplements();
}
}
}

View File

@ -91,8 +91,7 @@ class TakmiSampleFixer implements SampleFixer {
* full set of matching documents.
* @throws IOException If there is a low-level I/O error.
*/
private void recount(FacetResultNode fresNode, ScoredDocIDs docIds)
throws IOException {
private void recount(FacetResultNode fresNode, ScoredDocIDs docIds) throws IOException {
// TODO (Facet): change from void to return the new, smaller docSet, and use
// that for the children, as this will make their intersection ops faster.
// can do this only when the new set is "sufficiently" smaller.
@ -109,8 +108,7 @@ class TakmiSampleFixer implements SampleFixer {
Bits liveDocs = MultiFields.getLiveDocs(indexReader);
int updatedCount = countIntersection(MultiFields.getTermDocsEnum(indexReader, liveDocs,
drillDownTerm.field(), drillDownTerm.bytes(),
0),
docIds.iterator());
0), docIds.iterator());
fresNode.setValue(updatedCount);
}

View File

@ -5,6 +5,7 @@ import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.facet.search.CategoryListIterator;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.IntsRef;
/*
@ -42,9 +43,10 @@ public class MultiCategoryListIterator implements CategoryListIterator {
}
@Override
public boolean init() throws IOException {
public boolean setNextReader(AtomicReaderContext context) throws IOException {
validIterators.clear();
for (CategoryListIterator cli : iterators) {
if (cli.init()) {
if (cli.setNextReader(context)) {
validIterators.add(cli);
}
}
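The init() to setNextReader(AtomicReaderContext) change moves CategoryListIterator to a per-segment lifecycle. A minimal sketch of the new consumption pattern, pieced together from the test changes further down in this commit (the wrapper class and helper name are illustrative, not part of the patch):

import java.io.IOException;
import org.apache.lucene.facet.search.CategoryListIterator;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.IntsRef;

class PerSegmentCategoryListSketch {
  // Walk every segment, initializing the iterator per leaf; docIDs passed to
  // getOrdinals are segment-local, not global.
  static int countOrdinals(IndexReader reader, CategoryListIterator cli) throws IOException {
    int total = 0;
    IntsRef ordinals = new IntsRef();
    for (AtomicReaderContext context : reader.leaves()) {
      if (!cli.setNextReader(context)) {
        continue; // this segment has no category data for the iterator
      }
      int maxDoc = context.reader().maxDoc();
      for (int doc = 0; doc < maxDoc; doc++) {
        cli.getOrdinals(doc, ordinals);
        total += ordinals.length;
      }
    }
    return total;
  }
}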

View File

@ -3,17 +3,18 @@ package org.apache.lucene.facet.util;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.facet.search.ScoredDocIDs;
import org.apache.lucene.facet.search.ScoredDocIDsIterator;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.OpenBitSetDISI;
import org.apache.lucene.facet.search.ScoredDocIDs;
import org.apache.lucene.facet.search.ScoredDocIDsIterator;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@ -49,48 +50,57 @@ public class ScoredDocIdsUtils {
* @param reader holding the number of documents & information about deletions.
*/
public final static ScoredDocIDs getComplementSet(final ScoredDocIDs docids, final IndexReader reader)
throws IOException {
throws IOException {
final int maxDoc = reader.maxDoc();
DocIdSet docIdSet = docids.getDocIDs();
final OpenBitSet complement;
if (docIdSet instanceof OpenBitSet) {
final FixedBitSet complement;
if (docIdSet instanceof FixedBitSet) {
// That is the most common case, if ScoredDocIdsCollector was used.
complement = ((OpenBitSet) docIdSet).clone();
complement = ((FixedBitSet) docIdSet).clone();
} else {
complement = new OpenBitSetDISI(docIdSet.iterator(), maxDoc);
complement = new FixedBitSet(maxDoc);
DocIdSetIterator iter = docIdSet.iterator();
int doc;
while ((doc = iter.nextDoc()) < maxDoc) {
complement.set(doc);
}
}
complement.flip(0, maxDoc);
// Remove all Deletions from the complement set
clearDeleted(reader, complement);
return createScoredDocIds(complement, maxDoc);
}
/**
* Clear all deleted documents from a given open-bit-set according to a given reader
*/
private static void clearDeleted(final IndexReader reader,
final OpenBitSet set) throws IOException {
/** Clear all deleted documents from a given open-bit-set according to a given reader */
private static void clearDeleted(final IndexReader reader, final FixedBitSet set) throws IOException {
// If there are no deleted docs
if (!reader.hasDeletions()) {
return; // return immediately
}
Bits bits = MultiFields.getLiveDocs(reader);
DocIdSetIterator it = set.iterator();
int doc = DocIdSetIterator.NO_MORE_DOCS;
while ((doc = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (!bits.get(doc)) {
set.fastClear(doc);
int doc = it.nextDoc();
for (AtomicReaderContext context : reader.leaves()) {
AtomicReader r = context.reader();
final int maxDoc = r.maxDoc() + context.docBase;
if (doc >= maxDoc) { // skip this segment
continue;
}
if (!r.hasDeletions()) { // skip all docs that belong to this reader as it has no deletions
while ((doc = it.nextDoc()) < maxDoc) {}
continue;
}
Bits liveDocs = r.getLiveDocs();
do {
if (!liveDocs.get(doc - context.docBase)) {
set.clear(doc);
}
} while ((doc = it.nextDoc()) < maxDoc);
}
}
/**
* Create a subset of an existing ScoredDocIDs object.
*
@ -274,8 +284,7 @@ public class ScoredDocIdsUtils {
if (target <= next) {
target = next + 1;
}
return next = target >= maxDoc ? NO_MORE_DOCS
: target;
return next = target >= maxDoc ? NO_MORE_DOCS : target;
}
@Override
@ -420,4 +429,5 @@ public class ScoredDocIdsUtils {
}
}
}
}
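getComplementSet now builds the complement on a FixedBitSet and strips deletions leaf by leaf. A small caller-side sketch, using only calls visible in this patch (the wrapper class name is illustrative):

import java.io.IOException;
import org.apache.lucene.facet.search.ScoredDocIDs;
import org.apache.lucene.facet.util.ScoredDocIdsUtils;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.FixedBitSet;

class ComplementSketch {
  // Wrap the matching docs as ScoredDocIDs, then ask for everything else;
  // deleted documents are removed from the complement by clearDeleted above.
  static ScoredDocIDs complementOf(IndexReader reader, FixedBitSet matching) throws IOException {
    ScoredDocIDs docs = ScoredDocIdsUtils.createScoredDocIds(matching, reader.maxDoc());
    return ScoredDocIdsUtils.getComplementSet(docs, reader);
  }
}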

View File

@ -317,8 +317,7 @@ public abstract class FacetTestBase extends LuceneTestCase {
}
/** Validate results equality */
protected static void assertSameResults(List<FacetResult> expected,
List<FacetResult> actual) {
protected static void assertSameResults(List<FacetResult> expected, List<FacetResult> actual) {
String expectedResults = resStringValueOnly(expected);
String actualResults = resStringValueOnly(actual);
if (!expectedResults.equals(actualResults)) {

View File

@ -29,12 +29,11 @@ import org.apache.lucene.facet.taxonomy.TaxonomyReader;
public class AdaptiveAccumulatorTest extends BaseSampleTestTopK {
@Override
protected FacetsAccumulator getSamplingAccumulator(Sampler sampler,
TaxonomyReader taxoReader, IndexReader indexReader,
FacetSearchParams searchParams) {
AdaptiveFacetsAccumulator res = new AdaptiveFacetsAccumulator(searchParams,
indexReader, taxoReader);
protected FacetsAccumulator getSamplingAccumulator(Sampler sampler, TaxonomyReader taxoReader,
IndexReader indexReader, FacetSearchParams searchParams) {
AdaptiveFacetsAccumulator res = new AdaptiveFacetsAccumulator(searchParams, indexReader, taxoReader);
res.setSampler(sampler);
return res;
}
}

View File

@ -14,6 +14,7 @@ import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
@ -106,30 +107,31 @@ public class CategoryListIteratorTest extends LuceneTestCase {
IndexReader reader = writer.getReader();
writer.close();
IntsRef ordinals = new IntsRef();
CategoryListIterator cli = new PayloadCategoryListIteraor(reader, new Term("f","1"), encoder.createMatchingDecoder());
cli.init();
int totalCategories = 0;
for (int i = 0; i < data.length; i++) {
Set<Integer> values = new HashSet<Integer>();
for (int j = 0; j < data[i].length; j++) {
values.add(data[i].ints[j]);
IntsRef ordinals = new IntsRef();
CategoryListIterator cli = new PayloadCategoryListIteraor(new Term("f","1"), encoder.createMatchingDecoder());
for (AtomicReaderContext context : reader.leaves()) {
cli.setNextReader(context);
int maxDoc = context.reader().maxDoc();
int dataIdx = context.docBase;
for (int doc = 0; doc < maxDoc; doc++, dataIdx++) {
Set<Integer> values = new HashSet<Integer>();
for (int j = 0; j < data[dataIdx].length; j++) {
values.add(data[dataIdx].ints[j]);
}
cli.getOrdinals(doc, ordinals);
assertTrue("no ordinals for document " + doc, ordinals.length > 0);
for (int j = 0; j < ordinals.length; j++) {
assertTrue("expected category not found: " + ordinals.ints[j], values.contains(ordinals.ints[j]));
}
totalCategories += ordinals.length;
}
cli.getOrdinals(i, ordinals);
assertTrue("no ordinals for document " + i, ordinals.length > 0);
for (int j = 0; j < ordinals.length; j++) {
assertTrue("expected category not found: " + ordinals.ints[j], values.contains(ordinals.ints[j]));
}
totalCategories += ordinals.length;
}
assertEquals("Missing categories!",10,totalCategories);
assertEquals("Missing categories!", 10, totalCategories);
reader.close();
dir.close();
}
/**
* Test that a document with no payloads does not confuse the payload decoder.
*/
@Test
public void testPayloadIteratorWithInvalidDoc() throws Exception {
Directory dir = newDirectory();
@ -160,24 +162,28 @@ public class CategoryListIteratorTest extends LuceneTestCase {
IndexReader reader = writer.getReader();
writer.close();
IntsRef ordinals = new IntsRef();
CategoryListIterator cli = new PayloadCategoryListIteraor(reader, new Term("f","1"), encoder.createMatchingDecoder());
assertTrue("Failed to initialize payload iterator", cli.init());
int totalCategories = 0;
for (int i = 0; i < data.length; i++) {
Set<Integer> values = new HashSet<Integer>();
for (int j = 0; j < data[i].length; j++) {
values.add(data[i].ints[j]);
}
cli.getOrdinals(i, ordinals);
if (i == 0) {
assertTrue("document 0 must have a payload", ordinals.length > 0);
for (int j = 0; j < ordinals.length; j++) {
assertTrue("expected category not found: " + ordinals.ints[j], values.contains(ordinals.ints[j]));
IntsRef ordinals = new IntsRef();
CategoryListIterator cli = new PayloadCategoryListIteraor(new Term("f","1"), encoder.createMatchingDecoder());
for (AtomicReaderContext context : reader.leaves()) {
cli.setNextReader(context);
int maxDoc = context.reader().maxDoc();
int dataIdx = context.docBase;
for (int doc = 0; doc < maxDoc; doc++, dataIdx++) {
Set<Integer> values = new HashSet<Integer>();
for (int j = 0; j < data[dataIdx].length; j++) {
values.add(data[dataIdx].ints[j]);
}
cli.getOrdinals(doc, ordinals);
if (dataIdx == 0) {
assertTrue("document 0 must have a payload", ordinals.length > 0);
for (int j = 0; j < ordinals.length; j++) {
assertTrue("expected category not found: " + ordinals.ints[j], values.contains(ordinals.ints[j]));
}
totalCategories += ordinals.length;
} else {
assertTrue("only document 0 should have a payload", ordinals.length == 0);
}
totalCategories += ordinals.length;
} else {
assertTrue("only document 0 should have a payload", ordinals.length == 0);
}
}
assertEquals("Wrong number of total categories!", 2, totalCategories);

View File

@ -22,6 +22,7 @@ import org.apache.lucene.facet.search.params.FacetRequest;
import org.apache.lucene.facet.search.params.FacetSearchParams;
import org.apache.lucene.facet.search.results.FacetResult;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.index.AtomicReaderContext;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -132,8 +133,8 @@ public class TestCategoryListCache extends FacetTestBase {
}
}
@Override
public boolean init() throws IOException {
return it.init();
public boolean setNextReader(AtomicReaderContext context) throws IOException {
return it.setNextReader(context);
}
};
}

View File

@ -0,0 +1,128 @@
package org.apache.lucene.facet.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.facet.index.FacetFields;
import org.apache.lucene.facet.index.params.CategoryListParams;
import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.search.params.CountFacetRequest;
import org.apache.lucene.facet.search.params.FacetRequest;
import org.apache.lucene.facet.search.params.FacetSearchParams;
import org.apache.lucene.facet.search.results.FacetResult;
import org.apache.lucene.facet.search.results.FacetResultNode;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.facet.util.AssertingCategoryListIterator;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.Test;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
public class TestStandardFacetsAccumulator extends LuceneTestCase {
private void indexTwoDocs(IndexWriter indexWriter, FacetFields facetFields, boolean withContent) throws Exception {
for (int i = 0; i < 2; i++) {
Document doc = new Document();
if (withContent) {
doc.add(new StringField("f", "a", Store.NO));
}
if (facetFields != null) {
facetFields.addFields(doc, Collections.singletonList(new CategoryPath("A", Integer.toString(i))));
}
indexWriter.addDocument(doc);
}
indexWriter.commit();
}
@Test
public void testSegmentsWithoutCategoriesOrResults() throws Exception {
// tests the accumulator when there are segments with no results
Directory indexDir = newDirectory();
Directory taxoDir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
iwc.setMergePolicy(NoMergePolicy.COMPOUND_FILES); // prevent merges
IndexWriter indexWriter = new IndexWriter(indexDir, iwc);
FacetIndexingParams fip = new FacetIndexingParams(new CategoryListParams() {
@Override
public CategoryListIterator createCategoryListIterator(int partition) throws IOException {
return new AssertingCategoryListIterator(super.createCategoryListIterator(partition));
}
});
TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
FacetFields facetFields = new FacetFields(taxoWriter, fip);
indexTwoDocs(indexWriter, facetFields, false); // 1st segment, no content, with categories
indexTwoDocs(indexWriter, null, true); // 2nd segment, with content, no categories
indexTwoDocs(indexWriter, facetFields, true); // 3rd segment ok
indexTwoDocs(indexWriter, null, false); // 4th segment, no content, or categories
indexTwoDocs(indexWriter, null, true); // 5th segment, with content, no categories
indexTwoDocs(indexWriter, facetFields, true); // 6th segment, with content, with categories
IOUtils.close(indexWriter, taxoWriter);
DirectoryReader indexReader = DirectoryReader.open(indexDir);
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
IndexSearcher indexSearcher = new IndexSearcher(indexReader);
// search for "f:a", only segments 1 and 3 should match results
Query q = new TermQuery(new Term("f", "a"));
ArrayList<FacetRequest> requests = new ArrayList<FacetRequest>(1);
CountFacetRequest countNoComplements = new CountFacetRequest(new CategoryPath("A"), 10) {
@Override
public boolean supportsComplements() {
return false; // disable complements
}
};
requests.add(countNoComplements);
FacetSearchParams fsp = new FacetSearchParams(requests, fip);
FacetsCollector fc = new FacetsCollector(fsp , indexReader, taxoReader);
indexSearcher.search(q, fc);
List<FacetResult> results = fc.getFacetResults();
assertEquals("received too many facet results", 1, results.size());
FacetResultNode frn = results.get(0).getFacetResultNode();
assertEquals("wrong weight for \"A\"", 4, (int) frn.getValue());
assertEquals("wrong number of children", 2, frn.getNumSubResults());
for (FacetResultNode node : frn.getSubResults()) {
assertEquals("wrong weight for child " + node.getLabel(), 2, (int) node.getValue());
}
IOUtils.close(indexReader, taxoReader);
IOUtils.close(indexDir, taxoDir);
}
}

View File

@ -17,6 +17,7 @@ import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.facet.util.MultiCategoryListIterator;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
@ -100,21 +101,24 @@ public class MultiCategoryListIteratorTest extends LuceneTestCase {
clCache.loadAndRegister(clp, indexReader, taxoReader, indexingParams);
iterators[i] = clCache.get(clp).iterator(0); // no partitions
} else {
iterators[i] = new PayloadCategoryListIteraor(indexReader, clp.getTerm(), decoder);
iterators[i] = new PayloadCategoryListIteraor(clp.getTerm(), decoder);
}
}
MultiCategoryListIterator cli = new MultiCategoryListIterator(iterators);
assertTrue("failed to init multi-iterator", cli.init());
IntsRef ordinals = new IntsRef();
int maxDoc = indexReader.maxDoc();
for (int i = 0; i < maxDoc; i++) {
cli.getOrdinals(i, ordinals);
assertTrue("document " + i + " does not have categories", ordinals.length > 0);
for (int j = 0; j < ordinals.length; j++) {
CategoryPath cp = taxoReader.getPath(ordinals.ints[j]);
assertNotNull("ordinal " + ordinals.ints[j] + " not found in taxonomy", cp);
if (cp.length == 2) {
assertEquals("invalid category for document " + i, i, Integer.parseInt(cp.components[1]));
for (AtomicReaderContext context : indexReader.leaves()) {
assertTrue("failed to init multi-iterator", cli.setNextReader(context));
IntsRef ordinals = new IntsRef();
final int maxDoc = context.reader().maxDoc();
for (int i = 0; i < maxDoc; i++) {
cli.getOrdinals(i, ordinals);
assertTrue("document " + i + " does not have categories", ordinals.length > 0);
for (int j = 0; j < ordinals.length; j++) {
CategoryPath cp = taxoReader.getPath(ordinals.ints[j]);
assertNotNull("ordinal " + ordinals.ints[j] + " not found in taxonomy", cp);
if (cp.length == 2) {
int globalDoc = i + context.docBase;
assertEquals("invalid category for document " + globalDoc, globalDoc, Integer.parseInt(cp.components[1]));
}
}
}
}

View File

@ -59,9 +59,8 @@ public abstract class BaseSampleTestTopK extends BaseTestTopK {
return res;
}
protected abstract FacetsAccumulator getSamplingAccumulator(Sampler sampler,
TaxonomyReader taxoReader, IndexReader indexReader,
FacetSearchParams searchParams);
protected abstract FacetsAccumulator getSamplingAccumulator(Sampler sampler, TaxonomyReader taxoReader,
IndexReader indexReader, FacetSearchParams searchParams);
/**
* Try out faceted search with sampling enabled and complements either disabled or enforced
@ -89,7 +88,7 @@ public abstract class BaseSampleTestTopK extends BaseTestTopK {
// try several times in case of failure, because the test has a chance to fail
// if the top K facets are not sufficiently common with the sample set
for (int nTrial=0; nTrial<RETRIES; nTrial++) {
for (int nTrial = 0; nTrial < RETRIES; nTrial++) {
try {
// complement with sampling!
final Sampler sampler = createSampler(nTrial, docCollector.getScoredDocIDs(), useRandomSampler);
@ -99,7 +98,7 @@ public abstract class BaseSampleTestTopK extends BaseTestTopK {
break; // succeeded
} catch (NotSameResultError e) {
if (nTrial>=RETRIES-1) {
if (nTrial >= RETRIES - 1) {
throw e; // no more retries allowed, must fail
}
}
@ -119,14 +118,11 @@ public abstract class BaseSampleTestTopK extends BaseTestTopK {
assertSameResults(expected, sampledResults);
}
private FacetsCollector samplingCollector(
final boolean complement,
final Sampler sampler,
private FacetsCollector samplingCollector(final boolean complement, final Sampler sampler,
FacetSearchParams samplingSearchParams) {
FacetsCollector samplingFC = new FacetsCollector(samplingSearchParams, indexReader, taxoReader) {
@Override
protected FacetsAccumulator initFacetsAccumulator(
FacetSearchParams facetSearchParams, IndexReader indexReader,
protected FacetsAccumulator initFacetsAccumulator(FacetSearchParams facetSearchParams, IndexReader indexReader,
TaxonomyReader taxonomyReader) {
FacetsAccumulator acc = getSamplingAccumulator(sampler, taxonomyReader, indexReader, facetSearchParams);
acc.setComplementThreshold(complement ? FacetsAccumulator.FORCE_COMPLEMENT : FacetsAccumulator.DISABLE_COMPLEMENT);
@ -144,12 +140,13 @@ public abstract class BaseSampleTestTopK extends BaseTestTopK {
samplingParams.setMinSampleSize((int) (100 * retryFactor));
samplingParams.setMaxSampleSize((int) (10000 * retryFactor));
samplingParams.setOversampleFactor(5.0 * retryFactor);
samplingParams.setSamplingThreshold(11000); //force sampling
samplingParams.setSamplingThreshold(11000); //force sampling
Sampler sampler = useRandomSampler ?
new RandomSampler(samplingParams, new Random(random().nextLong())) :
new RepeatableSampler(samplingParams);
assertTrue("must enable sampling for this test!",sampler.shouldSample(scoredDocIDs));
return sampler;
}
}

View File

@ -0,0 +1,65 @@
package org.apache.lucene.facet.util;
import java.io.IOException;
import org.apache.lucene.facet.search.CategoryListIterator;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.IntsRef;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* A {@link CategoryListIterator} which asserts that
* {@link #getOrdinals(int, IntsRef)} is not called before
* {@link #setNextReader(AtomicReaderContext)} and that if
* {@link #setNextReader(AtomicReaderContext)} returns false,
* {@link #getOrdinals(int, IntsRef)} isn't called.
*/
public class AssertingCategoryListIterator implements CategoryListIterator {
private final CategoryListIterator delegate;
private boolean setNextReaderCalled = false;
private boolean validSegment = false;
private int maxDoc;
public AssertingCategoryListIterator(CategoryListIterator delegate) {
this.delegate = delegate;
}
@Override
public boolean setNextReader(AtomicReaderContext context) throws IOException {
setNextReaderCalled = true;
maxDoc = context.reader().maxDoc();
return validSegment = delegate.setNextReader(context);
}
@Override
public void getOrdinals(int docID, IntsRef ints) throws IOException {
if (!setNextReaderCalled) {
throw new RuntimeException("should not call getOrdinals without setNextReader first");
}
if (!validSegment) {
throw new RuntimeException("should not call getOrdinals if setNextReader returned false");
}
if (docID >= maxDoc) {
throw new RuntimeException("docID is larger than current maxDoc; forgot to call setNextReader?");
}
delegate.getOrdinals(docID, ints);
}
}
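A short sketch of how this asserting wrapper might be hooked in, mirroring the CategoryListParams override used in TestStandardFacetsAccumulator earlier in this commit (the anonymous subclass here is illustrative):

import java.io.IOException;
import org.apache.lucene.facet.index.params.CategoryListParams;
import org.apache.lucene.facet.search.CategoryListIterator;
import org.apache.lucene.facet.util.AssertingCategoryListIterator;

class AssertingParamsSketch {
  static CategoryListParams assertingParams() {
    return new CategoryListParams() {
      @Override
      public CategoryListIterator createCategoryListIterator(int partition) throws IOException {
        // Every iterator handed out is wrapped, so misuse (getOrdinals before
        // setNextReader, or after setNextReader returned false) fails fast.
        return new AssertingCategoryListIterator(super.createCategoryListIterator(partition));
      }
    };
  }
}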

View File

@ -9,6 +9,9 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
import org.apache.lucene.facet.search.ScoredDocIDs;
import org.apache.lucene.facet.search.ScoredDocIDsIterator;
import org.apache.lucene.facet.search.ScoredDocIdCollector;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
@ -21,14 +24,9 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.util.OpenBitSetDISI;
import org.junit.Test;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.facet.search.ScoredDocIDs;
import org.apache.lucene.facet.search.ScoredDocIDsIterator;
import org.apache.lucene.facet.search.ScoredDocIdCollector;
import org.junit.Test;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -52,21 +50,21 @@ public class TestScoredDocIDsUtils extends LuceneTestCase {
@Test
public void testComplementIterator() throws Exception {
final int n = atLeast(10000);
final OpenBitSet bits = new OpenBitSet(n);
for (int i = 0; i < 5 * n; i++) {
bits.flip(random().nextInt(n));
final FixedBitSet bits = new FixedBitSet(n);
Random random = random();
for (int i = 0; i < n; i++) {
int idx = random.nextInt(n);
bits.flip(idx, idx + 1);
}
OpenBitSet verify = new OpenBitSet(n);
verify.or(bits);
FixedBitSet verify = new FixedBitSet(bits);
ScoredDocIDs scoredDocIDs = ScoredDocIdsUtils.createScoredDocIds(bits, n);
Directory dir = newDirectory();
IndexReader reader = createReaderWithNDocs(random(), n, dir);
IndexReader reader = createReaderWithNDocs(random, n, dir);
try {
assertEquals(n - verify.cardinality(), ScoredDocIdsUtils.getComplementSet(scoredDocIDs,
reader).size());
assertEquals(n - verify.cardinality(), ScoredDocIdsUtils.getComplementSet(scoredDocIDs, reader).size());
} finally {
reader.close();
dir.close();
@ -147,7 +145,7 @@ public class TestScoredDocIDsUtils extends LuceneTestCase {
searcher.search(q, collector);
ScoredDocIDs scoredDocIds = collector.getScoredDocIDs();
OpenBitSet resultSet = new OpenBitSetDISI(scoredDocIds.getDocIDs().iterator(), reader.maxDoc());
FixedBitSet resultSet = (FixedBitSet) scoredDocIds.getDocIDs();
// Getting the complement set of the query result
ScoredDocIDs complementSet = ScoredDocIdsUtils.getComplementSet(scoredDocIds, reader);
@ -164,12 +162,11 @@ public class TestScoredDocIDsUtils extends LuceneTestCase {
assertFalse(
"Complement-Set must not contain deleted documents (doc="+docNum+")",
live != null && !live.get(docNum));
assertNull(
"Complement-Set must not contain docs from the original set (doc="+ docNum+")",
assertNull("Complement-Set must not contain docs from the original set (doc="+ docNum+")",
reader.document(docNum).getField("del"));
assertFalse(
"Complement-Set must not contain docs from the original set (doc="+docNum+")",
resultSet.fastGet(docNum));
resultSet.get(docNum));
}
} finally {
reader.close();

View File

@ -24,6 +24,7 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.fst.*;
import org.apache.lucene.util.packed.PackedInts;
/**
* Finite state automata based implementation of "autocomplete" functionality.
@ -237,7 +238,8 @@ public class FSTCompletionBuilder {
final Object empty = outputs.getNoOutput();
final Builder<Object> builder = new Builder<Object>(
FST.INPUT_TYPE.BYTE1, 0, 0, true, true,
shareMaxTailLength, outputs, null, false, true);
shareMaxTailLength, outputs, null, false,
PackedInts.DEFAULT, true, 15);
BytesRef scratch = new BytesRef();
BytesRef entry;
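Builder callers now pass three extra trailing arguments. A hedged sketch of the construction above with those arguments annotated; the comments reflect a reading of the FST changes in the changelog rather than anything stated in this diff, and NoOutputs stands in for whatever outputs a real caller uses:

import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.NoOutputs;
import org.apache.lucene.util.fst.Outputs;
import org.apache.lucene.util.packed.PackedInts;

class FstBuilderSketch {
  static Builder<Object> newBuilder(int shareMaxTailLength) {
    Outputs<Object> outputs = NoOutputs.getSingleton();
    return new Builder<Object>(
        FST.INPUT_TYPE.BYTE1, 0, 0, true, true,
        shareMaxTailLength, outputs, null, false,
        PackedInts.DEFAULT, // assumed: acceptable overhead ratio when packing the FST
        true,               // assumed: allow array-encoded arcs
        15);                // assumed: page size in bits for the paged byte[] store (32 KB pages)
  }
}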

View File

@ -40,6 +40,7 @@ import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util._TestUtil;
import org.apache.lucene.util.packed.PackedInts;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
@ -288,7 +289,16 @@ public class FSTTester<T> {
outputs,
null,
willRewrite,
true);
PackedInts.DEFAULT,
true,
15);
if (LuceneTestCase.VERBOSE) {
if (willRewrite) {
System.out.println("TEST: packed FST");
} else {
System.out.println("TEST: non-packed FST");
}
}
for(InputOutput<T> pair : pairs) {
if (pair.output instanceof List) {

View File

@ -41,8 +41,6 @@ Detailed Change List
Other Changes
----------------------
* SOLR-3735: Relocate the example mime-to-extension mapping, and
upgrade Velocity Engine to 1.7 (ehatcher)
================== 4.1.0 ==================
@ -50,14 +48,14 @@ Versions of Major Components
---------------------
Apache Tika 1.2
Carrot2 3.6.2
Velocity 1.6.4 and Velocity Tools 2.0
Velocity 1.7 and Velocity Tools 2.0
Apache UIMA 2.3.1
Apache ZooKeeper 3.4.5
Upgrading from Solr 4.0.0
----------------------
Custom java parsing plugins need to migrade from throwing the internal
Custom java parsing plugins need to migrate from throwing the internal
ParseException to throwing SyntaxError.
BaseDistributedSearchTestCase now randomizes the servlet context it uses when
@ -150,7 +148,7 @@ New Features
CoreAdmin API the same way as the data directory. (Mark Miller)
* SOLR-4028: When using ZK chroot, it would be nice if Solr would create the
initial path when it doesn't exist. (Tomas Fernandez Lobbe via Mark Miller)
initial path when it doesn't exist. (Tomás Fernández Löbbe via Mark Miller)
* SOLR-3948: Calculate/display deleted documents in admin interface.
(Shawn Heisey via Mark Miller)
@ -209,6 +207,9 @@ New Features
* SOLR-2201: DIH's "formatDate" function now supports a timezone as an optional
fourth parameter (James Dyer, Mark Waddle)
* SOLR-4302: New parameter 'indexInfo' (defaults to true) in CoreAdmin STATUS
command can be used to omit index specific information (Shahar Davidson via shalin)
Optimizations
----------------------
@ -226,12 +227,12 @@ Optimizations
dynamicField's (steffkes)
* SOLR-3941: The "commitOnLeader" part of distributed recovery can use
openSearcher=false. (Tomas Fernandez Lobbe via Mark Miller)
openSearcher=false. (Tomás Fernández Löbbe via Mark Miller)
* SOLR-4063: Allow CoreContainer to load multiple SolrCores in parallel rather
than just serially. (Mark Miller)
* SOLR-4199: When doing zk retries due to connectionloss, rather than just
* SOLR-4199: When doing zk retries due to connection loss, rather than just
retrying for 2 minutes, retry in proportion to the session timeout.
(Mark Miller)
@ -250,6 +251,10 @@ Optimizations
* SOLR-3982: Admin UI: Various Dataimport Improvements (steffkes)
* SOLR-4296: Admin UI: Improve Dataimport Auto-Refresh (steffkes)
* SOLR-3458: Allow multiple Items to stay open on Plugins-Page (steffkes)
Bug Fixes
----------------------
@ -362,7 +367,7 @@ Bug Fixes
* SOLR-4081: QueryParsing.toString, used during debugQuery=true, did not
correctly handle ExtendedQueries such as WrappedQuery
(used when cache=false), spatial queries, and frange queires.
(used when cache=false), spatial queries, and frange queries.
(Eirik Lygre, yonik)
* SOLR-3959: Ensure the internal comma separator of poly fields is escaped
@ -403,7 +408,7 @@ Bug Fixes
* SOLR-4162: ZkCli usage examples are not correct because the zkhost parameter
is not present and it is mandatory for all commands.
(Tomas Fernandez Lobbe via Mark Miller)
(Tomás Fernández Löbbe via Mark Miller)
* SOLR-4071: Validate that name is pass to Collections API create, and behave the
same way as on startup when collection.configName is not explicitly passed.
@ -495,7 +500,7 @@ Bug Fixes
* SOLR-4279: Wrong exception message if _version_ field is multivalued (shalin)
* SOLR-4170: The 'backup' ReplicationHandler command can sometimes use a stale
index directory rather than the current one. (Mark Miller, Marcin Rzewuck)
index directory rather than the current one. (Mark Miller, Marcin Rzewucki)
* SOLR-3876: Solr Admin UI is completely dysfunctional on IE 9 (steffkes)
@ -503,6 +508,17 @@ Bug Fixes
import works fine with SolrCloud clusters (Deniz Durmus, James Dyer,
Erick Erickson, shalin)
* SOLR-4291: Harden the Overseer work queue thread loop. (Mark Miller)
* SOLR-3820: Solr Admin Query form is missing some edismax request parameters
(steffkes)
* SOLR-4217: post.jar no longer ignores -Dparams when -Durl is used.
(Alexandre Rafalovitch, ehatcher)
* SOLR-4303: On replication, if the generation of the master is lower than the
slave we need to force a full copy of the index. (Mark Miller, Gregg Donovan)
Other Changes
----------------------
@ -580,6 +596,16 @@ Other Changes
* SOLR-4208: ExtendedDismaxQParserPlugin has been refactored to make
subclassing easier. (Tomás Fernández Löbbe, hossman)
* SOLR-3735: Relocate the example mime-to-extension mapping, and
upgrade Velocity Engine to 1.7 (ehatcher)
* SOLR-4287: Removed "apache-" prefix from Solr distribution and artifact
filenames. (Ryan Ernst, Robert Muir, Steve Rowe)
* SOLR-4016: Deduplication does not work with atomic/partial updates so
disallow atomic update requests which change signature generating fields.
(Joel Nothman, yonik, shalin)
================== 4.0.0 ==================
Versions of Major Components
@ -862,7 +888,7 @@ Bug Fixes
* SOLR-3527: SolrCmdDistributor drops some of the important commit attributes
(maxOptimizeSegments, softCommit, expungeDeletes) when sending a commit to
replicas. (Andy Laird, Tomas Fernandez Lobbe, Mark Miller)
replicas. (Andy Laird, Tomás Fernández Löbbe, Mark Miller)
* SOLR-3844: SolrCore reload can fail because it tries to remove the index
write lock while already holding it. (Mark Miller)
@ -1273,7 +1299,7 @@ New Features
* SOLR-571: The autowarmCount for LRUCaches (LRUCache and FastLRUCache) now
supports "percentages" which get evaluated relative the current size of
the cache when warming happens.
(Tomas Fernandez Lobbe and hossman)
(Tomás Fernández Löbbe and hossman)
* SOLR-1932: New relevancy function queries: termfreq, tf, docfreq, idf
norm, maxdoc, numdocs. (yonik)
@ -1644,12 +1670,12 @@ Bug Fixes
down to it via acceptDocs since LUCENE-1536. (Mike Hugo, yonik)
* SOLR-3214: If you use multiple fl entries rather than a comma separated list, all but the first
entry can be ignored if you are using distributed search. (Tomas Fernandez Lobbe via Mark Miller)
entry can be ignored if you are using distributed search. (Tomás Fernández Löbbe via Mark Miller)
* SOLR-3352: eDismax: pf2 should kick in for a query with 2 terms (janhoy)
* SOLR-3361: ReplicationHandler "maxNumberOfBackups" doesn't work if backups are triggered on commit
(James Dyer, Tomas Fernandez Lobbe)
(James Dyer, Tomás Fernández Löbbe)
* SOLR-2605: fixed tracking of the 'defaultCoreName' in CoreContainer so that
CoreAdminHandler could return consistent information regardless of whether
@ -1864,7 +1890,17 @@ Documentation
* SOLR-2232: Improved README info on solr.solr.home in examples
(Eric Pugh and hossman)
================== 3.6.2 ==================
Bug Fixes
----------------------
* SOLR-3790: ConcurrentModificationException could be thrown when using hl.fl=*.
(yonik, koji)
* SOLR-3589: Edismax parser does not honor mm parameter if analyzer splits a token.
(Tom Burton-West, Robert Muir)
================== 3.6.1 ==================
More information about this release, including any errata related to the
release notes, upgrade instructions, or other changes may be found online at:
@ -1877,7 +1913,7 @@ Bug Fixes
(Uwe Schindler, Mike McCandless, Robert Muir)
* SOLR-3361: ReplicationHandler "maxNumberOfBackups" doesn't work if backups are triggered on commit
(James Dyer, Tomas Fernandez Lobbe)
(James Dyer, Tomás Fernández Löbbe)
* SOLR-3375: Fix charset problems with HttpSolrServer (Roger Håkansson, yonik, siren)

View File

@ -45,11 +45,11 @@ example/
Please see example/README.txt for information about running this
example.
dist/apache-solr-XX.war
dist/solr-XX.war
The Apache Solr Application. Deploy this WAR file to any servlet
container to run Apache Solr.
dist/apache-solr-<component>-XX.jar
dist/solr-<component>-XX.jar
The Apache Solr libraries. To compile Apache Solr Plugins,
one or more of these will be required. The core library is
required at a minimum. (see http://wiki.apache.org/solr/SolrPlugins

View File

@ -25,7 +25,7 @@
<property name="Name" value="Solr" />
<property name="version" value="5.0-SNAPSHOT"/>
<property name="fullname" value="apache-${ant.project.name}"/>
<property name="fullname" value="${ant.project.name}"/>
<property name="fullnamever" value="${fullname}-${version}"/>
<property name="final.name" value="${fullnamever}"/>
@ -114,7 +114,7 @@
<attribute name="property" default="@{name}.uptodate"/>
<attribute name="classpath.property" default="@{name}.jar"/>
<!-- set jarfile only, if the target jar file has no generic name -->
<attribute name="jarfile" default="${common-solr.dir}/build/contrib/solr-@{name}/apache-solr-@{name}-${version}.jar"/>
<attribute name="jarfile" default="${common-solr.dir}/build/contrib/solr-@{name}/solr-@{name}-${version}.jar"/>
<sequential>
<!--<echo message="Checking '@{jarfile}' against source folder '${common.dir}/contrib/@{name}/src/java'"/>-->
<property name="@{classpath.property}" location="@{jarfile}"/>
@ -214,13 +214,13 @@
</target>
<target name="check-solr-core-javadocs-uptodate" unless="solr-core-javadocs.uptodate">
<uptodate property="solr-core-javadocs.uptodate" targetfile="${build.dir}/solr-core/apache-solr-core-${version}-javadoc.jar">
<uptodate property="solr-core-javadocs.uptodate" targetfile="${build.dir}/solr-core/solr-core-${version}-javadoc.jar">
<srcfiles dir="${common-solr.dir}/core/src/java" includes="**/*.java"/>
</uptodate>
</target>
<target name="check-solrj-javadocs-uptodate" unless="solrj-javadocs.uptodate">
<uptodate property="solrj-javadocs.uptodate" targetfile="${build.dir}/solr-solrj/apache-solr-solrj-${version}-javadoc.jar">
<uptodate property="solrj-javadocs.uptodate" targetfile="${build.dir}/solr-solrj/solr-solrj-${version}-javadoc.jar">
<srcfiles dir="${common-solr.dir}/solrj/src/java" includes="**/*.java"/>
</uptodate>
</target>

View File

@ -19,7 +19,7 @@ To start using Solr UIMA Metadata Extraction Library you should go through the f
<lib dir="../../contrib/uima/lib" />
<lib dir="../../contrib/uima/lucene-libs" />
<lib dir="../../dist/" regex="apache-solr-uima-\d.*\.jar" />
<lib dir="../../dist/" regex="solr-uima-\d.*\.jar" />
2. modify your schema.xml adding the fields you want to be hold metadata specifying proper values for type, indexed, stored and multiValued options:

View File

@ -44,8 +44,8 @@
in that directory which completely match the regex (anchored on both
ends) will be included.
-->
<lib dir="../../dist/" regex="apache-solr-cell-\d.*\.jar" />
<lib dir="../../dist/" regex="apache-solr-clustering-\d.*\.jar" />
<lib dir="../../dist/" regex="solr-cell-\d.*\.jar" />
<lib dir="../../dist/" regex="solr-clustering-\d.*\.jar" />
<!--
If a dir option (with or without a regex) is used and nothing is
found that matches, it will be ignored

View File

@ -44,8 +44,8 @@
in that directory which completely match the regex (anchored on both
ends) will be included.
-->
<lib dir="../../dist/" regex="apache-solr-cell-\d.*\.jar" />
<lib dir="../../dist/" regex="apache-solr-clustering-\d.*\.jar" />
<lib dir="../../dist/" regex="solr-cell-\d.*\.jar" />
<lib dir="../../dist/" regex="solr-clustering-\d.*\.jar" />
<!--
If a dir option (with or without a regex) is used and nothing is
found that matches, it will be ignored

View File

@ -24,7 +24,7 @@
<luceneMatchVersion>${tests.luceneMatchVersion:LUCENE_CURRENT}</luceneMatchVersion>
<lib dir="../../contrib/velocity/lib" />
<lib dir="../../dist/" regex="apache-solr-velocity-\d.*\.jar" />
<lib dir="../../dist/" regex="solr-velocity-\d.*\.jar" />
<dataDir>${solr.data.dir:}</dataDir>

View File

@ -22,14 +22,12 @@ import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.ClosableThread;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.DocRouter;
import org.apache.solr.common.cloud.DocRouter;
import org.apache.solr.common.cloud.ImplicitDocRouter;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Slice;
@ -37,7 +35,6 @@ import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.cloud.ZkCoreNodeProps;
import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.cloud.ZooKeeperException;
import org.apache.solr.handler.component.ShardHandler;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
@ -78,46 +75,48 @@ public class Overseer {
@Override
public void run() {
if(!this.isClosed && amILeader()) {
if (!this.isClosed && amILeader()) {
// see if there's something left from the previous Overseer and re
// process all events that were not persisted into cloud state
synchronized (reader.getUpdateLock()) { //XXX this only protects against edits inside single node
try {
byte[] head = workQueue.peek();
synchronized (reader.getUpdateLock()) { // XXX this only protects
// against edits inside single
// node
try {
byte[] head = workQueue.peek();
if (head != null) {
reader.updateClusterState(true);
ClusterState clusterState = reader.getClusterState();
log.info("Replaying operations from work queue.");
if (head != null) {
reader.updateClusterState(true);
ClusterState clusterState = reader.getClusterState();
log.info("Replaying operations from work queue.");
while (head != null && amILeader()) {
final ZkNodeProps message = ZkNodeProps.load(head);
final String operation = message.getStr(QUEUE_OPERATION);
clusterState = processMessage(clusterState, message, operation);
zkClient.setData(ZkStateReader.CLUSTER_STATE,
ZkStateReader.toJSON(clusterState), true);
while (head != null && amILeader()) {
final ZkNodeProps message = ZkNodeProps.load(head);
final String operation = message
.getStr(QUEUE_OPERATION);
clusterState = processMessage(clusterState, message, operation);
zkClient.setData(ZkStateReader.CLUSTER_STATE,
ZkStateReader.toJSON(clusterState), true);
workQueue.poll();
head = workQueue.peek();
}
workQueue.poll();
head = workQueue.peek();
}
} catch (KeeperException e) {
if (e.code() == KeeperException.Code.SESSIONEXPIRED
|| e.code() == KeeperException.Code.CONNECTIONLOSS) {
log.warn("Solr cannot talk to ZK");
return;
}
SolrException.log(log, "", e);
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR,
"", e);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
}
} catch (KeeperException e) {
if (e.code() == KeeperException.Code.SESSIONEXPIRED) {
log.warn("Solr cannot talk to ZK, exiting Overseer work queue loop", e);
return;
}
log.error("Exception in Overseer work queue loop", e);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
return;
} catch (Exception e) {
log.error("Exception in Overseer work queue loop", e);
}
}
}
log.info("Starting to work on the main queue");
while (!this.isClosed && amILeader()) {
@ -146,17 +145,17 @@ public class Overseer {
while (workQueue.poll() != null);
} catch (KeeperException e) {
if (e.code() == KeeperException.Code.SESSIONEXPIRED
|| e.code() == KeeperException.Code.CONNECTIONLOSS) {
log.warn("Overseer cannot talk to ZK");
if (e.code() == KeeperException.Code.SESSIONEXPIRED) {
log.warn("Solr cannot talk to ZK, exiting Overseer main queue loop", e);
return;
}
SolrException.log(log, "", e);
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR,
"", e);
log.error("Exception in Overseer main queue loop", e);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
return;
} catch (Exception e) {
log.error("Exception in Overseer main queue loop", e);
}
}

View File

@ -40,7 +40,7 @@ import org.slf4j.LoggerFactory;
/**
* A {@link DirectoryFactory} impl base class for caching Directory instances
* per path. Most DirectoryFactory implementations will want to extend this
* class and simply implement {@link DirectoryFactory#create(String)}.
* class and simply implement {@link DirectoryFactory#create(String, DirContext)}.
*
*/
public abstract class CachingDirectoryFactory extends DirectoryFactory {
@ -202,7 +202,7 @@ public abstract class CachingDirectoryFactory extends DirectoryFactory {
}
@Override
protected abstract Directory create(String path) throws IOException;
protected abstract Directory create(String path, DirContext dirContext) throws IOException;
@Override
public boolean exists(String path) {
@ -218,9 +218,9 @@ public abstract class CachingDirectoryFactory extends DirectoryFactory {
* java.lang.String)
*/
@Override
public final Directory get(String path, String rawLockType)
public final Directory get(String path, DirContext dirContext, String rawLockType)
throws IOException {
return get(path, rawLockType, false);
return get(path, dirContext, rawLockType, false);
}
/*
@ -230,7 +230,7 @@ public abstract class CachingDirectoryFactory extends DirectoryFactory {
* java.lang.String, boolean)
*/
@Override
public final Directory get(String path, String rawLockType, boolean forceNew)
public final Directory get(String path, DirContext dirContext, String rawLockType, boolean forceNew)
throws IOException {
String fullPath = new File(path).getAbsolutePath();
synchronized (this) {
@ -264,7 +264,7 @@ public abstract class CachingDirectoryFactory extends DirectoryFactory {
}
if (directory == null || forceNew) {
directory = create(fullPath);
directory = create(fullPath, dirContext);
directory = rateLimit(directory);
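With DirContext threaded through, a concrete factory only has to implement the new create hook. A minimal sketch (the class name is hypothetical), modeled on the StandardDirectoryFactory and MMapDirectoryFactory changes later in this commit:

import java.io.File;
import java.io.IOException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.solr.core.DirectoryFactory.DirContext;
import org.apache.solr.core.StandardDirectoryFactory;

public class SketchDirectoryFactory extends StandardDirectoryFactory {
  @Override
  protected Directory create(String path, DirContext dirContext) throws IOException {
    // dirContext is only a hint (DEFAULT vs META_DATA); this sketch ignores it
    // and always opens a plain FSDirectory, as StandardDirectoryFactory does.
    return FSDirectory.open(new File(path));
  }
}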

View File

@ -1626,10 +1626,15 @@ public class CoreContainer
return schema;
}
private static final String DEF_SOLR_XML ="<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n" +
"<solr persistent=\"false\">\n" +
" <cores adminPath=\"/admin/cores\" defaultCoreName=\"" + DEFAULT_DEFAULT_CORE_NAME + "\">\n" +
" <core name=\""+ DEFAULT_DEFAULT_CORE_NAME + "\" shard=\"${shard:}\" instanceDir=\"collection1\" />\n" +
" </cores>\n" +
"</solr>";
private static final String DEF_SOLR_XML = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n"
+ "<solr persistent=\"false\">\n"
+ " <cores adminPath=\"/admin/cores\" defaultCoreName=\""
+ DEFAULT_DEFAULT_CORE_NAME
+ "\""
+ " host=\"${host:}\" hostPort=\"${hostPort:}\" hostContext=\"${hostContext:}\" zkClientTimeout=\"${zkClientTimeout:15000}\""
+ ">\n"
+ " <core name=\""
+ DEFAULT_DEFAULT_CORE_NAME
+ "\" shard=\"${shard:}\" collection=\"${collection:}\" instanceDir=\"collection1\" />\n"
+ " </cores>\n" + "</solr>";
}

View File

@ -41,6 +41,8 @@ public abstract class DirectoryFactory implements NamedListInitializedPlugin,
// A large estimate should currently have no other side effects.
public static final IOContext IOCONTEXT_NO_CACHE = new IOContext(new FlushInfo(10*1000*1000, 100L*1000*1000*1000));
// hint about what the directory contains - default is index directory
public enum DirContext {DEFAULT, META_DATA}
private static final Logger log = LoggerFactory.getLogger(DirectoryFactory.class.getName());
@ -71,7 +73,7 @@ public abstract class DirectoryFactory implements NamedListInitializedPlugin,
*
* @throws IOException If there is a low-level I/O error.
*/
protected abstract Directory create(String path) throws IOException;
protected abstract Directory create(String path, DirContext dirContext) throws IOException;
/**
* Returns true if a Directory exists for a given path.
@ -118,7 +120,7 @@ public abstract class DirectoryFactory implements NamedListInitializedPlugin,
*
* @throws IOException If there is a low-level I/O error.
*/
public abstract Directory get(String path, String rawLockType)
public abstract Directory get(String path, DirContext dirContext, String rawLockType)
throws IOException;
/**
@ -130,7 +132,7 @@ public abstract class DirectoryFactory implements NamedListInitializedPlugin,
*
* @throws IOException If there is a low-level I/O error.
*/
public abstract Directory get(String path, String rawLockType,
public abstract Directory get(String path, DirContext dirContext, String rawLockType,
boolean forceNew) throws IOException;
/**
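Call sites now state what the directory holds via DirContext. A sketch of the lookup pattern, assuming the existing release(Directory) counterpart on DirectoryFactory (release is not shown in this diff) and a method name of my own:

import java.io.IOException;
import org.apache.lucene.store.Directory;
import org.apache.solr.core.DirectoryFactory;
import org.apache.solr.core.DirectoryFactory.DirContext;
import org.apache.solr.core.SolrCore;

class DirContextUsageSketch {
  static long indexSizeInBytes(SolrCore core) throws IOException {
    Directory dir = core.getDirectoryFactory().get(core.getNewIndexDir(),
        DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
    try {
      return DirectoryFactory.sizeOfDirectory(dir);
    } finally {
      core.getDirectoryFactory().release(dir); // assumed counterpart to get(...)
    }
  }
}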

View File

@ -22,6 +22,7 @@ import org.apache.lucene.store.LockFactory; // javadocs
import org.apache.lucene.store.MMapDirectory;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.DirectoryFactory.DirContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -56,7 +57,7 @@ public class MMapDirectoryFactory extends StandardDirectoryFactory {
}
@Override
protected Directory create(String path) throws IOException {
protected Directory create(String path, DirContext dirContext) throws IOException {
MMapDirectory mapDirectory = new MMapDirectory(new File(path), null, maxChunk);
try {
mapDirectory.setUseUnmap(unmapHack);

View File

@ -18,6 +18,7 @@ package org.apache.solr.core;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.NIOFSDirectory;
import org.apache.solr.core.DirectoryFactory.DirContext;
import java.io.File;
import java.io.IOException;
@ -30,7 +31,7 @@ import java.io.IOException;
public class NIOFSDirectoryFactory extends StandardDirectoryFactory {
@Override
protected Directory create(String path) throws IOException {
protected Directory create(String path, DirContext dirContext) throws IOException {
return new NIOFSDirectory(new File(path));
}

View File

@ -25,6 +25,7 @@ import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.NRTCachingDirectory;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.DirectoryFactory.DirContext;
/**
* Factory to instantiate {@link org.apache.lucene.store.NRTCachingDirectory}
@ -48,7 +49,7 @@ public class NRTCachingDirectoryFactory extends StandardDirectoryFactory {
}
@Override
protected Directory create(String path) throws IOException {
protected Directory create(String path, DirContext dirContext) throws IOException {
return new NRTCachingDirectory(FSDirectory.open(new File(path)), maxMergeSizeMB, maxCachedMB);
}

View File

@ -28,7 +28,7 @@ import org.apache.lucene.store.RAMDirectory;
public class RAMDirectoryFactory extends EphemeralDirectoryFactory {
@Override
protected Directory create(String path) throws IOException {
protected Directory create(String path, DirContext dirContext) throws IOException {
return new RAMDirectory();
}

View File

@ -18,6 +18,7 @@ package org.apache.solr.core;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.solr.core.DirectoryFactory.DirContext;
import java.io.File;
import java.io.IOException;
@ -30,7 +31,7 @@ import java.io.IOException;
public class SimpleFSDirectoryFactory extends StandardDirectoryFactory {
@Override
protected Directory create(String path) throws IOException {
protected Directory create(String path, DirContext dirContext) throws IOException {
return new SimpleFSDirectory(new File(path));
}

View File

@ -69,6 +69,7 @@ import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.ExecutorUtil;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.DirectoryFactory.DirContext;
import org.apache.solr.handler.SnapPuller;
import org.apache.solr.handler.admin.ShowFileRequestHandler;
import org.apache.solr.handler.component.DebugComponent;
@ -237,7 +238,7 @@ public final class SolrCore implements SolrInfoMBean {
Properties p = new Properties();
Directory dir = null;
try {
dir = getDirectoryFactory().get(getDataDir(), getSolrConfig().indexConfig.lockType);
dir = getDirectoryFactory().get(getDataDir(), DirContext.META_DATA, getSolrConfig().indexConfig.lockType);
if (dir.fileExists(SnapPuller.INDEX_PROPERTIES)){
final IndexInput input = dir.openInput(SnapPuller.INDEX_PROPERTIES, IOContext.DEFAULT);
@ -454,7 +455,7 @@ public final class SolrCore implements SolrInfoMBean {
if (indexExists && firstTime && !reload) {
Directory dir = directoryFactory.get(indexDir,
Directory dir = directoryFactory.get(indexDir, DirContext.DEFAULT,
getSolrConfig().indexConfig.lockType);
try {
if (IndexWriter.isLocked(dir)) {

View File

@ -35,7 +35,7 @@ import org.apache.lucene.store.IOContext;
public class StandardDirectoryFactory extends CachingDirectoryFactory {
@Override
protected Directory create(String path) throws IOException {
protected Directory create(String path, DirContext dirContext) throws IOException {
return FSDirectory.open(new File(path));
}

View File

@ -60,6 +60,7 @@ import org.apache.solr.core.IndexDeletionPolicyWrapper;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrDeletionPolicy;
import org.apache.solr.core.SolrEventListener;
import org.apache.solr.core.DirectoryFactory.DirContext;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.BinaryQueryResponseWriter;
import org.apache.solr.response.SolrQueryResponse;
@ -361,7 +362,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
// use a set to work around a possible Lucene bug which returns the same file
// name multiple times
Collection<String> files = new HashSet<String>(commit.getFileNames());
dir = core.getDirectoryFactory().get(core.getNewIndexDir(), core.getSolrConfig().indexConfig.lockType);
dir = core.getDirectoryFactory().get(core.getNewIndexDir(), DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
try {
for (String fileName : files) {
@ -467,7 +468,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
Directory dir;
long size = 0;
try {
dir = core.getDirectoryFactory().get(core.getNewIndexDir(), core.getSolrConfig().indexConfig.lockType);
dir = core.getDirectoryFactory().get(core.getNewIndexDir(), DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
try {
size = DirectoryFactory.sizeOfDirectory(dir);
} finally {
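
    The same pattern appears where ReplicationHandler reports index size: check the directory out with DirContext.DEFAULT, measure it, release it. A trimmed sketch, assuming only the methods visible in this diff (DirectoryFactory.sizeOfDirectory and release):

    import java.io.IOException;

    import org.apache.lucene.store.Directory;
    import org.apache.solr.core.DirectoryFactory;
    import org.apache.solr.core.DirectoryFactory.DirContext;
    import org.apache.solr.core.SolrCore;

    class IndexSizeSketch {
      long indexSizeInBytes(SolrCore core) throws IOException {
        DirectoryFactory factory = core.getDirectoryFactory();
        Directory dir = factory.get(core.getNewIndexDir(), DirContext.DEFAULT,
            core.getSolrConfig().indexConfig.lockType);
        try {
          return DirectoryFactory.sizeOfDirectory(dir);
        } finally {
          factory.release(dir);
        }
      }
    }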

View File

@ -86,6 +86,7 @@ import org.apache.solr.common.util.ExecutorUtil;
import org.apache.solr.common.util.FastInputStream;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.CachingDirectoryFactory.CloseListener;
import org.apache.solr.core.DirectoryFactory.DirContext;
import org.apache.solr.core.DirectoryFactory;
import org.apache.solr.core.IndexDeletionPolicyWrapper;
import org.apache.solr.core.SolrCore;
@ -369,16 +370,18 @@ public class SnapPuller {
filesDownloaded = Collections.synchronizedList(new ArrayList<Map<String, Object>>());
// if the generation of the master is older than that of the slave, the two indexes are not compatible for an incremental copy,
// so a new index directory needs to be created and all the files need to be copied
boolean isFullCopyNeeded = IndexDeletionPolicyWrapper.getCommitTimestamp(commit) >= latestVersion || forceReplication;
boolean isFullCopyNeeded = IndexDeletionPolicyWrapper
.getCommitTimestamp(commit) >= latestVersion
|| commit.getGeneration() >= latestGeneration || forceReplication;
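
    The change above adds a second trigger for a full copy: besides the commit-timestamp test and forced replication, a slave whose commit generation has reached or passed the master's latest generation also falls back to copying everything into a fresh index directory. Restated as a small helper, purely for illustration (the real code keeps the expression inline):

    final class FullCopySketch {
      // Illustrative restatement of the isFullCopyNeeded expression from this diff.
      static boolean isFullCopyNeeded(long commitTimestamp, long commitGeneration,
                                      long latestVersion, long latestGeneration,
                                      boolean forceReplication) {
        return commitTimestamp >= latestVersion
            || commitGeneration >= latestGeneration
            || forceReplication;
      }
    }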
String tmpIdxDirName = "index." + new SimpleDateFormat(SnapShooter.DATE_FMT, Locale.ROOT).format(new Date());
tmpIndex = createTempindexDir(core, tmpIdxDirName);
tmpIndexDir = core.getDirectoryFactory().get(tmpIndex, core.getSolrConfig().indexConfig.lockType);
tmpIndexDir = core.getDirectoryFactory().get(tmpIndex, DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
// make sure it's the newest known index dir...
indexDirPath = core.getNewIndexDir();
indexDir = core.getDirectoryFactory().get(indexDirPath, core.getSolrConfig().indexConfig.lockType);
indexDir = core.getDirectoryFactory().get(indexDirPath, DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
Directory oldDirectory = null;
try {
@ -542,7 +545,7 @@ public class SnapPuller {
long replicationTimeTaken = (replicationTime - getReplicationStartTime()) / 1000;
Directory dir = null;
try {
dir = solrCore.getDirectoryFactory().get(solrCore.getDataDir(), solrCore.getSolrConfig().indexConfig.lockType);
dir = solrCore.getDirectoryFactory().get(solrCore.getDataDir(), DirContext.META_DATA, solrCore.getSolrConfig().indexConfig.lockType);
int indexCount = 1, confFilesCount = 1;
if (props.containsKey(TIMES_INDEX_REPLICATED)) {
@ -725,7 +728,7 @@ public class SnapPuller {
String indexDir = solrCore.getIndexDir();
// it's okay to use null for lock factory since we know this dir will exist
Directory dir = solrCore.getDirectoryFactory().get(indexDir, solrCore.getSolrConfig().indexConfig.lockType);
Directory dir = solrCore.getDirectoryFactory().get(indexDir, DirContext.DEFAULT, solrCore.getSolrConfig().indexConfig.lockType);
try {
for (Map<String,Object> file : filesToDownload) {
if (!dir.fileExists((String) file.get(NAME)) || downloadCompleteIndex) {
@ -848,7 +851,7 @@ public class SnapPuller {
Properties p = new Properties();
Directory dir = null;
try {
dir = solrCore.getDirectoryFactory().get(solrCore.getDataDir(), solrCore.getSolrConfig().indexConfig.lockType);
dir = solrCore.getDirectoryFactory().get(solrCore.getDataDir(), DirContext.META_DATA, solrCore.getSolrConfig().indexConfig.lockType);
if (dir.fileExists(SnapPuller.INDEX_PROPERTIES)){
final IndexInput input = dir.openInput(SnapPuller.INDEX_PROPERTIES, DirectoryFactory.IOCONTEXT_NO_CACHE);

View File

@ -35,6 +35,7 @@ import org.apache.lucene.store.Lock;
import org.apache.lucene.store.SimpleFSLockFactory;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.DirectoryFactory;
import org.apache.solr.core.DirectoryFactory.DirContext;
import org.apache.solr.core.SolrCore;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -102,7 +103,7 @@ public class SnapShooter {
Collection<String> files = indexCommit.getFileNames();
FileCopier fileCopier = new FileCopier();
Directory dir = solrCore.getDirectoryFactory().get(solrCore.getNewIndexDir(), solrCore.getSolrConfig().indexConfig.lockType);
Directory dir = solrCore.getDirectoryFactory().get(solrCore.getNewIndexDir(), DirContext.DEFAULT, solrCore.getSolrConfig().indexConfig.lockType);
try {
fileCopier.copyFiles(dir, files, snapShotDir);
} finally {

View File

@ -57,6 +57,7 @@ import org.apache.solr.core.CoreContainer;
import org.apache.solr.core.CoreDescriptor;
import org.apache.solr.core.DirectoryFactory;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.DirectoryFactory.DirContext;
import org.apache.solr.handler.RequestHandlerBase;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.request.SolrQueryRequest;
@ -367,7 +368,7 @@ public class CoreAdminHandler extends RequestHandlerBase {
dirsToBeReleased = new Directory[dirNames.length];
DirectoryFactory dirFactory = core.getDirectoryFactory();
for (int i = 0; i < dirNames.length; i++) {
Directory dir = dirFactory.get(dirNames[i], core.getSolrConfig().indexConfig.lockType);
Directory dir = dirFactory.get(dirNames[i], DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
dirsToBeReleased[i] = dir;
// TODO: why doesn't this use the IR factory? what is going on here?
readersToBeClosed[i] = DirectoryReader.open(dir);
@ -688,6 +689,8 @@ public class CoreAdminHandler extends RequestHandlerBase {
SolrParams params = req.getParams();
String cname = params.get(CoreAdminParams.CORE);
String indexInfo = params.get(CoreAdminParams.INDEX_INFO);
boolean isIndexInfoNeeded = Boolean.parseBoolean(null == indexInfo ? "true" : indexInfo);
boolean doPersist = false;
NamedList<Object> status = new SimpleOrderedMap<Object>();
Map<String,Exception> allFailures = coreContainer.getCoreInitFailures();
@ -695,7 +698,7 @@ public class CoreAdminHandler extends RequestHandlerBase {
if (cname == null) {
rsp.add("defaultCoreName", coreContainer.getDefaultCoreName());
for (String name : coreContainer.getCoreNames()) {
status.add(name, getCoreStatus(coreContainer, name));
status.add(name, getCoreStatus(coreContainer, name, isIndexInfoNeeded));
}
rsp.add("initFailures", allFailures);
} else {
@ -703,7 +706,7 @@ public class CoreAdminHandler extends RequestHandlerBase {
? Collections.singletonMap(cname, allFailures.get(cname))
: Collections.emptyMap();
rsp.add("initFailures", failures);
status.add(cname, getCoreStatus(coreContainer, cname));
status.add(cname, getCoreStatus(coreContainer, cname, isIndexInfoNeeded));
}
rsp.add("status", status);
doPersist = false; // no state change
@ -987,7 +990,7 @@ public class CoreAdminHandler extends RequestHandlerBase {
}
protected NamedList<Object> getCoreStatus(CoreContainer cores, String cname) throws IOException {
protected NamedList<Object> getCoreStatus(CoreContainer cores, String cname, boolean isIndexInfoNeeded) throws IOException {
NamedList<Object> info = new SimpleOrderedMap<Object>();
SolrCore core = cores.getCore(cname);
if (core != null) {
@ -1000,15 +1003,17 @@ public class CoreAdminHandler extends RequestHandlerBase {
info.add("schema", core.getSchemaResource());
info.add("startTime", new Date(core.getStartTime()));
info.add("uptime", System.currentTimeMillis() - core.getStartTime());
RefCounted<SolrIndexSearcher> searcher = core.getSearcher();
try {
SimpleOrderedMap<Object> indexInfo = LukeRequestHandler.getIndexInfo(searcher.get().getIndexReader());
long size = getIndexSize(core);
indexInfo.add("sizeInBytes", size);
indexInfo.add("size", NumberUtils.readableSize(size));
info.add("index", indexInfo);
} finally {
searcher.decref();
if (isIndexInfoNeeded) {
RefCounted<SolrIndexSearcher> searcher = core.getSearcher();
try {
SimpleOrderedMap<Object> indexInfo = LukeRequestHandler.getIndexInfo(searcher.get().getIndexReader());
long size = getIndexSize(core);
indexInfo.add("sizeInBytes", size);
indexInfo.add("size", NumberUtils.readableSize(size));
info.add("index", indexInfo);
} finally {
searcher.decref();
}
}
} finally {
core.close();
@ -1022,9 +1027,9 @@ public class CoreAdminHandler extends RequestHandlerBase {
long size = 0;
try {
if (!core.getDirectoryFactory().exists(core.getIndexDir())) {
dir = core.getDirectoryFactory().get(core.getNewIndexDir(), core.getSolrConfig().indexConfig.lockType);
dir = core.getDirectoryFactory().get(core.getNewIndexDir(), DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
} else {
dir = core.getDirectoryFactory().get(core.getIndexDir(), core.getSolrConfig().indexConfig.lockType);
dir = core.getDirectoryFactory().get(core.getIndexDir(), DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
}
try {
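
    The new indexInfo flag lets STATUS callers skip opening a searcher and computing the index size for every core, which can be costly on large installations. A hedged illustration of how a client might use it; the HTTP parameter name indexInfo is an assumption based on CoreAdminParams.INDEX_INFO, and the URL and port are examples only:

    import java.io.InputStream;
    import java.net.URL;

    public class CoreStatusSketch {
      public static void main(String[] args) throws Exception {
        // Request core status without the per-core "index" section.
        URL status = new URL(
            "http://localhost:8983/solr/admin/cores?action=STATUS&indexInfo=false&wt=json");
        InputStream in = status.openStream();
        try {
          // parse the JSON response as usual; omitted here
        } finally {
          in.close();
        }
      }
    }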

View File

@ -49,6 +49,7 @@ import org.apache.solr.core.DirectoryFactory;
import org.apache.solr.core.SolrConfig;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrInfoMBean;
import org.apache.solr.core.DirectoryFactory.DirContext;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrRequestInfo;
@ -119,7 +120,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
public SolrIndexSearcher(SolrCore core, String path, IndexSchema schema, SolrIndexConfig config, String name, boolean enableCache, DirectoryFactory directoryFactory) throws IOException {
// we don't need to reserve the directory because we get it from the factory
this(core, schema,name, core.getIndexReaderFactory().newReader(directoryFactory.get(path, config.lockType), core), true, enableCache, false, directoryFactory);
this(core, schema,name, core.getIndexReaderFactory().newReader(directoryFactory.get(path, DirContext.DEFAULT, config.lockType), core), true, enableCache, false, directoryFactory);
}
private static String getIndexDir(Directory dir) {

View File

@ -32,6 +32,7 @@ import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.PrintStreamInfoStream;
import org.apache.lucene.util.ThreadInterruptedException;
import org.apache.solr.core.DirectoryFactory;
import org.apache.solr.core.DirectoryFactory.DirContext;
import org.apache.solr.schema.IndexSchema;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -58,7 +59,7 @@ public class SolrIndexWriter extends IndexWriter {
public static SolrIndexWriter create(String name, String path, DirectoryFactory directoryFactory, boolean create, IndexSchema schema, SolrIndexConfig config, IndexDeletionPolicy delPolicy, Codec codec, boolean forceNewDirectory) throws IOException {
SolrIndexWriter w = null;
final Directory d = directoryFactory.get(path, config.lockType, forceNewDirectory);
final Directory d = directoryFactory.get(path, DirContext.DEFAULT, config.lockType, forceNewDirectory);
try {
w = new SolrIndexWriter(name, path, d, create, schema,
config, delPolicy, codec);

View File

@ -134,7 +134,13 @@ public class SignatureUpdateProcessorFactory
if (enabled) {
SolrInputDocument doc = cmd.getSolrInputDocument();
List<String> currDocSigFields = null;
boolean isPartialUpdate = DistributedUpdateProcessor.isAtomicUpdate(cmd);
if (sigFields == null || sigFields.size() == 0) {
if (isPartialUpdate) {
throw new SolrException
(ErrorCode.SERVER_ERROR,
"Can't use SignatureUpdateProcessor with partial updates on signature fields");
}
Collection<String> docFields = doc.getFieldNames();
currDocSigFields = new ArrayList<String>(docFields.size());
currDocSigFields.addAll(docFields);
@ -149,6 +155,12 @@ public class SignatureUpdateProcessorFactory
for (String field : currDocSigFields) {
SolrInputField f = doc.getField(field);
if (f != null) {
if (isPartialUpdate) {
throw new SolrException
(ErrorCode.SERVER_ERROR,
"Can't use SignatureUpdateProcessor with partial update request " +
"containing signature field: " + field);
}
sig.add(field);
Object o = f.getValue();
if (o instanceof Collection) {
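
    DistributedUpdateProcessor.isAtomicUpdate(cmd) itself is not part of this diff; conceptually, an atomic (partial) update is a SolrInputDocument whose field values are Maps of modifier operations such as set/add/inc, and that is what the new guards reject whenever a signature field is involved. A rough, hypothetical check along those lines (not the actual implementation):

    import java.util.Map;

    import org.apache.solr.common.SolrInputDocument;
    import org.apache.solr.common.SolrInputField;

    final class AtomicUpdateSketch {
      // Hypothetical: treat any Map-valued field as an atomic-update instruction.
      static boolean looksLikeAtomicUpdate(SolrInputDocument doc) {
        for (SolrInputField field : doc) {
          if (field.getValue() instanceof Map) {
            return true;
          }
        }
        return false;
      }
    }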

View File

@ -196,7 +196,8 @@ public class SimplePostTool {
fatal("System Property 'data' is not valid for this tool: " + mode);
}
String params = System.getProperty("params", "");
urlStr = System.getProperty("url", SimplePostTool.appendParam(DEFAULT_POST_URL, params));
urlStr = System.getProperty("url", DEFAULT_POST_URL);
urlStr = SimplePostTool.appendParam(urlStr, params);
URL url = new URL(urlStr);
boolean auto = isOn(System.getProperty("auto", DEFAULT_AUTO));
String type = System.getProperty("type");
@ -800,7 +801,7 @@ public class SimplePostTool {
" " + urlc.getResponseMessage() + " for url "+url);
}
} catch (IOException e) {
warn("An error occured posting data to "+url+". Please check that Solr is running.");
warn("An error occurred posting data to "+url+". Please check that Solr is running.");
}
}

View File

@ -47,6 +47,29 @@
</updateLog>
</updateHandler>
<updateRequestProcessorChain name="dedupe">
<processor class="org.apache.solr.update.processor.SignatureUpdateProcessorFactory">
<bool name="enabled">true</bool>
<bool name="overwriteDupes">true</bool>
<str name="fields">v_t,t_field</str>
<str name="signatureClass">org.apache.solr.update.processor.TextProfileSignature</str>
</processor>
<processor class="solr.RunUpdateProcessorFactory" />
</updateRequestProcessorChain>
<updateRequestProcessorChain name="stored_sig">
<!-- this chain is valid even though the signature field is not
indexed, because we are not asking for dups to be overwritten
-->
<processor class="org.apache.solr.update.processor.SignatureUpdateProcessorFactory">
<bool name="enabled">true</bool>
<str name="signatureField">non_indexed_signature_sS</str>
<bool name="overwriteDupes">false</bool>
<str name="fields">v_t,t_field</str>
<str name="signatureClass">org.apache.solr.update.processor.TextProfileSignature</str>
</processor>
<processor class="solr.RunUpdateProcessorFactory" />
</updateRequestProcessorChain>
<requestHandler name="/admin/" class="org.apache.solr.handler.admin.AdminHandlers" />
</config>
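
    With a chain like "dedupe" wired up, a client selects it per request through the update.chain parameter (UpdateParams.UPDATE_CHAIN, which the tests below set via local request params). A sketch using SolrJ 4.x; HttpSolrServer, the URL, and the document values are illustrative only:

    import org.apache.solr.client.solrj.impl.HttpSolrServer;
    import org.apache.solr.client.solrj.request.UpdateRequest;
    import org.apache.solr.common.SolrInputDocument;
    import org.apache.solr.common.params.UpdateParams;

    public class DedupeChainSketch {
      public static void main(String[] args) throws Exception {
        HttpSolrServer server = new HttpSolrServer("http://localhost:8983/solr");

        SolrInputDocument doc = new SolrInputDocument();
        doc.addField("id", "1");
        doc.addField("v_t", "Hello Dude man!");

        UpdateRequest req = new UpdateRequest();
        req.setParam(UpdateParams.UPDATE_CHAIN, "dedupe"); // route through the dedupe chain
        req.add(doc);
        req.process(server);
        server.commit();
      }
    }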

View File

@ -22,6 +22,7 @@ import java.io.IOException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.store.Directory;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.core.DirectoryFactory.DirContext;
import org.junit.BeforeClass;
import org.junit.Test;
@ -46,7 +47,7 @@ public class AlternateDirectoryTest extends SolrTestCaseJ4 {
public static volatile Directory dir;
@Override
public Directory create(String path) throws IOException {
public Directory create(String path, DirContext dirContext) throws IOException {
openCalled = true;
return dir = newFSDirectory(new File(path));

View File

@ -22,6 +22,7 @@ import java.io.IOException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.solr.core.DirectoryFactory.DirContext;
/**
* Test-case for RAMDirectoryFactory
@ -37,13 +38,13 @@ public class RAMDirectoryFactoryTest extends LuceneTestCase {
final Directory directory = new RAMDirectory();
RAMDirectoryFactory factory = new RAMDirectoryFactory() {
@Override
protected Directory create(String path) {
protected Directory create(String path, DirContext dirContext) {
return directory;
}
};
String path = "/fake/path";
Directory dir1 = factory.get(path, null);
Directory dir2 = factory.get(path, null);
Directory dir1 = factory.get(path, DirContext.DEFAULT, null);
Directory dir2 = factory.get(path, DirContext.DEFAULT, null);
assertEquals("RAMDirectoryFactory should not create a new instance of RefCntRamDirectory " +
"every time open() is called for the same path", dir1, dir2);
@ -53,7 +54,7 @@ public class RAMDirectoryFactoryTest extends LuceneTestCase {
private void dotestOpenSucceedForEmptyDir() throws IOException {
RAMDirectoryFactory factory = new RAMDirectoryFactory();
Directory dir = factory.get("/fake/path", null);
Directory dir = factory.get("/fake/path", DirContext.DEFAULT, null);
assertNotNull("RAMDirectoryFactory should create RefCntRamDirectory even if the path doesn't lead " +
"to an index directory on the file system", dir);
factory.release(dir);

View File

@ -64,7 +64,7 @@ public class SignatureUpdateProcessorFactoryTest extends SolrTestCaseJ4 {
chain = "dedupe"; // set the default that most tests expect
}
void checkNumDocs(int n) {
static void checkNumDocs(int n) {
SolrQueryRequest req = req();
try {
assertEquals(n, req.getSearcher().getIndexReader().numDocs());
@ -353,7 +353,11 @@ public class SignatureUpdateProcessorFactoryTest extends SolrTestCaseJ4 {
}
}
private void addDoc(String doc) throws Exception {
private void addDoc(String doc) throws Exception {
addDoc(doc, chain);
}
static void addDoc(String doc, String chain) throws Exception {
Map<String, String[]> params = new HashMap<String, String[]>();
MultiMapSolrParams mmparams = new MultiMapSolrParams(params);
params.put(UpdateParams.UPDATE_CHAIN, new String[] { chain });

View File

@ -0,0 +1,74 @@
package org.apache.solr.update.processor;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import com.google.common.collect.Maps;
import org.apache.noggit.ObjectBuilder;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.core.SolrCore;
import org.junit.BeforeClass;
import org.junit.Test;
import java.util.List;
import java.util.Map;
import static org.apache.solr.update.processor.SignatureUpdateProcessorFactoryTest.addDoc;
public class TestPartialUpdateDeduplication extends SolrTestCaseJ4 {
@BeforeClass
public static void beforeClass() throws Exception {
initCore("solrconfig-tlog.xml", "schema15.xml");
}
@Test
public void testPartialUpdates() throws Exception {
SignatureUpdateProcessorFactoryTest.checkNumDocs(0);
String chain = "dedupe";
// partial update
SolrInputDocument doc = new SolrInputDocument();
doc.addField("id", "2a");
Map<String, Object> map = Maps.newHashMap();
map.put("set", "Hello Dude man!");
doc.addField("v_t", map);
UpdateRequest req = new UpdateRequest();
req.add(doc);
boolean exception_ok = false;
try {
addDoc(req.getXML(), chain);
} catch (Exception e) {
exception_ok = true;
}
assertTrue("Should have gotten an exception with partial update on signature generating field",
exception_ok);
SignatureUpdateProcessorFactoryTest.checkNumDocs(0);
addDoc(adoc("id", "2a", "v_t", "Hello Dude man!", "name", "ali babi'"), chain);
doc = new SolrInputDocument();
doc.addField("id", "2a");
map = Maps.newHashMap();
map.put("set", "name changed");
doc.addField("name", map);
req = new UpdateRequest();
req.add(doc);
addDoc(req.getXML(), chain);
addDoc(commit(), chain);
SignatureUpdateProcessorFactoryTest.checkNumDocs(1);
}
}

View File

@ -56,6 +56,7 @@ public class SimplePostToolTest extends SolrTestCaseJ4 {
t_web = SimplePostTool.parseArgsAndInit(args);
System.setProperty("params", "param1=foo&param2=bar");
System.setProperty("url", "http://localhost:5150/solr/update");
t_test = SimplePostTool.parseArgsAndInit(args);
pf = new MockPageFetcher();
@ -76,7 +77,7 @@ public class SimplePostToolTest extends SolrTestCaseJ4 {
assertEquals(1, t_web.recursive);
assertEquals(10, t_web.delay);
assertNotNull(t_test.solrUrl);
assertEquals("http://localhost:5150/solr/update?param1=foo&param2=bar",t_test.solrUrl.toExternalForm());
}
@Test

View File

@ -28,7 +28,7 @@
<jmx />
<lib dir="../../../../dist/" regex="apache-solr-dataimporthandler-.*\.jar" />
<lib dir="../../../../dist/" regex="solr-dataimporthandler-.*\.jar" />
<!-- <indexConfig> section could go here, but we want the defaults -->

View File

@ -34,7 +34,7 @@
<lib dir="../../../../contrib/extraction/lib" />
<lib dir="../../../../contrib/dataimporthandler/lib/" regex=".*jar$" />
<lib dir="../../../../dist/" regex="apache-solr-dataimporthandler-.*\.jar" />
<lib dir="../../../../dist/" regex="solr-dataimporthandler-.*\.jar" />
<!-- <indexConfig> section could go here, but we want the defaults -->

View File

@ -28,7 +28,7 @@
<jmx />
<lib dir="../../../../dist/" regex="apache-solr-dataimporthandler-.*\.jar" />
<lib dir="../../../../dist/" regex="solr-dataimporthandler-.*\.jar" />
<!-- <indexConfig> section could go here, but we want the defaults -->

Some files were not shown because too many files have changed in this diff