diff --git a/dev-tools/eclipse/dot.classpath b/dev-tools/eclipse/dot.classpath index 17280705c64..4f88ffca6d2 100644 --- a/dev-tools/eclipse/dot.classpath +++ b/dev-tools/eclipse/dot.classpath @@ -168,7 +168,7 @@ - + diff --git a/dev-tools/idea/.idea/libraries/JUnit.xml b/dev-tools/idea/.idea/libraries/JUnit.xml index b6e28c823dc..592cf0090ef 100644 --- a/dev-tools/idea/.idea/libraries/JUnit.xml +++ b/dev-tools/idea/.idea/libraries/JUnit.xml @@ -2,7 +2,7 @@ - + diff --git a/dev-tools/idea/lucene/classification/classification.iml b/dev-tools/idea/lucene/classification/classification.iml index 936be59aafa..20105b6f374 100644 --- a/dev-tools/idea/lucene/classification/classification.iml +++ b/dev-tools/idea/lucene/classification/classification.iml @@ -15,5 +15,7 @@ + + diff --git a/dev-tools/maven/lucene/classification/pom.xml.template b/dev-tools/maven/lucene/classification/pom.xml.template index eaf3d4e9d4e..ec572ed513e 100644 --- a/dev-tools/maven/lucene/classification/pom.xml.template +++ b/dev-tools/maven/lucene/classification/pom.xml.template @@ -54,6 +54,17 @@ lucene-core ${project.version} + + ${project.groupId} + lucene-queries + ${project.version} + + + ${project.groupId} + lucene-analyzers-common + ${project.version} + test + ${module-path}/src/java diff --git a/dev-tools/maven/lucene/core/src/java/pom.xml.template b/dev-tools/maven/lucene/core/src/java/pom.xml.template index 79bfacf7979..0579acec87f 100644 --- a/dev-tools/maven/lucene/core/src/java/pom.xml.template +++ b/dev-tools/maven/lucene/core/src/java/pom.xml.template @@ -24,7 +24,7 @@ org.apache.lucene lucene-parent @version@ - ../pom.xml + ../../../pom.xml org.apache.lucene lucene-core diff --git a/dev-tools/maven/pom.xml.template b/dev-tools/maven/pom.xml.template index a6008b6317e..c432f674166 100644 --- a/dev-tools/maven/pom.xml.template +++ b/dev-tools/maven/pom.xml.template @@ -434,7 +434,7 @@ com.carrotsearch.randomizedtesting randomizedtesting-runner - 2.0.3 + 2.0.4 diff --git a/dev-tools/scripts/checkJavadocLinks.py b/dev-tools/scripts/checkJavadocLinks.py index 35bf05eb516..20993145741 100644 --- a/dev-tools/scripts/checkJavadocLinks.py +++ b/dev-tools/scripts/checkJavadocLinks.py @@ -32,12 +32,17 @@ class FindHyperlinks(HTMLParser): def __init__(self, baseURL): HTMLParser.__init__(self) + self.stack = [] self.anchors = set() self.links = [] self.baseURL = baseURL self.printed = False def handle_starttag(self, tag, attrs): + # NOTE: I don't think 'a' should be in here. But try debugging + # NumericRangeQuery.html. (Could be javadocs bug, its a generic type...) 
+ if tag not in ('link', 'meta', 'frame', 'br', 'hr', 'p', 'li', 'img', 'col', 'a'): + self.stack.append(tag) if tag == 'a': name = None href = None @@ -74,6 +79,18 @@ class FindHyperlinks(HTMLParser): else: raise RuntimeError('couldn\'t find an href nor name in link in %s: only got these attrs: %s' % (self.baseURL, attrs)) + def handle_endtag(self, tag): + if tag in ('link', 'meta', 'frame', 'br', 'hr', 'p', 'li', 'img', 'col', 'a'): + return + + if len(self.stack) == 0: + raise RuntimeError('%s %s:%s: saw no opening <%s>' % (self.baseURL, self.getpos()[0], self.getpos()[1], tag, self.stack[-1])) + + if self.stack[-1] == tag: + self.stack.pop() + else: + raise RuntimeError('%s %s:%s: saw but expected ' % (self.baseURL, self.getpos()[0], self.getpos()[1], tag, self.stack[-1])) + def printFile(self): if not self.printed: print() diff --git a/dev-tools/scripts/smokeTestRelease.py b/dev-tools/scripts/smokeTestRelease.py index 8bb05b96403..f012c398ee8 100644 --- a/dev-tools/scripts/smokeTestRelease.py +++ b/dev-tools/scripts/smokeTestRelease.py @@ -657,11 +657,21 @@ def verifyUnpacked(project, artifact, unpackPath, version, tmpDir): print(' run tests w/ Java 6...') run('%s; ant test' % javaExe('1.6'), '%s/test.log' % unpackPath) run('%s; ant jar' % javaExe('1.6'), '%s/compile.log' % unpackPath) - testDemo(isSrc, version) + testDemo(isSrc, version, '1.6') # test javadocs print(' generate javadocs w/ Java 6...') run('%s; ant javadocs' % javaExe('1.6'), '%s/javadocs.log' % unpackPath) checkJavadocpath('%s/build/docs' % unpackPath) + + print(' run tests w/ Java 7...') + run('%s; ant clean test' % javaExe('1.7'), '%s/test.log' % unpackPath) + run('%s; ant jar' % javaExe('1.7'), '%s/compile.log' % unpackPath) + testDemo(isSrc, version, '1.7') + + print(' generate javadocs w/ Java 7...') + run('%s; ant javadocs' % javaExe('1.7'), '%s/javadocs.log' % unpackPath) + checkJavadocpathFull('%s/build/docs' % unpackPath) + else: os.chdir('solr') # DISABLED until solr tests consistently pass @@ -679,8 +689,8 @@ def verifyUnpacked(project, artifact, unpackPath, version, tmpDir): # test javadocs print(' generate javadocs w/ Java 7...') - run('%s; ant javadocs' % javaExe('1.7'), '%s/javadocs.log' % unpackPath) - checkJavadocpath('%s/solr/build/docs' % unpackPath, False) + run('%s; ant clean javadocs' % javaExe('1.7'), '%s/javadocs.log' % unpackPath) + checkJavadocpathFull('%s/solr/build/docs' % unpackPath, False) print(' test solr example w/ Java 6...') run('%s; ant clean example' % javaExe('1.6'), '%s/antexample.log' % unpackPath) @@ -699,7 +709,8 @@ def verifyUnpacked(project, artifact, unpackPath, version, tmpDir): checkAllJARs(os.getcwd(), project, version) if project == 'lucene': - testDemo(isSrc, version) + testDemo(isSrc, version, '1.6') + testDemo(isSrc, version, '1.7') else: checkSolrWAR('%s/example/webapps/solr.war' % unpackPath, version) @@ -819,6 +830,9 @@ def testSolrExample(unpackPath, javaPath, isSrc): os.chdir('..') +# the weaker check: we can use this on java6 for some checks, +# but its generated HTML is hopelessly broken so we cannot run +# the link checking that checkJavadocpathFull does. 
def checkJavadocpath(path, failOnMissing=True): # check for level='package' # we fail here if its screwed up @@ -831,11 +845,20 @@ def checkJavadocpath(path, failOnMissing=True): # raise RuntimeError('javadoc problems') print('\n***WARNING***: javadocs want to fail!\n') +# full checks +def checkJavadocpathFull(path, failOnMissing=True): + # check for missing, etc + checkJavadocpath(path, failOnMissing) + + # also validate html/check for broken links if checkJavadocLinks.checkAll(path): raise RuntimeError('broken javadocs links found!') -def testDemo(isSrc, version): - print(' test demo...') +def testDemo(isSrc, version, jdk): + if os.path.exists('index'): + shutil.rmtree('index') # nuke any index from any previous iteration + + print(' test demo with %s...' % jdk) sep = ';' if cygwin else ':' if isSrc: cp = 'build/core/classes/java{0}build/demo/classes/java{0}build/analysis/common/classes/java{0}build/queryparser/classes/java'.format(sep) @@ -843,8 +866,8 @@ def testDemo(isSrc, version): else: cp = 'core/lucene-core-{0}.jar{1}demo/lucene-demo-{0}.jar{1}analysis/common/lucene-analyzers-common-{0}.jar{1}queryparser/lucene-queryparser-{0}.jar'.format(version, sep) docsDir = 'docs' - run('%s; java -cp "%s" org.apache.lucene.demo.IndexFiles -index index -docs %s' % (javaExe('1.6'), cp, docsDir), 'index.log') - run('%s; java -cp "%s" org.apache.lucene.demo.SearchFiles -index index -query lucene' % (javaExe('1.6'), cp), 'search.log') + run('%s; java -cp "%s" org.apache.lucene.demo.IndexFiles -index index -docs %s' % (javaExe(jdk), cp, docsDir), 'index.log') + run('%s; java -cp "%s" org.apache.lucene.demo.SearchFiles -index index -query lucene' % (javaExe(jdk), cp), 'search.log') reMatchingDocs = re.compile('(\d+) total matching documents') m = reMatchingDocs.search(open('search.log', encoding='UTF-8').read()) if m is None: diff --git a/dev-tools/scripts/write.stage.maven.build.xml.pl b/dev-tools/scripts/write.stage.maven.build.xml.pl index 4c780d97e63..c5e8aa851ff 100755 --- a/dev-tools/scripts/write.stage.maven.build.xml.pl +++ b/dev-tools/scripts/write.stage.maven.build.xml.pl @@ -12,6 +12,10 @@ # 2. The pathname of the Ant build script to be built. # 3. The pathname of common-build.xml, which will be imported # in the Ant build script to be built. +# 4. Whether to prompt for credentials, rather than consulting +# settings.xml: boolean, e.g. "true" or "false" +# 5. The ID of the target repository +# 6. The URL to the target repository # # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. 
See the NOTICE file distributed with @@ -33,33 +37,72 @@ use strict; use warnings; use File::Basename; use File::Find; +use Cwd 'abs_path'; use File::Path qw(make_path); my $num_artifacts = 0; -my $maven_dist_dir = $ARGV[0]; +my $maven_dist_dir = abs_path($ARGV[0]); my $output_build_xml_file = $ARGV[1]; my $common_build_xml = $ARGV[2]; +my $m2_credentials_prompt = $ARGV[3]; +my $m2_repository_id = $ARGV[4]; +my $m2_repository_url = $ARGV[5]; if ($^O eq 'cygwin') { # Make sure Cygwin Perl can find the output path $output_build_xml_file = `cygpath -u "$output_build_xml_file"`; $output_build_xml_file =~ s/\s+$//; # Trim trailing whitespace $output_build_xml_file =~ s/^\s+//; # Trim leading whitespace } my ($output_file, $output_dir) = fileparse($output_build_xml_file); + +my @basepaths = (); +my $grandparent_pom = ''; +my @parent_poms = (); +sub find_poms; +File::Find::find({follow => 1, wanted => \&find_poms}, $maven_dist_dir); + +my $parent_pom_targets = ''; +if (@parent_poms) { + $parent_pom_targets = "\n"; + if ($grandparent_pom) { + $parent_pom_targets .= qq! \n!; + } + my $n = 0; + for my $parent_pom (@parent_poms) { + $parent_pom_targets .= qq! \n!; + ++$n; + } + $parent_pom_targets .= " \n"; +} + make_path($output_dir); open my $output_build_xml, ">$output_build_xml_file" or die "ERROR opening '$ARGV[1]' for writing: $!"; print $output_build_xml qq! - + !; -sub wanted; +my $credentials = ''; +if ($m2_credentials_prompt !~ /\A(?s:f(?:alse)?|no?)\z/) { + print $output_build_xml qq! + + WARNING: ON SOME PLATFORMS YOUR PASSPHRASE WILL BE ECHOED BACK\!\!\!\!\! + + + \n!; -File::Find::find({follow => 1, wanted => \&wanted}, $maven_dist_dir); + $credentials = q! + + !; +} + +for my $basepath (@basepaths) { + output_deploy_stanza($basepath); +} print $output_build_xml q! @@ -72,7 +115,7 @@ close $output_build_xml; print "Wrote '$output_build_xml_file' to stage $num_artifacts Maven artifacts.\n"; exit; -sub wanted { +sub find_poms { /^(.*)\.pom\z/s && do { my $pom_dir = $File::Find::dir; if ($^O eq 'cygwin') { # Output windows-style paths on Windows @@ -83,21 +126,36 @@ sub wanted { my $basefile = $_; $basefile =~ s/\.pom\z//; my $basepath = "$pom_dir/$basefile"; - my $pom_file = "$basepath.pom"; - my $jar_file = "$basepath.jar"; - my $war_file = "$basepath.war"; + push @basepaths, $basepath; - if (-f $war_file) { - print $output_build_xml qq! + if ($basefile =~ /grandparent/) { + $grandparent_pom = "$basepath.pom"; + } elsif ($basefile =~ /parent/) { + push @parent_poms, "$basepath.pom"; + } + } +} + +sub output_deploy_stanza { + my $basepath = shift; + my $pom_file = "$basepath.pom"; + my $jar_file = "$basepath.jar"; + my $war_file = "$basepath.war"; + + if (-f $war_file) { + print $output_build_xml qq! + $parent_pom_targets + $credentials \n!; - } elsif (-f $jar_file) { - print $output_build_xml qq! + } elsif (-f $jar_file) { + print $output_build_xml qq! + $parent_pom_targets @@ -106,16 +164,18 @@ sub wanted { + $credentials \n!; - } else { - print $output_build_xml qq! + } else { + print $output_build_xml qq! + $parent_pom_targets + $credentials \n!; - } + } - ++$num_artifacts; - }; + ++$num_artifacts; } diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 8dbe8649d65..1f0f329f7e1 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -35,7 +35,7 @@ New Features output for a single input. UpToTwoPositiveIntsOutputs was moved from lucene/core to lucene/misc. 
(Mike McCandless) -* LUCENE-3842: New AnalyzingCompletionLookup, for doing auto-suggest +* LUCENE-3842: New AnalyzingSuggester, for doing auto-suggest using an analyzer. This can create powerful suggesters: if the analyzer remove stop words then "ghost chr..." could suggest "The Ghost of Christmas Past"; if SynonymFilter is used to map wifi and wireless @@ -83,6 +83,9 @@ Bug Fixes romaji even for out-of-vocabulary kana cases (e.g. half-width forms). (Robert Muir) +* LUCENE-4504: Fix broken sort comparator in ValueSource.getSortField, + used when sorting by a function query. (Tom Shally via Robert Muir) + Optimizations * LUCENE-4443: Lucene41PostingsFormat no longer writes unnecessary offsets @@ -114,6 +117,10 @@ Optimizations Build +* Upgrade randomized testing to version 2.0.4: avoid hangs on shutdown + hooks hanging forever by calling Runtime.halt() in addition to + Runtime.exit() after a short delay to allow graceful shutdown (Dawid Weiss) + * LUCENE-4451: Memory leak per unique thread caused by RandomizedContext.contexts static map. Upgrade randomized testing to version 2.0.2 (Mike McCandless, Dawid Weiss) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/package.html b/lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/package.html index 9935890da58..176d74d6718 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/package.html +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/package.html @@ -20,11 +20,6 @@ org.apache.lucene.analysis.payloads -
-Provides various convenience classes for creating payloads on Tokens.
-
-Copyright © 2007 Apache Software Foundation
-
+Provides various convenience classes for creating payloads on Tokens.
\ No newline at end of file
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/package.html b/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/package.html
index 33d5be61f2b..9ce0655624a 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/package.html
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/package.html
@@ -16,32 +16,10 @@
 limitations under the License.
 -->
-
-  org.apache.lucene.analysis.sinks
+
+  org.apache.lucene.analysis.sinks
-Implementations of the SinkTokenizer that might be useful.
-
-Copyright © 2007 Apache Software Foundation
-
+Implementations of the SinkTokenizer that might be useful. \ No newline at end of file diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java index 93192d9e2b4..c837ab3e8d8 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java @@ -120,7 +120,7 @@ public final class ClassicTokenizer extends Tokenizer { } private void init(Version matchVersion) { - this.scanner = new ClassicTokenizerImpl(input); + this.scanner = new ClassicTokenizerImpl(null); // best effort NPE if you dont call reset } // this tokenizer generates three attributes: diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java index 97e512d1c8e..ed83d9e8739 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java @@ -134,7 +134,7 @@ public final class StandardTokenizer extends Tokenizer { } private final void init(Version matchVersion) { - this.scanner = new StandardTokenizerImpl(input); + this.scanner = new StandardTokenizerImpl(null); // best effort NPE if you dont call reset } // this tokenizer generates three attributes: diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java index d5442534fa8..6d3251befcd 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java @@ -98,7 +98,7 @@ public final class UAX29URLEmailTokenizer extends Tokenizer { */ public UAX29URLEmailTokenizer(Version matchVersion, Reader input) { super(input); - this.scanner = getScannerFor(matchVersion, input); + this.scanner = getScannerFor(matchVersion); } /** @@ -106,7 +106,7 @@ public final class UAX29URLEmailTokenizer extends Tokenizer { */ public UAX29URLEmailTokenizer(Version matchVersion, AttributeSource source, Reader input) { super(source, input); - this.scanner = getScannerFor(matchVersion, input); + this.scanner = getScannerFor(matchVersion); } /** @@ -114,11 +114,11 @@ public final class UAX29URLEmailTokenizer extends Tokenizer { */ public UAX29URLEmailTokenizer(Version matchVersion, AttributeFactory factory, Reader input) { super(factory, input); - this.scanner = getScannerFor(matchVersion, input); + this.scanner = getScannerFor(matchVersion); } - private static StandardTokenizerInterface getScannerFor(Version matchVersion, Reader input) { - return new UAX29URLEmailTokenizerImpl(input); + private static StandardTokenizerInterface getScannerFor(Version matchVersion) { + return new UAX29URLEmailTokenizerImpl(null); // best effort NPE if you dont call reset } // this tokenizer generates three attributes: diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java index 644fe7fad39..0d8029af649 100644 --- 
a/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java @@ -143,7 +143,7 @@ public final class WikipediaTokenizer extends Tokenizer { */ public WikipediaTokenizer(Reader input, int tokenOutput, Set untokenizedTypes) { super(input); - this.scanner = new WikipediaTokenizerImpl(input); + this.scanner = new WikipediaTokenizerImpl(null); // best effort NPE if you dont call reset init(tokenOutput, untokenizedTypes); } @@ -156,7 +156,7 @@ public final class WikipediaTokenizer extends Tokenizer { */ public WikipediaTokenizer(AttributeFactory factory, Reader input, int tokenOutput, Set untokenizedTypes) { super(factory, input); - this.scanner = new WikipediaTokenizerImpl(input); + this.scanner = new WikipediaTokenizerImpl(null); // best effort NPE if you dont call reset init(tokenOutput, untokenizedTypes); } @@ -169,7 +169,7 @@ public final class WikipediaTokenizer extends Tokenizer { */ public WikipediaTokenizer(AttributeSource source, Reader input, int tokenOutput, Set untokenizedTypes) { super(source, input); - this.scanner = new WikipediaTokenizerImpl(input); + this.scanner = new WikipediaTokenizerImpl(null); // best effort NPE if you dont call reset init(tokenOutput, untokenizedTypes); } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java index c48c86c47c8..0d5cead735f 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java @@ -52,9 +52,12 @@ public class TestElision extends BaseTokenStreamTestCase { private List filter(TokenFilter filter) throws IOException { List tas = new ArrayList(); CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class); + filter.reset(); while (filter.incrementToken()) { tas.add(termAtt.toString()); } + filter.end(); + filter.close(); return tas; } diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java index a3f074e0271..b03c80effc8 100644 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java @@ -227,7 +227,7 @@ public final class JapaneseTokenizer extends Tokenizer { outputCompounds = false; break; } - buffer.reset(input); + buffer.reset(null); // best effort NPE consumers that don't call reset() resetState(); diff --git a/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java b/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java index 0bb14762678..c26d403127b 100644 --- a/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java +++ b/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java @@ -62,12 +62,16 @@ public class TestMorfologikAnalyzer extends BaseTokenStreamTestCase { ts_1.reset(); ts_1.incrementToken(); assertEquals("first stream", "liście", termAtt_1.toString()); + ts_1.end(); + ts_1.close(); TokenStream ts_2 = a.tokenStream("dummy", new StringReader("danych")); CharTermAttribute termAtt_2 = 
ts_2.getAttribute(CharTermAttribute.class); ts_2.reset(); ts_2.incrementToken(); assertEquals("second stream", "dany", termAtt_2.toString()); + ts_2.end(); + ts_2.close(); } /** Test stemming of mixed-case tokens. */ @@ -110,6 +114,7 @@ public class TestMorfologikAnalyzer extends BaseTokenStreamTestCase { public final void testPOSAttribute() throws IOException { TokenStream ts = getTestAnalyzer().tokenStream("dummy", new StringReader("liście")); + ts.reset(); assertPOSToken(ts, "liście", "subst:sg:acc:n2", "subst:sg:nom:n2", @@ -127,6 +132,8 @@ public class TestMorfologikAnalyzer extends BaseTokenStreamTestCase { assertPOSToken(ts, "lista", "subst:sg:dat:f", "subst:sg:loc:f"); + ts.end(); + ts.close(); } /** blast some random strings through the analyzer */ diff --git a/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilterFactory.java b/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilterFactory.java index 0e8a94bc598..658cfac9ddf 100644 --- a/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilterFactory.java +++ b/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilterFactory.java @@ -41,12 +41,12 @@ import org.apache.lucene.analysis.util.TokenFilterFactory; *

 * This takes one required argument, "encoder", and the rest are optional:
 *
- *   encoder  required, one of "DoubleMetaphone", "Metaphone", "Soundex", "RefinedSoundex", "Caverphone" (v2.0),
+ *   encoder  required, one of "DoubleMetaphone", "Metaphone", "Soundex", "RefinedSoundex", "Caverphone" (v2.0),
 *   or "ColognePhonetic" (case insensitive). If encoder isn't one of these, it'll be resolved as a class name either by
- *   itself if it already contains a '.' or otherwise as in the same package as these others.
- *   inject  (default=true) add tokens to the stream with the offset=0
- *   maxCodeLength  The maximum length of the phonetic codes, as defined by the encoder. If an encoder doesn't
- *   support this then specifying this is an error.
+ *   itself if it already contains a '.' or otherwise as in the same package as these others.
+ *   inject  (default=true) add tokens to the stream with the offset=0
+ *   maxCodeLength  The maximum length of the phonetic codes, as defined by the encoder. If an encoder doesn't
+ *   support this then specifying this is an error.
 *
 *
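(Illustration, not part of the patch: a minimal sketch of how the "encoder", "inject" and "maxCodeLength" options documented above map onto the underlying filter; the analyzer chain, sample input, code length, and version constant are assumptions.)

    import java.io.StringReader;
    import org.apache.commons.codec.language.DoubleMetaphone;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.phonetic.PhoneticFilter;
    import org.apache.lucene.analysis.standard.StandardTokenizer;
    import org.apache.lucene.util.Version;

    // encoder:       a commons-codec Encoder implementation, here DoubleMetaphone
    // maxCodeLength: forwarded to encoders that support a maximum code length
    // inject=true:   phonetic codes are emitted alongside the original tokens
    DoubleMetaphone encoder = new DoubleMetaphone();
    encoder.setMaxCodeLen(4);
    TokenStream ts = new StandardTokenizer(Version.LUCENE_40, new StringReader("Smith Smythe"));
    ts = new PhoneticFilter(ts, encoder, true);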
diff --git a/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/package.html b/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/package.html
index fcd6f463d78..e4eeb7fa7e7 100644
--- a/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/package.html
+++ b/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/package.html
@@ -19,11 +19,7 @@
 
 
 
-
 SmartChineseAnalyzer Hidden Markov Model package.
-
-
 @lucene.experimental
-
diff --git a/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/package.html b/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/package.html index 31ea96e951a..cc9b8c179f3 100644 --- a/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/package.html +++ b/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/package.html @@ -20,12 +20,8 @@ -
 Analyzer for Simplified Chinese, which indexes words.
-
-
 @lucene.experimental
-
Three analyzers are provided for Chinese, each of which treats Chinese text in a different way.
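(Illustration, not part of the patch: a minimal sketch of the word-level analysis described above, using the reset()/end()/close() consumer pattern that the test changes elsewhere in this patch adopt; the sample text and version constant are assumptions.)

    import java.io.StringReader;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.util.Version;

    Analyzer analyzer = new SmartChineseAnalyzer(Version.LUCENE_40);
    TokenStream ts = analyzer.tokenStream("body", new StringReader("我是中国人"));
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    ts.reset();                              // required before the first incrementToken()
    while (ts.incrementToken()) {
      System.out.println(term.toString());  // word-level tokens, not single characters
    }
    ts.end();
    ts.close();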
    diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocParser.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocParser.java index 9f0dd36e17f..8e24f72983b 100644 --- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocParser.java +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocParser.java @@ -24,7 +24,7 @@ import java.util.Locale; import java.util.Map; /** - * Parser for trec doc content, invoked on doc text excluding and + * Parser for trec doc content, invoked on doc text excluding <DOC> and <DOCNO> * which are handled in TrecContentSource. Required to be stateless and hence thread safe. */ public abstract class TrecDocParser { diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html index b92da02b171..783e561dcd4 100644 --- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html +++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html @@ -20,8 +20,8 @@ Benchmarking Lucene By Tasks -
-    Benchmarking Lucene By Tasks.
+
 
     This package provides "task based" performance benchmarking of Lucene. One
     can use the predefined benchmarks, or create new ones.
@@ -251,7 +251,7 @@ The following is an informal description of the supported syntax.
     fixed, so for deletion in loops it is better to use the doc.delete.step property.
 
-    SetProp takes a name,value mandatory param,
+    SetProp takes a name,value mandatory param, ',' used as a separator.
     SearchTravRetTask and SearchTravTask take a numeric
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/package.html b/lucene/benchmark/src/java/org/apache/lucene/benchmark/package.html
index dc28bc83d33..2daf1865fd4 100644
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/package.html
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/package.html
@@ -20,9 +20,10 @@
 Lucene Benchmarking Package
+The benchmark contribution contains tools for benchmarking Lucene using standard, freely available corpora.
 
-
- The benchmark contribution contains tools for benchmarking Lucene using standard, freely available corpora. ANT will
+
 
    + ANT will download the corpus automatically, place it in a temp directory and then unpack it to the working.dir directory specified in the build. The temp directory and working directory can be safely removed after a run. However, the next time the task is run, it will need to download the files again. diff --git a/lucene/build.xml b/lucene/build.xml index a8db45533a8..e61d8d13eec 100644 --- a/lucene/build.xml +++ b/lucene/build.xml @@ -228,9 +228,16 @@ - + + + + + + diff --git a/lucene/classification/src/java/org/apache/lucene/classification/ClassificationResult.java b/lucene/classification/src/java/org/apache/lucene/classification/ClassificationResult.java index eb9a3267539..49e1eebd874 100644 --- a/lucene/classification/src/java/org/apache/lucene/classification/ClassificationResult.java +++ b/lucene/classification/src/java/org/apache/lucene/classification/ClassificationResult.java @@ -1,5 +1,3 @@ -package org.apache.lucene.classification; - /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -16,24 +14,39 @@ package org.apache.lucene.classification; * See the License for the specific language governing permissions and * limitations under the License. */ +package org.apache.lucene.classification; /** * The result of a call to {@link Classifier#assignClass(String)} holding an assigned class and a score. + * @lucene.experimental */ public class ClassificationResult { private String assignedClass; private double score; + /** + * Constructor + * @param assignedClass the class String assigned by a {@link Classifier} + * @param score the score for the assignedClass as a double + */ public ClassificationResult(String assignedClass, double score) { this.assignedClass = assignedClass; this.score = score; } + /** + * retrieve the result class + * @return a String representing an assigned class + */ public String getAssignedClass() { return assignedClass; } + /** + * retrieve the result score + * @return a double representing a result score + */ public double getScore() { return score; } diff --git a/lucene/classification/src/java/org/apache/lucene/classification/Classifier.java b/lucene/classification/src/java/org/apache/lucene/classification/Classifier.java index 3944fbe0d1f..38d0486cc93 100644 --- a/lucene/classification/src/java/org/apache/lucene/classification/Classifier.java +++ b/lucene/classification/src/java/org/apache/lucene/classification/Classifier.java @@ -1,5 +1,3 @@ -package org.apache.lucene.classification; - /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -16,6 +14,7 @@ package org.apache.lucene.classification; * See the License for the specific language governing permissions and * limitations under the License. 
*/ +package org.apache.lucene.classification; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.index.AtomicReader; diff --git a/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java b/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java index 9bfb65ae0e4..5680ccfaafe 100644 --- a/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java +++ b/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java @@ -1,5 +1,3 @@ -package org.apache.lucene.classification; - /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -16,6 +14,7 @@ package org.apache.lucene.classification; * See the License for the specific language governing permissions and * limitations under the License. */ +package org.apache.lucene.classification; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.index.AtomicReader; @@ -43,6 +42,10 @@ public class KNearestNeighborClassifier implements Classifier { private IndexSearcher indexSearcher; private int k; + /** + * Create a {@link Classifier} using kNN algorithm + * @param k the number of neighbors to analyze as an int + */ public KNearestNeighborClassifier(int k) { this.k = k; } diff --git a/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java b/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java index e4165ddc0bd..ce55826b552 100644 --- a/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java +++ b/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java @@ -1,5 +1,3 @@ -package org.apache.lucene.classification; - /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -16,6 +14,7 @@ package org.apache.lucene.classification; * See the License for the specific language governing permissions and * limitations under the License. */ +package org.apache.lucene.classification; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; diff --git a/lucene/classification/src/test/org/apache/lucene/classification/ClassificationTestBase.java b/lucene/classification/src/test/org/apache/lucene/classification/ClassificationTestBase.java index 03ece770a9d..a9925841351 100644 --- a/lucene/classification/src/test/org/apache/lucene/classification/ClassificationTestBase.java +++ b/lucene/classification/src/test/org/apache/lucene/classification/ClassificationTestBase.java @@ -1,5 +1,3 @@ -package org.apache.lucene.classification; - /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -16,6 +14,7 @@ package org.apache.lucene.classification; * See the License for the specific language governing permissions and * limitations under the License. 
*/ +package org.apache.lucene.classification; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; @@ -31,7 +30,7 @@ import org.junit.Before; /** * Base class for testing {@link Classifier}s */ -public class ClassificationTestBase extends LuceneTestCase { +public abstract class ClassificationTestBase extends LuceneTestCase { private RandomIndexWriter indexWriter; private String textFieldName; diff --git a/lucene/classification/src/test/org/apache/lucene/classification/KNearestNeighborClassifierTest.java b/lucene/classification/src/test/org/apache/lucene/classification/KNearestNeighborClassifierTest.java index b12b4138a2a..6bc5402dcf0 100644 --- a/lucene/classification/src/test/org/apache/lucene/classification/KNearestNeighborClassifierTest.java +++ b/lucene/classification/src/test/org/apache/lucene/classification/KNearestNeighborClassifierTest.java @@ -1,5 +1,3 @@ -package org.apache.lucene.classification; - /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -16,6 +14,7 @@ package org.apache.lucene.classification; * See the License for the specific language governing permissions and * limitations under the License. */ +package org.apache.lucene.classification; import org.apache.lucene.analysis.MockAnalyzer; import org.junit.Test; diff --git a/lucene/classification/src/test/org/apache/lucene/classification/SimpleNaiveBayesClassifierTest.java b/lucene/classification/src/test/org/apache/lucene/classification/SimpleNaiveBayesClassifierTest.java index 099a38408c7..24285b4035e 100644 --- a/lucene/classification/src/test/org/apache/lucene/classification/SimpleNaiveBayesClassifierTest.java +++ b/lucene/classification/src/test/org/apache/lucene/classification/SimpleNaiveBayesClassifierTest.java @@ -1,5 +1,3 @@ -package org.apache.lucene.classification; - /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -16,6 +14,7 @@ package org.apache.lucene.classification; * See the License for the specific language governing permissions and * limitations under the License. 
*/ +package org.apache.lucene.classification; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/compressing/AbstractTestCompressionMode.java b/lucene/codecs/src/test/org/apache/lucene/codecs/compressing/AbstractTestCompressionMode.java index 171ea02216d..7bb7186f776 100644 --- a/lucene/codecs/src/test/org/apache/lucene/codecs/compressing/AbstractTestCompressionMode.java +++ b/lucene/codecs/src/test/org/apache/lucene/codecs/compressing/AbstractTestCompressionMode.java @@ -76,6 +76,16 @@ public abstract class AbstractTestCompressionMode extends LuceneTestCase { return Arrays.copyOfRange(bytes.bytes, bytes.offset, bytes.offset + bytes.length); } + static byte[] copyCompressedData(Uncompressor uncompressor, byte[] compressed) throws IOException { + GrowableByteArrayDataOutput out = new GrowableByteArrayDataOutput(compressed.length); + uncompressor.copyCompressedData(new ByteArrayDataInput(compressed), out); + return Arrays.copyOf(out.bytes, out.length); + } + + byte[] copyCompressedData(byte[] compressed) throws IOException { + return copyCompressedData(mode.newUncompressor(), compressed); + } + public void testUncompress() throws IOException { final byte[] uncompressed = randomArray(); final byte[] compressed = compress(uncompressed); @@ -103,9 +113,47 @@ public abstract class AbstractTestCompressionMode extends LuceneTestCase { public void testCopyCompressedData() throws IOException { final byte[] uncompressed = randomArray(); final byte[] compressed = compress(uncompressed); - GrowableByteArrayDataOutput out = new GrowableByteArrayDataOutput(uncompressed.length); - mode.newUncompressor().copyCompressedData(new ByteArrayDataInput(compressed), out); - assertArrayEquals(compressed, Arrays.copyOf(out.bytes, out.length)); + assertArrayEquals(compressed, copyCompressedData(compressed)); } -} + public void test(byte[] uncompressed) throws IOException { + final byte[] compressed = compress(uncompressed); + final byte[] restored = uncompress(compressed); + assertEquals(uncompressed.length, restored.length); + assertArrayEquals(compressed, copyCompressedData(compressed)); + } + + public void testEmptySequence() throws IOException { + test(new byte[0]); + } + + public void testShortSequence() throws IOException { + test(new byte[] { (byte) random().nextInt(256) }); + } + + public void testIncompressible() throws IOException { + final byte[] uncompressed = new byte[RandomInts.randomIntBetween(random(), 20, 256)]; + for (int i = 0; i < uncompressed.length; ++i) { + uncompressed[i] = (byte) i; + } + test(uncompressed); + } + + // for LZ compression + + public void testShortLiteralsAndMatchs() throws IOException { + // literals and matchs lengths <= 15 + final byte[] uncompressed = "1234562345673456745678910123".getBytes("UTF-8"); + test(uncompressed); + } + + public void testLongLiteralsAndMatchs() throws IOException { + // literals and matchs length > 16 + final byte[] uncompressed = new byte[RandomInts.randomIntBetween(random(), 300, 1024)]; + for (int i = 0; i < uncompressed.length; ++i) { + uncompressed[i] = (byte) i; + } + test(uncompressed); + } + +} \ No newline at end of file diff --git a/lucene/common-build.xml b/lucene/common-build.xml index ba8dc6c6a04..3b69e31bdc8 100644 --- a/lucene/common-build.xml +++ b/lucene/common-build.xml @@ -181,6 +181,7 @@ + @@ -273,10 +274,11 @@ - + + @@ -457,37 +459,24 @@ + + + - + + + - - - - - - - - - - - - - - - - - - @@ -1389,14 +1378,25 @@ 
${tests-output}/junit4-*.suites - per-JVM executed suites - + + + + + + + + + + + Invoking target stage-maven in ${output.build.xml} now... @@ -1564,6 +1564,26 @@ ${tests-output}/junit4-*.suites - per-JVM executed suites + + + + + + + + + + + + + + + + + + @@ -1604,7 +1624,7 @@ ${tests-output}/junit4-*.suites - per-JVM executed suites Copyright © ${year} Apache Software Foundation. All Rights Reserved. + Copyright © ${year} Apache Software Foundation. All Rights Reserved.