diff --git a/dev-tools/maven/lucene/contrib/ant/pom.xml b/dev-tools/maven/lucene/contrib/ant/pom.xml new file mode 100644 index 00000000000..3410b4f773f --- /dev/null +++ b/dev-tools/maven/lucene/contrib/ant/pom.xml @@ -0,0 +1,115 @@ + + + 4.0.0 + + org.apache.lucene + lucene-parent + 4.0-SNAPSHOT + ../../pom.xml + + org.apache.lucene + lucene-ant + jar + Lucene Contrib Ant + Ant task to create Lucene indexes + + lucene/contrib/ant + ../../build/contrib/ant + + + + ${project.groupId} + lucene-core + ${project.version} + + + ${project.groupId} + lucene-core + ${project.version} + test-jar + test + + + ${project.groupId} + lucene-analyzers-common + ${project.version} + + + jtidy + jtidy + + + org.apache.ant + ant + + + org.apache.ant + ant-junit + test + + + junit + junit + test + + + + ${build-directory} + ${build-directory}/classes/java + ${build-directory}/classes/test + src/java + src/test + + + src/resources + + + + + ${project.build.testSourceDirectory} + + **/*.java + + + + + + org.codehaus.mojo + appassembler-maven-plugin + + -Xmx128M + flat + + windows + unix + + + + org.apache.lucene.ant.HtmlDocument + HtmlDocument + + + + + + + diff --git a/dev-tools/maven/lucene/contrib/db/bdb-je/pom.xml b/dev-tools/maven/lucene/contrib/db/bdb-je/pom.xml new file mode 100644 index 00000000000..336ee7c2dc4 --- /dev/null +++ b/dev-tools/maven/lucene/contrib/db/bdb-je/pom.xml @@ -0,0 +1,76 @@ + + + 4.0.0 + + org.apache.lucene + lucene-parent + 4.0-SNAPSHOT + ../../../pom.xml + + org.apache.lucene + lucene-bdb-je + jar + Lucene Contrib bdb-je + Berkeley DB based Directory implementation + + lucene/contrib/db/bdb-je + ../../../build/contrib/db/bdb-je + + + + ${project.groupId} + lucene-core + ${project.version} + + + ${project.groupId} + lucene-core + ${project.version} + test-jar + test + + + com.sleepycat + berkeleydb-je + + + junit + junit + test + + + + ${build-directory} + ${build-directory}/classes/java + ${build-directory}/classes/test + src/java + src/test + + + 
${project.build.testSourceDirectory} + + **/*.java + + + + + diff --git a/dev-tools/maven/lucene/contrib/db/bdb/pom.xml b/dev-tools/maven/lucene/contrib/db/bdb/pom.xml new file mode 100644 index 00000000000..4ff4ec2c799 --- /dev/null +++ b/dev-tools/maven/lucene/contrib/db/bdb/pom.xml @@ -0,0 +1,87 @@ + + + 4.0.0 + + org.apache.lucene + lucene-parent + 4.0-SNAPSHOT + ../../../pom.xml + + org.apache.lucene + lucene-bdb + jar + Lucene Contrib bdb + Berkeley DB based Directory implementation + + lucene/contrib/db/bdb + ../../../build/contrib/db/bdb + + + + ${project.groupId} + lucene-core + ${project.version} + + + ${project.groupId} + lucene-core + ${project.version} + test-jar + test + + + com.sleepycat + berkeleydb + + + junit + junit + test + + + + ${build-directory} + ${build-directory}/classes/java + ${build-directory}/classes/test + src/java + src/test + + + ${project.build.testSourceDirectory} + + **/*.java + + + + + + org.apache.maven.plugins + maven-surefire-plugin + + + + true + + + + + diff --git a/dev-tools/maven/lucene/contrib/db/pom.xml b/dev-tools/maven/lucene/contrib/db/pom.xml new file mode 100644 index 00000000000..a267ff9139e --- /dev/null +++ b/dev-tools/maven/lucene/contrib/db/pom.xml @@ -0,0 +1,49 @@ + + + 4.0.0 + + org.apache.lucene + lucene-parent + 4.0-SNAPSHOT + ../../pom.xml + + org.apache.lucene + lucene-db-aggregator + Lucene Database aggregator POM + pom + + bdb + bdb-je + + + ../../build/contrib/db/lucene-db-aggregator + + + org.apache.maven.plugins + maven-deploy-plugin + + true + + + + + diff --git a/dev-tools/maven/lucene/contrib/demo/pom.xml b/dev-tools/maven/lucene/contrib/demo/pom.xml new file mode 100644 index 00000000000..e2e61c46090 --- /dev/null +++ b/dev-tools/maven/lucene/contrib/demo/pom.xml @@ -0,0 +1,124 @@ + + + 4.0.0 + + org.apache.lucene + lucene-parent + 4.0-SNAPSHOT + ../../pom.xml + + org.apache.lucene + lucene-demo + war + Lucene Demos + This is the demo for Apache Lucene Java + + lucene/contrib/demo + 
../../build/contrib/demo + + + + ${project.groupId} + lucene-core + ${project.version} + + + ${project.groupId} + lucene-core + ${project.version} + test-jar + test + + + ${project.groupId} + lucene-analyzers-common + ${project.version} + + + javax.servlet + servlet-api + provided + + + junit + junit + test + + + + ${build-directory} + ${build-directory}/classes/java + ${build-directory}/classes/test + src/java + src/test + + + ${project.build.testSourceDirectory} + + **/*.java + + + + + + org.apache.maven.plugins + maven-war-plugin + + src/jsp + true + + + + + org.codehaus.mojo + appassembler-maven-plugin + + -Xmx128M + flat + ${build-directory} + + windows + unix + + + + org.apache.lucene.demo.DeleteFiles + DeleteFiles + + + org.apache.lucene.demo.IndexFiles + IndexFiles + + + org.apache.lucene.demo.IndexHTML + IndexHTML + + + org.apache.lucene.demo.SearchFiles + SearchFiles + + + + + + + diff --git a/dev-tools/maven/lucene/contrib/highlighter/pom.xml b/dev-tools/maven/lucene/contrib/highlighter/pom.xml new file mode 100644 index 00000000000..7df7e4affca --- /dev/null +++ b/dev-tools/maven/lucene/contrib/highlighter/pom.xml @@ -0,0 +1,84 @@ + + + 4.0.0 + + org.apache.lucene + lucene-parent + 4.0-SNAPSHOT + ../../pom.xml + + org.apache.lucene + lucene-highlighter + jar + Lucene Highlighter + + This is the highlighter for apache lucene java + + + lucene/contrib/highlighter + ../../build/contrib/highlighter + + + + ${project.groupId} + lucene-core + ${project.version} + + + ${project.groupId} + lucene-core + ${project.version} + test-jar + test + + + ${project.groupId} + lucene-memory + ${project.version} + + + ${project.groupId} + lucene-queries + ${project.version} + + + junit + junit + test + + + + ${build-directory} + ${build-directory}/classes/java + ${build-directory}/classes/test + src/java + src/test + + + ${project.build.testSourceDirectory} + + **/*.java + + + + + diff --git a/dev-tools/maven/lucene/contrib/instantiated/pom.xml 
b/dev-tools/maven/lucene/contrib/instantiated/pom.xml new file mode 100644 index 00000000000..f40c5544d16 --- /dev/null +++ b/dev-tools/maven/lucene/contrib/instantiated/pom.xml @@ -0,0 +1,72 @@ + + + 4.0.0 + + org.apache.lucene + lucene-parent + 4.0-SNAPSHOT + ../../pom.xml + + org.apache.lucene + lucene-instantiated + jar + Lucene InstantiatedIndex + InstantiatedIndex, alternative RAM store for small corpora. + + lucene/contrib/instantiated + ../../build/contrib/instantiated + + + + ${project.groupId} + lucene-core + ${project.version} + + + ${project.groupId} + lucene-core + ${project.version} + test-jar + test + + + junit + junit + test + + + + ${build-directory} + ${build-directory}/classes/java + ${build-directory}/classes/test + src/java + src/test + + + ${project.build.testSourceDirectory} + + **/*.java + + + + + diff --git a/dev-tools/maven/lucene/contrib/lucli/pom.xml b/dev-tools/maven/lucene/contrib/lucli/pom.xml new file mode 100644 index 00000000000..f6266f7caa2 --- /dev/null +++ b/dev-tools/maven/lucene/contrib/lucli/pom.xml @@ -0,0 +1,108 @@ + + + 4.0.0 + + org.apache.lucene + lucene-parent + 4.0-SNAPSHOT + ../../pom.xml + + org.apache.lucene + lucene-lucli + jar + Lucene Lucli + Lucene Command Line Interface + + lucene/contrib/lucli + ../../build/contrib/lucli + + + + ${project.groupId} + lucene-core + ${project.version} + + + ${project.groupId} + lucene-core + ${project.version} + test-jar + test + + + ${project.groupId} + lucene-analyzers-common + ${project.version} + + + jline + jline + + + junit + junit + test + + + + ${build-directory} + ${build-directory}/classes/java + ${build-directory}/classes/test + src/java + src/test + + + ${project.build.testSourceDirectory} + + **/*.java + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + + true + + + + org.codehaus.mojo + appassembler-maven-plugin + + -Xmx128M + flat + + windows + unix + + + + lucli.Lucli + lucli + + + + + + + diff --git a/dev-tools/maven/lucene/contrib/memory/pom.xml 
b/dev-tools/maven/lucene/contrib/memory/pom.xml new file mode 100644 index 00000000000..940fdf1b34e --- /dev/null +++ b/dev-tools/maven/lucene/contrib/memory/pom.xml @@ -0,0 +1,74 @@ + + + 4.0.0 + + org.apache.lucene + lucene-parent + 4.0-SNAPSHOT + ../../pom.xml + + org.apache.lucene + lucene-memory + jar + Lucene Memory + + High-performance single-document index to compare against Query + + + lucene/contrib/memory + ../../build/contrib/memory + + + + ${project.groupId} + lucene-core + ${project.version} + + + ${project.groupId} + lucene-core + ${project.version} + test-jar + test + + + junit + junit + test + + + + ${build-directory} + ${build-directory}/classes/java + ${build-directory}/classes/test + src/java + src/test + + + ${project.build.testSourceDirectory} + + **/*.java + + + + + diff --git a/dev-tools/maven/lucene/contrib/misc/pom.xml b/dev-tools/maven/lucene/contrib/misc/pom.xml new file mode 100644 index 00000000000..62bf4577e53 --- /dev/null +++ b/dev-tools/maven/lucene/contrib/misc/pom.xml @@ -0,0 +1,121 @@ + + + 4.0.0 + + org.apache.lucene + lucene-parent + 4.0-SNAPSHOT + ../../pom.xml + + org.apache.lucene + lucene-misc + jar + Lucene Miscellaneous + Miscellaneous Lucene extensions + + lucene/contrib/misc + ../../build/contrib/misc + + + + ${project.groupId} + lucene-core + ${project.version} + + + ${project.groupId} + lucene-core + ${project.version} + test-jar + test + + + ${project.groupId} + lucene-analyzers-common + ${project.version} + + + junit + junit + test + + + + ${build-directory} + ${build-directory}/classes/java + ${build-directory}/classes/test + src/java + src/test + + + ${project.build.testSourceDirectory} + + **/*.java + + + + + + org.codehaus.mojo + appassembler-maven-plugin + + -Xmx128M + flat + + windows + unix + + + + org.apache.lucene.index.FieldNormModifier + FieldNormModifier + + + org.apache.lucene.index.IndexSplitter + IndexSplitter + + + org.apache.lucene.index.MultiPassIndexSplitter + MultiPassIndexSplitter + + + 
org.apache.lucene.misc.GetTermInfo + GetTermInfo + + + org.apache.lucene.misc.HighFreqTerms + HighFreqTerms + + + org.apache.lucene.misc.IndexMergeTool + IndexMergeTool + + + org.apache.lucene.misc.LengthNormModifier + LengthNormModifier + + + + + + + diff --git a/dev-tools/maven/lucene/contrib/pom.xml b/dev-tools/maven/lucene/contrib/pom.xml new file mode 100644 index 00000000000..039b87d80a7 --- /dev/null +++ b/dev-tools/maven/lucene/contrib/pom.xml @@ -0,0 +1,62 @@ + + + 4.0.0 + + org.apache.lucene + lucene-parent + 4.0-SNAPSHOT + ../pom.xml + + org.apache.lucene + lucene-contrib-aggregator + Lucene Contrib aggregator POM + pom + + ant + db + demo + highlighter + instantiated + lucli + memory + misc + queries + queryparser + spatial + spellchecker + swing + wordnet + xml-query-parser + + + build/lucene-contrib-aggregator + + + org.apache.maven.plugins + maven-deploy-plugin + + true + + + + + diff --git a/dev-tools/maven/lucene/contrib/queries/pom.xml b/dev-tools/maven/lucene/contrib/queries/pom.xml new file mode 100644 index 00000000000..4cef0b9fa3b --- /dev/null +++ b/dev-tools/maven/lucene/contrib/queries/pom.xml @@ -0,0 +1,78 @@ + + + 4.0.0 + + org.apache.lucene + lucene-parent + 4.0-SNAPSHOT + ../../pom.xml + + org.apache.lucene + lucene-queries + jar + Lucene Queries + + Queries - various query object exotica not in core + + + lucene/contrib/queries + ../../build/contrib/queries + + + + ${project.groupId} + lucene-core + ${project.version} + + + ${project.groupId} + lucene-core + ${project.version} + test-jar + test + + + jakarta-regexp + jakarta-regexp + + + junit + junit + test + + + + ${build-directory} + ${build-directory}/classes/java + ${build-directory}/classes/test + src/java + src/test + + + ${project.build.testSourceDirectory} + + **/*.java + + + + + diff --git a/dev-tools/maven/lucene/contrib/queryparser/pom.xml b/dev-tools/maven/lucene/contrib/queryparser/pom.xml new file mode 100644 index 00000000000..6f215fa962d --- /dev/null +++ 
b/dev-tools/maven/lucene/contrib/queryparser/pom.xml @@ -0,0 +1,79 @@ + + + 4.0.0 + + org.apache.lucene + lucene-parent + 4.0-SNAPSHOT + ../../pom.xml + + org.apache.lucene + lucene-queryparser + jar + Lucene Query Parser + + This is the Flexible Query Parser for apache lucene java + + + lucene/contrib/queryparser + ../../build/contrib/queryparser + + + + ${project.groupId} + lucene-core + ${project.version} + + + ${project.groupId} + lucene-core + ${project.version} + test-jar + test + + + junit + junit + test + + + + ${build-directory} + ${build-directory}/classes/java + ${build-directory}/classes/test + src/java + src/test + + + src/resources + + + + + ${project.build.testSourceDirectory} + + **/*.java + + + + + diff --git a/dev-tools/maven/lucene/contrib/spatial/pom.xml b/dev-tools/maven/lucene/contrib/spatial/pom.xml new file mode 100644 index 00000000000..6bac5c10a8f --- /dev/null +++ b/dev-tools/maven/lucene/contrib/spatial/pom.xml @@ -0,0 +1,77 @@ + + + 4.0.0 + + org.apache.lucene + lucene-parent + 4.0-SNAPSHOT + ../../pom.xml + + org.apache.lucene + lucene-spatial + jar + Lucene Spatial + Spatial search package + + lucene/contrib/spatial + ../../build/contrib/spatial + + + + ${project.groupId} + lucene-core + ${project.version} + + + ${project.groupId} + lucene-core + ${project.version} + test-jar + test + + + ${project.groupId} + lucene-queries + ${project.version} + + + junit + junit + test + + + + ${build-directory} + ${build-directory}/classes/java + ${build-directory}/classes/test + src/java + src/test + + + ${project.build.testSourceDirectory} + + **/*.java + + + + + diff --git a/dev-tools/maven/lucene/contrib/spellchecker/pom.xml b/dev-tools/maven/lucene/contrib/spellchecker/pom.xml new file mode 100644 index 00000000000..5be1474a734 --- /dev/null +++ b/dev-tools/maven/lucene/contrib/spellchecker/pom.xml @@ -0,0 +1,77 @@ + + + 4.0.0 + + org.apache.lucene + lucene-parent + 4.0-SNAPSHOT + ../../pom.xml + + org.apache.lucene + lucene-spellchecker + jar 
+ Lucene Spellchecker + Spell Checker + + lucene/contrib/spellchecker + ../../build/contrib/spellchecker + + + + ${project.groupId} + lucene-core + ${project.version} + + + ${project.groupId} + lucene-core + ${project.version} + test-jar + test + + + ${project.groupId} + lucene-analyzers-common + ${project.version} + + + junit + junit + test + + + + ${build-directory} + ${build-directory}/classes/java + ${build-directory}/classes/test + src/java + src/test + + + ${project.build.testSourceDirectory} + + **/*.java + + + + + diff --git a/dev-tools/maven/lucene/contrib/swing/pom.xml b/dev-tools/maven/lucene/contrib/swing/pom.xml new file mode 100644 index 00000000000..1f2f4e5e424 --- /dev/null +++ b/dev-tools/maven/lucene/contrib/swing/pom.xml @@ -0,0 +1,101 @@ + + + 4.0.0 + + org.apache.lucene + lucene-parent + 4.0-SNAPSHOT + ../../pom.xml + + org.apache.lucene + lucene-swing + jar + Lucene Swing + Swing Models + + lucene/contrib/swing + ../../build/contrib/swing + + + + ${project.groupId} + lucene-core + ${project.version} + + + ${project.groupId} + lucene-core + ${project.version} + test-jar + test + + + ${project.groupId} + lucene-analyzers-common + ${project.version} + + + junit + junit + test + + + + ${build-directory} + ${build-directory}/classes/java + ${build-directory}/classes/test + src/java + src/test + + + ${project.build.testSourceDirectory} + + **/*.java + + + + + + org.codehaus.mojo + appassembler-maven-plugin + + -Xmx128M + flat + + windows + unix + + + + org.apache.lucene.swing.models.ListSearcherSimulator + ListSearchSimulator + + + org.apache.lucene.swing.models.TableSearcherSimulator + TableSearchSimulator + + + + + + + diff --git a/dev-tools/maven/lucene/contrib/wordnet/pom.xml b/dev-tools/maven/lucene/contrib/wordnet/pom.xml new file mode 100644 index 00000000000..d55557322b6 --- /dev/null +++ b/dev-tools/maven/lucene/contrib/wordnet/pom.xml @@ -0,0 +1,105 @@ + + + 4.0.0 + + org.apache.lucene + lucene-parent + 4.0-SNAPSHOT + ../../pom.xml + + 
org.apache.lucene + lucene-wordnet + jar + Lucene Wordnet + WordNet + + lucene/contrib/wordnet + ../../build/contrib/wordnet + + + + ${project.groupId} + lucene-core + ${project.version} + + + ${project.groupId} + lucene-core + ${project.version} + test-jar + test + + + ${project.groupId} + lucene-analyzers-common + ${project.version} + + + junit + junit + test + + + + ${build-directory} + ${build-directory}/classes/java + ${build-directory}/classes/test + src/java + src/test + + + ${project.build.testSourceDirectory} + + **/*.java + + + + + + org.codehaus.mojo + appassembler-maven-plugin + + -Xmx128M + flat + + windows + unix + + + + org.apache.lucene.wordnet.SynExpand + SynExpand + + + org.apache.lucene.wordnet.SynExpand + SynExpand + + + org.apache.lucene.wordnet.Syns2Index + Syns2Index + + + + + + + diff --git a/dev-tools/maven/lucene/contrib/xml-query-parser/pom.xml b/dev-tools/maven/lucene/contrib/xml-query-parser/pom.xml new file mode 100644 index 00000000000..f4c9aea1dd7 --- /dev/null +++ b/dev-tools/maven/lucene/contrib/xml-query-parser/pom.xml @@ -0,0 +1,82 @@ + + + 4.0.0 + + org.apache.lucene + lucene-parent + 4.0-SNAPSHOT + ../../pom.xml + + org.apache.lucene + lucene-xml-query-parser + jar + Lucene XML Query Parser + XML query parser + + lucene/contrib/xml-query-parser + ../../build/contrib/xml-query-parser + + + + ${project.groupId} + lucene-core + ${project.version} + + + ${project.groupId} + lucene-core + ${project.version} + test-jar + test + + + ${project.groupId} + lucene-queries + ${project.version} + + + javax.servlet + servlet-api + provided + + + junit + junit + test + + + + ${build-directory} + ${build-directory}/classes/java + ${build-directory}/classes/test + src/java + src/test + + + ${project.build.testSourceDirectory} + + **/*.java + + + + + diff --git a/dev-tools/maven/lucene/pom.xml b/dev-tools/maven/lucene/pom.xml new file mode 100644 index 00000000000..42b7c546157 --- /dev/null +++ b/dev-tools/maven/lucene/pom.xml @@ -0,0 +1,41 @@ + 
+ + 4.0.0 + + org.apache.lucene + lucene-solr-grandparent + 4.0-SNAPSHOT + ../pom.xml + + org.apache.lucene + lucene-parent + pom + Lucene parent POM + Lucene parent POM + + src + contrib + + + build/lucene-parent + + diff --git a/dev-tools/maven/lucene/src/pom.xml b/dev-tools/maven/lucene/src/pom.xml new file mode 100644 index 00000000000..43c83e76750 --- /dev/null +++ b/dev-tools/maven/lucene/src/pom.xml @@ -0,0 +1,126 @@ + + + 4.0.0 + + org.apache.lucene + lucene-parent + 4.0-SNAPSHOT + ../pom.xml + + org.apache.lucene + lucene-core + jar + Lucene Core + Apache Lucene Java Core + + lucene + ../build + + + + junit + junit + test + + + org.apache.ant + ant + test + + + org.apache.ant + ant-junit + test + + + + ${build-directory} + ${build-directory}/classes/java + ${build-directory}/classes/test + java + test + + + ${project.build.testSourceDirectory} + + **/*.java + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + + test-jar + + + + + + org.apache.maven.plugins + maven-surefire-plugin + + + ${project.version} + + + + + org.codehaus.mojo + appassembler-maven-plugin + + -Xmx128M + flat + + windows + unix + + + + org.apache.lucene.index.CheckIndex + CheckIndex + + + org.apache.lucene.index.IndexReader + IndexReader + + + org.apache.lucene.store.LockStressTest + LockStressTest + + + org.apache.lucene.store.LockVerifyServer + IndexReader + + + org.apache.lucene.util.English + English + + + + + + + diff --git a/dev-tools/maven/modules/analysis/common/pom.xml b/dev-tools/maven/modules/analysis/common/pom.xml new file mode 100644 index 00000000000..8977178b874 --- /dev/null +++ b/dev-tools/maven/modules/analysis/common/pom.xml @@ -0,0 +1,116 @@ + + + 4.0.0 + + org.apache.lucene + lucene-parent + 4.0-SNAPSHOT + ../../../lucene/pom.xml + + org.apache.lucene + lucene-analyzers-common + jar + Lucene Common Analyzers + Additional Analyzers + + modules/analysis/common + ../build/common + + + + ${project.groupId} + lucene-core + ${project.version} + + + 
${project.groupId} + lucene-core + ${project.version} + test-jar + test + + + junit + junit + test + + + + ${build-directory} + ${build-directory}/classes/java + ${build-directory}/classes/test + src/java + src/test + + + src/resources + + + + + ${project.build.testSourceDirectory} + + **/*.java + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + + test-jar + + + + + + org.codehaus.mojo + appassembler-maven-plugin + + -Xmx128M + flat + + windows + unix + + + + org.apache.lucene.analysis.charfilter.HtmlStripCharFilter + HtmlStripCharFilter + + + org.apache.lucene.analysis.en.PorterStemmer + EnglishPorterStemmer + + + org.tartarus.snowball.TestApp + SnowballTestApp + + + + + + + diff --git a/dev-tools/maven/modules/analysis/icu/pom.xml b/dev-tools/maven/modules/analysis/icu/pom.xml new file mode 100644 index 00000000000..1ce2183627e --- /dev/null +++ b/dev-tools/maven/modules/analysis/icu/pom.xml @@ -0,0 +1,96 @@ + + + 4.0.0 + + org.apache.lucene + lucene-parent + 4.0-SNAPSHOT + ../../../lucene/pom.xml + + org.apache.lucene + lucene-analyzers-icu + jar + Lucene ICU Analysis Components + + Provides integration with ICU (International Components for Unicode) for + stronger Unicode and internationalization support. 
+ + + modules/analysis/icu + ../build/icu + + + + ${project.groupId} + lucene-core + ${project.version} + + + ${project.groupId} + lucene-core + ${project.version} + test-jar + test + + + ${project.groupId} + lucene-analyzers-common + ${project.version} + + + ${project.groupId} + lucene-analyzers-common + ${project.version} + test-jar + test + + + com.ibm.icu + icu4j + + + junit + junit + test + + + + ${build-directory} + ${build-directory}/classes/java + ${build-directory}/classes/test + src/java + src/test + + + src/resources + + + + + ${project.build.testSourceDirectory} + + **/*.java + + + + + diff --git a/dev-tools/maven/modules/analysis/phonetic/pom.xml b/dev-tools/maven/modules/analysis/phonetic/pom.xml new file mode 100644 index 00000000000..665752e5300 --- /dev/null +++ b/dev-tools/maven/modules/analysis/phonetic/pom.xml @@ -0,0 +1,90 @@ + + + 4.0.0 + + org.apache.lucene + lucene-parent + 4.0-SNAPSHOT + ../../../lucene/pom.xml + + org.apache.lucene + lucene-analyzers-phonetic + jar + Lucene Phonetic Filters + + Provides phonetic encoding via Commons Codec. 
+ + + modules/analysis/phonetic + ../build/phonetic + + + + ${project.groupId} + lucene-core + ${project.version} + + + ${project.groupId} + lucene-core + ${project.version} + test-jar + test + + + ${project.groupId} + lucene-analyzers-common + ${project.version} + + + ${project.groupId} + lucene-analyzers-common + ${project.version} + test-jar + test + + + commons-codec + commons-codec + + + junit + junit + test + + + + ${build-directory} + ${build-directory}/classes/java + ${build-directory}/classes/test + src/java + src/test + + + ${project.build.testSourceDirectory} + + **/*.java + + + + + diff --git a/dev-tools/maven/modules/analysis/pom.xml b/dev-tools/maven/modules/analysis/pom.xml new file mode 100644 index 00000000000..7829ed61b77 --- /dev/null +++ b/dev-tools/maven/modules/analysis/pom.xml @@ -0,0 +1,52 @@ + + + 4.0.0 + + org.apache.lucene + lucene-parent + 4.0-SNAPSHOT + ../../lucene/pom.xml + + org.apache.lucene + lucene-analysis-modules-aggregator + Lucene Analysis Modules aggregator POM + pom + + common + icu + phonetic + smartcn + stempel + + + build/lucene-analysis-modules-aggregator + + + org.apache.maven.plugins + maven-deploy-plugin + + true + + + + + diff --git a/dev-tools/maven/modules/analysis/smartcn/pom.xml b/dev-tools/maven/modules/analysis/smartcn/pom.xml new file mode 100644 index 00000000000..10a1a013dea --- /dev/null +++ b/dev-tools/maven/modules/analysis/smartcn/pom.xml @@ -0,0 +1,82 @@ + + + 4.0.0 + + org.apache.lucene + lucene-parent + 4.0-SNAPSHOT + ../../../lucene/pom.xml + + org.apache.lucene + lucene-analyzers-smartcn + jar + Lucene Smart Chinese Analyzer + Smart Chinese Analyzer + + modules/analysis/smartcn + ../build/smartcn + + + + ${project.groupId} + lucene-core + ${project.version} + + + ${project.groupId} + lucene-core + ${project.version} + test-jar + test + + + ${project.groupId} + lucene-analyzers-common + ${project.version} + + + junit + junit + test + + + + ${build-directory} + ${build-directory}/classes/java + 
${build-directory}/classes/test + src/java + src/test + + + src/resources + + + + + ${project.build.testSourceDirectory} + + **/*.java + + + + + diff --git a/dev-tools/maven/modules/analysis/stempel/pom.xml b/dev-tools/maven/modules/analysis/stempel/pom.xml new file mode 100644 index 00000000000..19b6907e127 --- /dev/null +++ b/dev-tools/maven/modules/analysis/stempel/pom.xml @@ -0,0 +1,82 @@ + + + 4.0.0 + + org.apache.lucene + lucene-parent + 4.0-SNAPSHOT + ../../../lucene/pom.xml + + org.apache.lucene + lucene-analyzers-stempel + jar + Lucene Stempel Analyzer + Stempel Analyzer + + modules/analysis/stempel + ../build/stempel + + + + ${project.groupId} + lucene-core + ${project.version} + + + ${project.groupId} + lucene-core + ${project.version} + test-jar + test + + + ${project.groupId} + lucene-analyzers-common + ${project.version} + + + junit + junit + test + + + + ${build-directory} + ${build-directory}/classes/java + ${build-directory}/classes/test + src/java + src/test + + + src/resources + + + + + ${project.build.testSourceDirectory} + + **/*.java + + + + + diff --git a/dev-tools/maven/modules/benchmark/pom.xml b/dev-tools/maven/modules/benchmark/pom.xml new file mode 100755 index 00000000000..e4b35e338c4 --- /dev/null +++ b/dev-tools/maven/modules/benchmark/pom.xml @@ -0,0 +1,157 @@ + + + 4.0.0 + + org.apache.lucene + lucene-parent + 4.0-SNAPSHOT + ../../lucene/pom.xml + + org.apache.lucene + lucene-benchmark + jar + Lucene Benchmark + Lucene Benchmarking Module + + modules/benchmark + build + + + + ${project.groupId} + lucene-core + ${project.version} + + + ${project.groupId} + lucene-core + ${project.version} + test-jar + test + + + ${project.groupId} + lucene-analyzers-common + ${project.version} + + + ${project.groupId} + lucene-demo + ${project.version} + classes + + + ${project.groupId} + lucene-highlighter + ${project.version} + + + ${project.groupId} + lucene-memory + ${project.version} + + + commons-beanutils + commons-beanutils + + + 
commons-collections + commons-collections + + + org.apache.commons + commons-compress + + + commons-digester + commons-digester + + + commons-logging + commons-logging + + + xerces + xercesImpl + + + xml-apis + xml-apis + + + junit + junit + test + + + + ${build-directory} + ${build-directory}/classes/java + ${build-directory}/classes/test + src/java + src/test + + + ${project.build.testSourceDirectory} + + **/*.java + + + + + + org.codehaus.mojo + appassembler-maven-plugin + + -Xmx128M + flat + + windows + unix + + + + org.apache.lucene.benchmark.byTask.Benchmark + Benchmark + + + org.apache.lucene.benchmark.quality.trec.QueryDriver + QueryDriver + + + org.apache.lucene.benchmark.quality.utils.QualityQueriesFinder + QualityQueriesFinder + + + org.apache.lucene.benchmark.utils.ExtractReuters + ExtractReuters + + + org.apache.lucene.benchmark.utils.ExtractWikipedia + ExtractWikipedia + + + + + + + diff --git a/dev-tools/maven/modules/pom.xml b/dev-tools/maven/modules/pom.xml new file mode 100644 index 00000000000..1b60080c172 --- /dev/null +++ b/dev-tools/maven/modules/pom.xml @@ -0,0 +1,49 @@ + + + 4.0.0 + + org.apache.lucene + lucene-parent + 4.0-SNAPSHOT + ../lucene/pom.xml + + org.apache.lucene + lucene-modules-aggregator + Lucene Modules aggregator POM + pom + + analysis + benchmark + + + build/lucene-modules-aggregator + + + org.apache.maven.plugins + maven-deploy-plugin + + true + + + + + diff --git a/dev-tools/maven/pom.xml b/dev-tools/maven/pom.xml new file mode 100644 index 00000000000..c0c5ae7d812 --- /dev/null +++ b/dev-tools/maven/pom.xml @@ -0,0 +1,690 @@ + + + 4.0.0 + + org.apache + apache + 8 + + org.apache.lucene + lucene-solr-grandparent + 4.0-SNAPSHOT + pom + Grandparent POM for Apache Lucene Java and Apache Solr + Parent POM for Apache Lucene Java and Apache Solr + http://lucene.apache.org/java + + lucene + modules + solr + + + 4.0.0 + yyyy-MM-dd HH:mm:ss + 1.5 + + + JIRA + http://issues.apache.org/jira/browse/LUCENE + + + Hudson + 
http://lucene.zones.apache.org:8080/hudson/job/Lucene-Nightly/ + + + + General List + general-subscribe@lucene.apache.org + general-unsubscribe@lucene.apache.org + + http://mail-archives.apache.org/mod_mbox/lucene-general/ + + + + Java User List + java-user-subscribe@lucene.apache.org + java-user-unsubscribe@lucene.apache.org + + http://mail-archives.apache.org/mod_mbox/lucene-java-user/ + + + + Java Developer List + dev-subscribe@lucene.apache.org + dev-unsubscribe@lucene.apache.org + http://mail-archives.apache.org/mod_mbox/lucene-dev/ + + + Java Commits List + commits-subscribe@lucene.apache.org + commits-unsubscribe@lucene.apache.org + + http://mail-archives.apache.org/mod_mbox/lucene-java-commits/ + + + + 2000 + + + scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory} + + + scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory} + + + http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory} + + + + + Apache 2 + http://www.apache.org/licenses/LICENSE-2.0.txt + + + + + carrot2.org + Carrot2 Maven2 repository + http://download.carrot2.org/maven2/ + + never + + + + apache.snapshots + Apache Snapshot Repository + http://repository.apache.org/snapshots + + false + + + never + + + + + + + com.ibm.icu + icu4j + 4.6 + + + com.sleepycat + berkeleydb + 4.7.25 + + + com.sleepycat + berkeleydb-je + 3.3.93 + + + commons-beanutils + commons-beanutils + 1.7.0 + + + commons-codec + commons-codec + 1.4 + + + commons-collections + commons-collections + 3.2.1 + + + commons-digester + commons-digester + 1.7 + + + commons-fileupload + commons-fileupload + 1.2.1 + + + commons-io + commons-io + 1.4 + + + commons-httpclient + commons-httpclient + 3.1 + + + commons-lang + commons-lang + 2.4 + + + commons-logging + commons-logging + 1.1.1 + + + jakarta-regexp + jakarta-regexp + 1.4 + + + javax.activation + activation + 1.1 + + + javax.mail + mail + 1.4.1 + + + jline + jline + 0.9.1 + + + jtidy + jtidy + 4aug2000r7-dev + + + junit + 
junit + 4.7 + + + org.apache.ant + ant + 1.7.1 + + + org.apache.ant + ant-junit + 1.7.1 + + + org.apache.commons + commons-compress + 1.1 + + + org.apache.geronimo.specs + geronimo-stax-api_1.0_spec + 1.0.1 + + + org.apache.solr + solr-commons-csv + ${project.version} + + + org.apache.solr + solr-noggit + ${project.version} + + + org.apache.tika + tika-core + 0.8 + + + org.apache.tika + tika-parsers + 0.8 + + + org.apache.velocity + velocity + 1.6.4 + + + org.apache.velocity + velocity-tools + 2.0 + + + org.apache.zookeeper + zookeeper + 3.3.1 + + + org.carrot2 + carrot2-core + 3.4.2 + + + org.codehaus.woodstox + wstx-asl + 3.2.7 + + + org.easymock + easymock + 2.2 + + + org.mortbay.jetty + jetty + 6.1.26 + + + org.mortbay.jetty + jetty-util + 6.1.26 + + + org.mortbay.jetty + jsp-2.1-glassfish + 2.1.v20091210 + + + org.mortbay.jetty + jsp-2.1-jetty + 6.1.26 + + + org.mortbay.jetty + jsp-api-2.1-glassfish + 2.1.v20091210 + + + org.slf4j + jcl-over-slf4j + 1.5.5 + + + org.slf4j + log4j-over-slf4j + 1.5.5 + + + org.slf4j + slf4j-api + 1.5.5 + + + org.slf4j + slf4j-jdk14 + 1.5.5 + + + xerces + xercesImpl + 2.10.0 + + + xml-apis + xml-apis + 2.10.0 + + + javax.servlet + servlet-api + 2.4 + + + + + lucene/build/lucene-parent + + + + org.apache.maven.plugins + maven-antrun-plugin + 1.6 + + + org.apache.maven.plugins + maven-clean-plugin + 2.4.1 + + + org.apache.maven.plugins + maven-compiler-plugin + 2.3.2 + + ${java.compat.version} + ${java.compat.version} + + + + org.apache.maven.plugins + maven-deploy-plugin + 2.5 + + + org.apache.maven.plugins + maven-enforcer-plugin + 1.0 + + + org.apache.maven.plugins + maven-install-plugin + 2.3.1 + + + org.apache.maven.plugins + maven-jar-plugin + 2.3.1 + + + + + + + + ${project.groupId} + ${project.groupId} + ${project.name} + + ${base.specification.version}.${now.version} + The Apache Software Foundation + + ${project.version} ${svn.revision} - ${user.name} - ${now.timestamp} + The Apache Software Foundation + 
${java.compat.version} + ${java.compat.version} + + + + + + org.apache.maven.plugins + maven-resources-plugin + 2.4.3 + + + org.apache.maven.plugins + maven-surefire-plugin + 2.7.1 + + plain + ${project.build.testOutputDirectory} + + temp + 1 + ${tests.codec} + ${tests.directory} + ${tests.iter} + ${tests.locale} + ${tests.luceneMatchVersion} + ${tests.multiplier} + ${tests.nightly} + ${tests.seed} + ${tests.timezone} + + + + + org.apache.maven.plugins + maven-war-plugin + 2.1.1 + + + + + + + + ${project.groupId} + ${project.groupId} + ${project.name} + + ${base.specification.version}.${now.version} + The Apache Software Foundation + + ${project.version} ${svn.revision} - ${user.name} - ${now.timestamp} + The Apache Software Foundation + ${java.compat.version} + ${java.compat.version} + + + + + + org.codehaus.mojo + appassembler-maven-plugin + 1.1 + + + org.codehaus.mojo + build-helper-maven-plugin + 1.5 + + + org.codehaus.mojo + buildnumber-maven-plugin + 1.0-beta-4 + + + org.mortbay.jetty + maven-jetty-plugin + 6.1.26 + + + org.codehaus.gmaven + gmaven-plugin + 1.3 + + + + + + org.codehaus.gmaven + gmaven-plugin + + + generate-timestamps + validate + + execute + + + + project.properties['now.timestamp'] = "${maven.build.timestamp}" + project.properties['now.version'] = ("${maven.build.timestamp}" =~ /[- :]/).replaceAll(".") + project.properties['now.year'] = "${maven.build.timestamp}".substring(0, 4) + + + + + + + org.codehaus.mojo + buildnumber-maven-plugin + + + validate + + create + + + + + false + false + true + svn.revision + + + + org.apache.maven.plugins + maven-enforcer-plugin + + + enforce-java-compat-version-and-maven-2.2.1 + + enforce + + + + + Java ${java.compat.version}+ is required. + [${java.compat.version},) + + + Maven 2.2.1+ is required. 
+ [2.2.1,) + + + + + + + + + + + + bootstrap + + + + org.apache.maven.plugins + maven-antrun-plugin + + + get-jars-and-poms + install + + run + + + + + + + + + + + + org.apache.maven.plugins + maven-install-plugin + + + install-icu4j + install + + install-file + + + com.ibm.icu + icu4j + 4.6 + jar + modules/analysis/icu/lib/icu4j-4_6.jar + + + + install-xercesImpl + install + + install-file + + + xerces + xercesImpl + 2.10.0 + jar + modules/benchmark/lib/xercesImpl-2.10.0.jar + + + + install-xml-apis + install + + install-file + + + xml-apis + xml-apis + 2.10.0 + jar + modules/benchmark/lib/xml-apis-2.10.0.jar + + + + install-berkeleydb + install + + install-file + + + com.sleepycat + berkeleydb + 4.7.25 + jar + lucene/contrib/db/bdb/lib/db-4.7.25.jar + + + + install-berkeleydb-je + install + + install-file + + + com.sleepycat + berkeleydb-je + 3.3.93 + jar + lucene/contrib/db/bdb-je/lib/je-3.3.93.jar + + + + install-solr-commons-csv + install + + install-file + + + org.apache.solr + solr-commons-csv + ${project.version} + jar + solr/lib/commons-csv-1.0-SNAPSHOT-r966014.jar + + + + install-solr-noggit + install + + install-file + + + org.apache.solr + solr-noggit + ${project.version} + jar + solr/lib/apache-solr-noggit-r944541.jar + + + + + + + + + diff --git a/dev-tools/maven/solr/contrib/analysis-extras/pom.xml b/dev-tools/maven/solr/contrib/analysis-extras/pom.xml new file mode 100644 index 00000000000..ce67bbaf22e --- /dev/null +++ b/dev-tools/maven/solr/contrib/analysis-extras/pom.xml @@ -0,0 +1,116 @@ + + + 4.0.0 + + org.apache.solr + solr-parent + 4.0-SNAPSHOT + ../../pom.xml + + org.apache.solr + solr-analysis-extras + jar + Apache Solr Analysis Extras + Apache Solr Analysis Extras + + solr/contrib/analysis-extras + build + 4.0 + + + + ${project.groupId} + solr-core + ${project.version} + + + ${project.groupId} + solr-core + ${project.version} + test-jar + test + + + org.apache.lucene + lucene-analyzers-common + ${project.version} + + + org.apache.lucene + 
lucene-analyzers-icu + ${project.version} + + + org.apache.lucene + lucene-analyzers-smartcn + ${project.version} + + + org.apache.lucene + lucene-analyzers-stempel + ${project.version} + + + org.apache.lucene + lucene-core + ${project.version} + + + org.apache.lucene + lucene-core + ${project.version} + test-jar + test + + + junit + junit + test + + + + ${build-directory} + ${build-directory}/classes + ${build-directory}/test-classes + src/java + src/test + + + test-files + + + ../../src/test-files + + + + + org.apache.maven.plugins + maven-surefire-plugin + + + ../../../../testlogging.properties + + + + + + diff --git a/dev-tools/maven/solr/contrib/clustering/pom.xml b/dev-tools/maven/solr/contrib/clustering/pom.xml new file mode 100644 index 00000000000..81266fd9cda --- /dev/null +++ b/dev-tools/maven/solr/contrib/clustering/pom.xml @@ -0,0 +1,103 @@ + + + 4.0.0 + + org.apache.solr + solr-parent + 4.0-SNAPSHOT + ../../pom.xml + + org.apache.solr + solr-clustering + jar + Apache Solr Clustering + Apache Solr Clustering + + solr/contrib/clustering + build + 4.0 + + + + ${project.groupId} + solr-core + ${project.version} + + + ${project.groupId} + solr-core + ${project.version} + test-jar + test + + + ${project.groupId} + solr-solrj + ${project.version} + + + org.apache.lucene + lucene-analyzers-common + ${project.version} + + + org.apache.lucene + lucene-core + ${project.version} + test-jar + test + + + org.carrot2 + carrot2-core + + + junit + junit + test + + + + ${build-directory} + ${build-directory}/classes + ${build-directory}/test-classes + + + src/test/resources + + + ../../src/test-files + + + + + org.apache.maven.plugins + maven-surefire-plugin + + + ../../../../testlogging.properties + + + + + + diff --git a/dev-tools/maven/solr/contrib/dataimporthandler/pom.xml b/dev-tools/maven/solr/contrib/dataimporthandler/pom.xml new file mode 100644 index 00000000000..a90b1dec722 --- /dev/null +++ b/dev-tools/maven/solr/contrib/dataimporthandler/pom.xml @@ -0,0 
+1,50 @@ + + + 4.0.0 + + org.apache.solr + solr-parent + 4.0-SNAPSHOT + ../../pom.xml + + org.apache.solr + solr-dataimporthandler-aggregator + pom + Apache Solr DataImportHandler aggregator POM + Apache Solr DataImportHandler aggregator POM + + src + src/extras + + + target/solr-dataimporthandler-aggregator + + + org.apache.maven.plugins + maven-deploy-plugin + + true + + + + + diff --git a/dev-tools/maven/solr/contrib/dataimporthandler/src/extras/pom.xml b/dev-tools/maven/solr/contrib/dataimporthandler/src/extras/pom.xml new file mode 100644 index 00000000000..bae3817983f --- /dev/null +++ b/dev-tools/maven/solr/contrib/dataimporthandler/src/extras/pom.xml @@ -0,0 +1,132 @@ + + + 4.0.0 + + org.apache.solr + solr-parent + 4.0-SNAPSHOT + ../../../../pom.xml + + org.apache.solr + solr-dataimporthandler-extras + jar + Apache Solr DataImportHandler Extras + Apache Solr DataImportHandler Extras + + solr/contrib/dataimporthandler/src/extras + ../../target/extras + 4.0 + + + + ${project.groupId} + solr-core + ${project.version} + + + ${project.groupId} + solr-core + ${project.version} + test-jar + test + + + ${project.groupId} + solr-dataimporthandler + ${project.version} + + + ${project.groupId} + solr-dataimporthandler + ${project.version} + test-jar + test + + + ${project.groupId} + solr-solrj + ${project.version} + + + org.apache.lucene + lucene-core + ${project.version} + test-jar + test + + + commons-io + commons-io + + + javax.activation + activation + + + javax.mail + mail + + + org.apache.tika + tika-parsers + + + org.slf4j + slf4j-api + + + xerces + xercesImpl + + + junit + junit + test + + + + ${build-directory} + ${build-directory}/extras/classes + ${build-directory}/extras/test-classes + main/java + test/java + + + test/resources + + + ../../../../src/test-files + + + + + org.apache.maven.plugins + maven-surefire-plugin + + + ../../../../../../testlogging.properties + + + + + + diff --git a/dev-tools/maven/solr/contrib/dataimporthandler/src/pom.xml 
b/dev-tools/maven/solr/contrib/dataimporthandler/src/pom.xml new file mode 100644 index 00000000000..0745d2e5417 --- /dev/null +++ b/dev-tools/maven/solr/contrib/dataimporthandler/src/pom.xml @@ -0,0 +1,129 @@ + + + 4.0.0 + + org.apache.solr + solr-parent + 4.0-SNAPSHOT + ../../../pom.xml + + org.apache.solr + solr-dataimporthandler + jar + Apache Solr DataImportHandler + Apache Solr DataImportHandler + + solr/contrib/dataimporthandler + ../target + 4.0 + + + + ${project.groupId} + solr-core + ${project.version} + + + ${project.groupId} + solr-core + ${project.version} + test-jar + test + + + ${project.groupId} + solr-solrj + ${project.version} + + + org.apache.lucene + lucene-analyzers-common + ${project.version} + + + org.apache.lucene + lucene-core + ${project.version} + test-jar + test + + + org.apache.geronimo.specs + geronimo-stax-api_1.0_spec + + + commons-io + commons-io + + + org.slf4j + slf4j-api + + + junit + junit + test + + + org.easymock + easymock + test + + + + ${build-directory} + ${build-directory}/classes + ${build-directory}/test-classes + main/java + test/java + + + test/resources + + + ../../../src/test-files + + + + + org.apache.maven.plugins + maven-jar-plugin + + + + test-jar + + + + + + org.apache.maven.plugins + maven-surefire-plugin + + + ../../../../../testlogging.properties + + + + + + diff --git a/dev-tools/maven/solr/contrib/extraction/pom.xml b/dev-tools/maven/solr/contrib/extraction/pom.xml new file mode 100644 index 00000000000..75d1f2316c9 --- /dev/null +++ b/dev-tools/maven/solr/contrib/extraction/pom.xml @@ -0,0 +1,114 @@ + + + 4.0.0 + + org.apache.solr + solr-parent + 4.0-SNAPSHOT + ../../pom.xml + + org.apache.solr + solr-cell + jar + Apache Solr Content Extraction Library + + Apache Solr Content Extraction Library integrates Apache Tika + content extraction framework into Solr + + + solr/contrib/extraction + build + 4.0 + + + + ${project.groupId} + solr-core + ${project.version} + + + ${project.groupId} + solr-core + 
${project.version} + test-jar + test + + + ${project.groupId} + solr-solrj + ${project.version} + + + org.apache.lucene + lucene-analyzers-common + ${project.version} + + + org.apache.lucene + lucene-core + ${project.version} + test-jar + test + + + com.ibm.icu + icu4j + + + org.apache.tika + tika-parsers + + + xerces + xercesImpl + + + junit + junit + test + + + + ${build-directory} + ${build-directory}/classes + ${build-directory}/test-classes + + + src/test/resources + + + ../../src/test-files + + + + + org.apache.maven.plugins + maven-surefire-plugin + + + ../../../../testlogging.properties + + + + + + diff --git a/dev-tools/maven/solr/contrib/pom.xml b/dev-tools/maven/solr/contrib/pom.xml new file mode 100644 index 00000000000..fff3350f474 --- /dev/null +++ b/dev-tools/maven/solr/contrib/pom.xml @@ -0,0 +1,51 @@ + + + 4.0.0 + + org.apache.solr + solr-parent + 4.0-SNAPSHOT + ../pom.xml + + org.apache.solr + solr-contrib-aggregator + Apache Solr Contrib aggregator POM + pom + + analysis-extras + clustering + dataimporthandler + extraction + + + ../build/solr-contrib-aggregator + + + org.apache.maven.plugins + maven-deploy-plugin + + true + + + + + diff --git a/dev-tools/maven/solr/pom.xml b/dev-tools/maven/solr/pom.xml new file mode 100644 index 00000000000..465af72a1a0 --- /dev/null +++ b/dev-tools/maven/solr/pom.xml @@ -0,0 +1,94 @@ + + + 4.0.0 + + org.apache.lucene + lucene-solr-grandparent + 4.0-SNAPSHOT + ../pom.xml + + org.apache.solr + solr-parent + pom + Apache Solr parent POM + Apache Solr parent POM + + src + src/solrj + src/webapp + contrib + + + 1.6 + + + JIRA + http://issues.apache.org/jira/browse/SOLR + + + Hudson + + http://lucene.zones.apache.org:8080/hudson/job/Solr-Nightly/ + + + + + Solr User List + solr-user-subscribe@lucene.apache.org + solr-user-unsubscribe@lucene.apache.org + + http://mail-archives.apache.org/mod_mbox/solr-user/ + + + + Java Developer List + dev-subscribe@lucene.apache.org + dev-unsubscribe@lucene.apache.org + 
http://mail-archives.apache.org/mod_mbox/lucene-dev/ + + + Java Commits List + commits-subscribe@lucene.apache.org + commits-unsubscribe@lucene.apache.org + + http://mail-archives.apache.org/mod_mbox/lucene-java-commits/ + + + + 2006 + + build/solr-parent + + + + org.apache.maven.plugins + maven-javadoc-plugin + + + ${project.name} ${project.version} API (${now.version}) + ${project.name} ${project.version} API (${now.version}) + + + + + + diff --git a/dev-tools/maven/solr/src/pom.xml b/dev-tools/maven/solr/src/pom.xml new file mode 100644 index 00000000000..0014b38bfd1 --- /dev/null +++ b/dev-tools/maven/solr/src/pom.xml @@ -0,0 +1,252 @@ + + + 4.0.0 + + org.apache.solr + solr-parent + 4.0-SNAPSHOT + ../pom.xml + + org.apache.solr + solr-core + jar + Apache Solr Core + Apache Solr Core + + solr + ../build + 4.0 + + + + ${project.groupId} + solr-solrj + ${project.version} + + + ${project.groupId} + solr-noggit + + + org.apache.lucene + lucene-core + ${project.version} + test-jar + test + + + org.apache.lucene + lucene-analyzers-common + ${project.version} + + + org.apache.lucene + lucene-analyzers-phonetic + ${project.version} + + + org.apache.lucene + lucene-highlighter + ${project.version} + + + org.apache.lucene + lucene-memory + ${project.version} + + + org.apache.lucene + lucene-misc + ${project.version} + + + org.apache.lucene + lucene-queries + ${project.version} + + + org.apache.lucene + lucene-spatial + ${project.version} + + + org.apache.lucene + lucene-spellchecker + ${project.version} + + + org.apache.solr + solr-commons-csv + + + org.apache.geronimo.specs + geronimo-stax-api_1.0_spec + + + commons-codec + commons-codec + + + commons-fileupload + commons-fileupload + + + commons-httpclient + commons-httpclient + + + commons-io + commons-io + + + commons-lang + commons-lang + + + org.apache.velocity + velocity + + + org.apache.velocity + velocity-tools + + + org.mortbay.jetty + jetty + + + org.mortbay.jetty + jetty-util + + + org.mortbay.jetty + 
jsp-2.1-jetty + provided + + + org.slf4j + slf4j-api + + + org.slf4j + slf4j-jdk14 + + + javax.servlet + servlet-api + provided + + + junit + junit + test + + + org.easymock + easymock + test + + + + ${build-directory} + ${build-directory}/solr + ${build-directory}/tests + java + test + + + test-files + + + + + org.codehaus.mojo + build-helper-maven-plugin + + + add-source + generate-sources + + add-source + + + + webapp/src + + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + + test-jar + + + + + + org.apache.maven.plugins + maven-surefire-plugin + + + ../../../testlogging.properties + + + + + org.codehaus.mojo + appassembler-maven-plugin + + -Xmx128M + flat + + windows + unix + + + + org.apache.solr.client.solrj.embedded.JettySolrRunner + JettySolrRunner + + + org.apache.solr.util.BitSetPerf + BitSetPerf + -Xms128m -Xbatch + + + org.apache.solr.util.SimplePostTool + SimplePostTool + + + org.apache.solr.util.SuggestMissingFactories + SuggestMissingFactories + + + + + + + diff --git a/dev-tools/maven/solr/src/solrj/pom.xml b/dev-tools/maven/solr/src/solrj/pom.xml new file mode 100644 index 00000000000..262793a6dbe --- /dev/null +++ b/dev-tools/maven/solr/src/solrj/pom.xml @@ -0,0 +1,135 @@ + + + 4.0.0 + + org.apache.solr + solr-parent + 4.0-SNAPSHOT + ../../pom.xml + + org.apache.solr + solr-solrj + jar + Apache Solr Solrj + Apache Solr Solrj + + solr/src/solrj + ../../build/solrj + 4.0 + + + + org.apache.lucene + lucene-core + ${project.version} + + + org.apache.lucene + lucene-core + ${project.version} + test-jar + test + + + org.apache.lucene + lucene-analyzers-common + ${project.version} + test + + + org.apache.geronimo.specs + geronimo-stax-api_1.0_spec + + + org.apache.zookeeper + zookeeper + + + javax.jms + jms + + + com.sun.jmx + jmxri + + + com.sun.jdmk + jmxtools + + + + + commons-httpclient + commons-httpclient + + + commons-io + commons-io + + + org.slf4j + slf4j-api + + + junit + junit + test + + + + ${build-directory} + ${build-directory} + 
. + + + + org.codehaus.mojo + build-helper-maven-plugin + + + add-source + generate-sources + + add-source + + + + ../common + + + + + + + org.apache.maven.plugins + maven-surefire-plugin + + + + + + true + + + + + diff --git a/dev-tools/maven/solr/src/webapp/pom.xml b/dev-tools/maven/solr/src/webapp/pom.xml new file mode 100644 index 00000000000..af914c5a94a --- /dev/null +++ b/dev-tools/maven/solr/src/webapp/pom.xml @@ -0,0 +1,191 @@ + + + 4.0.0 + + org.apache.solr + solr-parent + 4.0-SNAPSHOT + ../../pom.xml + + org.apache.solr + solr + war + Apache Solr Search Server + Apache Solr Search Server + + solr/src/webapp + ../../build/web + + + + ${project.groupId} + solr-core + ${project.version} + + + commons-chain + commons-chain + + + commons-digester + commons-digester + + + commons-logging + commons-logging + + + commons-validator + commons-validator + + + jakarta-regexp + jakarta-regexp + + + dom4j + dom4j + + + log4j + log4j + + + javax.activation + activation + + + javax.mail + mail + + + jline + jline + + + org.apache.struts + struts-core + + + org.apache.struts + struts-taglib + + + org.apache.struts + struts-tiles + + + org.mortbay.jetty + jetty + + + org.mortbay.jetty + jetty-util + + + oro + oro + + + sslext + sslext + + + xml-apis + xml-apis + + + + + ${project.groupId} + solr-dataimporthandler + ${project.version} + + + org.codehaus.woodstox + wstx-asl + + + org.slf4j + jcl-over-slf4j + + + org.slf4j + log4j-over-slf4j + + + + ${build-directory} + + + org.apache.maven.plugins + maven-javadoc-plugin + + true + + + + org.apache.maven.plugins + maven-source-plugin + + + + + + false + + + + org.apache.maven.plugins + maven-war-plugin + + web + web/WEB-INF/web.xml + + + ../../contrib/dataimporthandler/src/main/webapp + + + + + + org.mortbay.jetty + maven-jetty-plugin + + 10 + + + 8080 + 60000 + + + + / + + web,../../contrib/dataimporthandler/src/main/webapp + + + + + + + diff --git 
a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/TokenSourcesTest.java b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/TokenSourcesTest.java new file mode 100644 index 00000000000..3fcaa10b535 --- /dev/null +++ b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/TokenSourcesTest.java @@ -0,0 +1,191 @@ +package org.apache.lucene.search.highlight; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.io.Reader; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.Token; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.Field.TermVector; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermPositionVector; +import org.apache.lucene.search.DisjunctionMaxQuery; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.spans.SpanTermQuery; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.LockObtainFailedException; +import org.apache.lucene.util.LuceneTestCase; + +// LUCENE-2874 +public class TokenSourcesTest extends LuceneTestCase { + private static final String FIELD = "text"; + + private static final class OverlapAnalyzer extends Analyzer { + + @Override + public TokenStream tokenStream(String fieldName, Reader reader) { + return new TokenStreamOverlap(); + } + } + + private static final class TokenStreamOverlap extends TokenStream { + private Token[] tokens; + + private int i = -1; + + private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class); + private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class); + private final PositionIncrementAttribute positionIncrementAttribute = addAttribute(PositionIncrementAttribute.class); + + public TokenStreamOverlap() { + reset(); + } + + @Override + public boolean incrementToken() throws IOException { + this.i++; + 
if (this.i >= this.tokens.length) { + return false; + } + clearAttributes(); + termAttribute.setEmpty().append(this.tokens[i]); + offsetAttribute.setOffset(this.tokens[i].startOffset(), + this.tokens[i].endOffset()); + positionIncrementAttribute.setPositionIncrement(this.tokens[i] + .getPositionIncrement()); + return true; + } + + @Override + public void reset() { + this.i = -1; + this.tokens = new Token[] { + new Token(new char[] { 't', 'h', 'e' }, 0, 3, 0, 3), + new Token(new char[] { '{', 'f', 'o', 'x', '}' }, 0, 5, 0, 7), + new Token(new char[] { 'f', 'o', 'x' }, 0, 3, 4, 7), + new Token(new char[] { 'd', 'i', 'd' }, 0, 3, 8, 11), + new Token(new char[] { 'n', 'o', 't' }, 0, 3, 12, 15), + new Token(new char[] { 'j', 'u', 'm', 'p' }, 0, 4, 16, 20) }; + this.tokens[1].setPositionIncrement(0); + } + } + + public void testOverlapWithOffset() throws CorruptIndexException, + LockObtainFailedException, IOException, InvalidTokenOffsetsException { + final String TEXT = "the fox did not jump"; + final Directory directory = newDirectory(); + final IndexWriter indexWriter = new IndexWriter(directory, + newIndexWriterConfig(TEST_VERSION_CURRENT, new OverlapAnalyzer())); + try { + final Document document = new Document(); + document.add(new Field(FIELD, new TokenStreamOverlap(), + TermVector.WITH_OFFSETS)); + indexWriter.addDocument(document); + } finally { + indexWriter.close(); + } + final IndexReader indexReader = IndexReader.open(directory, true); + try { + assertEquals(1, indexReader.numDocs()); + final IndexSearcher indexSearcher = new IndexSearcher(indexReader); + try { + final DisjunctionMaxQuery query = new DisjunctionMaxQuery(1); + query.add(new SpanTermQuery(new Term(FIELD, "{fox}"))); + query.add(new SpanTermQuery(new Term(FIELD, "fox"))); + // final Query phraseQuery = new SpanNearQuery(new SpanQuery[] { + // new SpanTermQuery(new Term(FIELD, "{fox}")), + // new SpanTermQuery(new Term(FIELD, "fox")) }, 0, true); + + TopDocs hits = indexSearcher.search(query, 1); 
+ assertEquals(1, hits.totalHits); + final Highlighter highlighter = new Highlighter( + new SimpleHTMLFormatter(), new SimpleHTMLEncoder(), + new QueryScorer(query)); + final TokenStream tokenStream = TokenSources + .getTokenStream( + (TermPositionVector) indexReader.getTermFreqVector(0, FIELD), + false); + assertEquals("the fox did not jump", + highlighter.getBestFragment(tokenStream, TEXT)); + } finally { + indexSearcher.close(); + } + } finally { + indexReader.close(); + directory.close(); + } + } + + public void testOverlapWithPositionsAndOffset() throws CorruptIndexException, + LockObtainFailedException, IOException, InvalidTokenOffsetsException { + final String TEXT = "the fox did not jump"; + final Directory directory = newDirectory(); + final IndexWriter indexWriter = new IndexWriter(directory, + newIndexWriterConfig(TEST_VERSION_CURRENT, new OverlapAnalyzer())); + try { + final Document document = new Document(); + document.add(new Field(FIELD, new TokenStreamOverlap(), + TermVector.WITH_POSITIONS_OFFSETS)); + indexWriter.addDocument(document); + } finally { + indexWriter.close(); + } + final IndexReader indexReader = IndexReader.open(directory, true); + try { + assertEquals(1, indexReader.numDocs()); + final IndexSearcher indexSearcher = new IndexSearcher(indexReader); + try { + final DisjunctionMaxQuery query = new DisjunctionMaxQuery(1); + query.add(new SpanTermQuery(new Term(FIELD, "{fox}"))); + query.add(new SpanTermQuery(new Term(FIELD, "fox"))); + // final Query phraseQuery = new SpanNearQuery(new SpanQuery[] { + // new SpanTermQuery(new Term(FIELD, "{fox}")), + // new SpanTermQuery(new Term(FIELD, "fox")) }, 0, true); + + TopDocs hits = indexSearcher.search(query, 1); + assertEquals(1, hits.totalHits); + final Highlighter highlighter = new Highlighter( + new SimpleHTMLFormatter(), new SimpleHTMLEncoder(), + new QueryScorer(query)); + final TokenStream tokenStream = TokenSources + .getTokenStream( + (TermPositionVector) 
indexReader.getTermFreqVector(0, FIELD), + false); + assertEquals("the fox did not jump", + highlighter.getBestFragment(tokenStream, TEXT)); + } finally { + indexSearcher.close(); + } + } finally { + indexReader.close(); + directory.close(); + } + } + +} diff --git a/lucene/src/java/org/apache/lucene/index/codecs/BlockTermState.java b/lucene/src/java/org/apache/lucene/index/codecs/BlockTermState.java new file mode 100644 index 00000000000..4ab22aef72f --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/codecs/BlockTermState.java @@ -0,0 +1,55 @@ +package org.apache.lucene.index.codecs; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.OrdTermState; +import org.apache.lucene.index.TermState; + +/** + * Holds all state required for {@link PostingsReaderBase} + * to produce a {@link DocsEnum} without re-seeking the + * terms dict. 
+ */ +public class BlockTermState extends OrdTermState { + public int docFreq; // how many docs have this term + public long totalTermFreq; // total number of occurrences of this term + + public int termCount; // term ord are in the current block + public long blockFilePointer; // fp into the terms dict primary file (_X.tib) that holds this term + + public int blockTermCount; // how many terms in current block + + @Override + public void copyFrom(TermState _other) { + assert _other instanceof BlockTermState : "can not copy from " + _other.getClass().getName(); + BlockTermState other = (BlockTermState) _other; + super.copyFrom(_other); + docFreq = other.docFreq; + totalTermFreq = other.totalTermFreq; + termCount = other.termCount; + blockFilePointer = other.blockFilePointer; + + // NOTE: don't copy blockTermCount; + // it's "transient": used only by the "primary" + // termState, and regenerated on seek by TermState + } + + @Override + public String toString() { + return super.toString() + "ord=" + ord + " docFreq=" + docFreq + " totalTermFreq=" + totalTermFreq + " termCount=" + termCount + " blockFP=" + blockFilePointer; + } +} diff --git a/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java b/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java new file mode 100644 index 00000000000..1ea93a04495 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java @@ -0,0 +1,741 @@ +package org.apache.lucene.index.codecs; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.Closeable; +import java.io.IOException; +import java.util.Collection; +import java.util.Comparator; +import java.util.Iterator; +import java.util.TreeMap; + +import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.FieldsEnum; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.TermState; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.codecs.standard.StandardPostingsReader; // javadocs +import org.apache.lucene.store.ByteArrayDataInput; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.CodecUtil; +import org.apache.lucene.util.DoubleBarrelLRUCache; + +/** Handles a terms dict, but decouples all details of + * doc/freqs/positions reading to an instance of {@link + * PostingsReaderBase}. This class is reusable for + * codecs that use a different format for + * docs/freqs/positions (though codecs are also free to + * make their own terms dict impl). + * + *

This class also interacts with an instance of {@link + * TermsIndexReaderBase}, to abstract away the specific + * implementation of the terms dict index. + * @lucene.experimental */ + +public class BlockTermsReader extends FieldsProducer { + // Open input to the main terms dict file (_X.tis) + private final IndexInput in; + + // Reads the terms dict entries, to gather state to + // produce DocsEnum on demand + private final PostingsReaderBase postingsReader; + + private final TreeMap fields = new TreeMap(); + + // Comparator that orders our terms + private final Comparator termComp; + + // Caches the most recently looked-up field + terms: + private final DoubleBarrelLRUCache termsCache; + + // Reads the terms index + private TermsIndexReaderBase indexReader; + + // keeps the dirStart offset + protected long dirOffset; + + // Used as key for the terms cache + private static class FieldAndTerm extends DoubleBarrelLRUCache.CloneableKey { + String field; + BytesRef term; + + public FieldAndTerm() { + } + + public FieldAndTerm(FieldAndTerm other) { + field = other.field; + term = new BytesRef(other.term); + } + + @Override + public boolean equals(Object _other) { + FieldAndTerm other = (FieldAndTerm) _other; + return other.field == field && term.bytesEquals(other.term); + } + + @Override + public Object clone() { + return new FieldAndTerm(this); + } + + @Override + public int hashCode() { + return field.hashCode() * 31 + term.hashCode(); + } + } + + private String segment; + + public BlockTermsReader(TermsIndexReaderBase indexReader, Directory dir, FieldInfos fieldInfos, String segment, PostingsReaderBase postingsReader, int readBufferSize, + Comparator termComp, int termsCacheSize, String codecId) + throws IOException { + + this.postingsReader = postingsReader; + termsCache = new DoubleBarrelLRUCache(termsCacheSize); + + this.termComp = termComp; + this.segment = segment; + in = dir.openInput(IndexFileNames.segmentFileName(segment, codecId, 
BlockTermsWriter.TERMS_EXTENSION), + readBufferSize); + + boolean success = false; + try { + readHeader(in); + + // Have PostingsReader init itself + postingsReader.init(in); + + // Read per-field details + seekDir(in, dirOffset); + + final int numFields = in.readVInt(); + + for(int i=0;i= 0; + final long termsStartPointer = in.readVLong(); + final FieldInfo fieldInfo = fieldInfos.fieldInfo(field); + final long sumTotalTermFreq = fieldInfo.omitTermFreqAndPositions ? -1 : in.readVLong(); + assert !fields.containsKey(fieldInfo.name); + fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq)); + } + success = true; + } finally { + if (!success) { + in.close(); + } + } + + this.indexReader = indexReader; + } + + protected void readHeader(IndexInput input) throws IOException { + CodecUtil.checkHeader(in, BlockTermsWriter.CODEC_NAME, + BlockTermsWriter.VERSION_START, + BlockTermsWriter.VERSION_CURRENT); + dirOffset = in.readLong(); + } + + protected void seekDir(IndexInput input, long dirOffset) + throws IOException { + input.seek(dirOffset); + } + + @Override + public void loadTermsIndex(int indexDivisor) throws IOException { + indexReader.loadTermsIndex(indexDivisor); + } + + @Override + public void close() throws IOException { + try { + try { + if (indexReader != null) { + indexReader.close(); + } + } finally { + // null so if an app hangs on to us (ie, we are not + // GCable, despite being closed) we still free most + // ram + indexReader = null; + if (in != null) { + in.close(); + } + } + } finally { + try { + if (postingsReader != null) { + postingsReader.close(); + } + } finally { + for(FieldReader field : fields.values()) { + field.close(); + } + } + } + } + + public static void files(Directory dir, SegmentInfo segmentInfo, String id, Collection files) { + files.add(IndexFileNames.segmentFileName(segmentInfo.name, id, BlockTermsWriter.TERMS_EXTENSION)); + } + + public static void getExtensions(Collection extensions) { 
+ extensions.add(BlockTermsWriter.TERMS_EXTENSION); + } + + @Override + public FieldsEnum iterator() { + return new TermFieldsEnum(); + } + + @Override + public Terms terms(String field) throws IOException { + return fields.get(field); + } + + // Iterates through all fields + private class TermFieldsEnum extends FieldsEnum { + final Iterator it; + FieldReader current; + + TermFieldsEnum() { + it = fields.values().iterator(); + } + + @Override + public String next() { + if (it.hasNext()) { + current = it.next(); + return current.fieldInfo.name; + } else { + current = null; + return null; + } + } + + @Override + public TermsEnum terms() throws IOException { + return current.iterator(); + } + } + + private class FieldReader extends Terms implements Closeable { + final long numTerms; + final FieldInfo fieldInfo; + final long termsStartPointer; + final long sumTotalTermFreq; + + FieldReader(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq) { + assert numTerms > 0; + this.fieldInfo = fieldInfo; + this.numTerms = numTerms; + this.termsStartPointer = termsStartPointer; + this.sumTotalTermFreq = sumTotalTermFreq; + } + + @Override + public Comparator getComparator() { + return termComp; + } + + @Override + public void close() { + super.close(); + } + + @Override + public TermsEnum iterator() throws IOException { + return new SegmentTermsEnum(); + } + + @Override + public long getUniqueTermCount() { + return numTerms; + } + + @Override + public long getSumTotalTermFreq() { + return sumTotalTermFreq; + } + + // Iterates through terms in this field + private final class SegmentTermsEnum extends TermsEnum { + private final IndexInput in; + private final BlockTermState state; + private final boolean doOrd; + private final FieldAndTerm fieldTerm = new FieldAndTerm(); + private final TermsIndexReaderBase.FieldIndexEnum indexEnum; + private final BytesRef term = new BytesRef(); + + /* This is true if indexEnum is "still" seek'd to the index term + 
for the current term. We set it to true on seeking, and then it + remains valid until next() is called enough times to load another + terms block: */ + private boolean indexIsCurrent; + + /* True if we've already called .next() on the indexEnum, to "bracket" + the current block of terms: */ + private boolean didIndexNext; + + /* Next index term, bracketing the current block of terms; this is + only valid if didIndexNext is true: */ + private BytesRef nextIndexTerm; + + /* True after seek(TermState), to defer seeking. If the app then + calls next() (which is not "typical"), then we'll do the real seek */ + private boolean seekPending; + + /* How many blocks we've read since last seek. Once this + is >= indexReader.getDivisor() we set indexIsCurrent to false (since + the index can no longer bracket seek-within-block). */ + private int blocksSinceSeek; + + private byte[] termSuffixes; + private ByteArrayDataInput termSuffixesReader = new ByteArrayDataInput(null); + + /* Common prefix used for all terms in this block. 
*/ + private int termBlockPrefix; + + private byte[] docFreqBytes; + private final ByteArrayDataInput freqReader = new ByteArrayDataInput(null); + private int metaDataUpto; + + public SegmentTermsEnum() throws IOException { + in = (IndexInput) BlockTermsReader.this.in.clone(); + in.seek(termsStartPointer); + indexEnum = indexReader.getFieldEnum(fieldInfo); + doOrd = indexReader.supportsOrd(); + fieldTerm.field = fieldInfo.name; + state = postingsReader.newTermState(); + state.totalTermFreq = -1; + state.ord = -1; + + termSuffixes = new byte[128]; + docFreqBytes = new byte[64]; + //System.out.println("BTR.enum init this=" + this + " postingsReader=" + postingsReader); + } + + @Override + public Comparator getComparator() { + return termComp; + } + + @Override + public SeekStatus seek(final BytesRef target, final boolean useCache) throws IOException { + + if (indexEnum == null) { + throw new IllegalStateException("terms index was not loaded"); + } + + //System.out.println("BTR.seek seg=" + segment + " target=" + fieldInfo.name + ":" + target.utf8ToString() + " " + target + " current=" + term().utf8ToString() + " " + term() + " useCache=" + useCache + " indexIsCurrent=" + indexIsCurrent + " didIndexNext=" + didIndexNext + " seekPending=" + seekPending + " divisor=" + indexReader.getDivisor() + " this=" + this); + /* + if (didIndexNext) { + if (nextIndexTerm == null) { + //System.out.println(" nextIndexTerm=null"); + } else { + //System.out.println(" nextIndexTerm=" + nextIndexTerm.utf8ToString()); + } + } + */ + + // Check cache + if (useCache) { + fieldTerm.term = target; + // TODO: should we differentiate "frozen" + // TermState (ie one that was cloned and + // cached/returned by termState()) from the + // malleable (primary) one? 
+ final TermState cachedState = termsCache.get(fieldTerm); + if (cachedState != null) { + seekPending = true; + //System.out.println(" cached!"); + seek(target, cachedState); + //System.out.println(" term=" + term.utf8ToString()); + return SeekStatus.FOUND; + } + } + + boolean doSeek = true; + + // See if we can avoid seeking, because target term + // is after current term but before next index term: + if (indexIsCurrent) { + + final int cmp = termComp.compare(term, target); + + if (cmp == 0) { + // Already at the requested term + return SeekStatus.FOUND; + } else if (cmp < 0) { + + // Target term is after current term + if (!didIndexNext) { + if (indexEnum.next() == -1) { + nextIndexTerm = null; + } else { + nextIndexTerm = indexEnum.term(); + } + //System.out.println(" now do index next() nextIndexTerm=" + (nextIndexTerm == null ? "null" : nextIndexTerm.utf8ToString())); + didIndexNext = true; + } + + if (nextIndexTerm == null || termComp.compare(target, nextIndexTerm) < 0) { + // Optimization: requested term is within the + // same term block we are now in; skip seeking + // (but do scanning): + doSeek = false; + //System.out.println(" skip seek: nextIndexTerm=" + (nextIndexTerm == null ? "null" : nextIndexTerm.utf8ToString())); + } + } + } + + if (doSeek) { + //System.out.println(" seek"); + + // Ask terms index to find biggest indexed term (= + // first term in a block) that's <= our text: + in.seek(indexEnum.seek(target)); + boolean result = nextBlock(); + + // Block must exist since, at least, the indexed term + // is in the block: + assert result; + + indexIsCurrent = true; + didIndexNext = false; + blocksSinceSeek = 0; + + if (doOrd) { + state.ord = indexEnum.ord()-1; + } + + // NOTE: the first _next() after an index seek is + // a bit wasteful, since it redundantly reads some + // suffix bytes into the buffer. 
We could avoid storing + // those bytes in the primary file, but then when + // next()ing over an index term we'd have to + // special case it: + term.copy(indexEnum.term()); + //System.out.println(" seek: term=" + term.utf8ToString()); + } else { + ////System.out.println(" skip seek"); + } + + seekPending = false; + + // Now scan: + while (_next() != null) { + final int cmp = termComp.compare(term, target); + if (cmp == 0) { + // Match! + if (useCache) { + // Store in cache + decodeMetaData(); + termsCache.put(new FieldAndTerm(fieldTerm), (BlockTermState) state.clone()); + } + //System.out.println(" FOUND"); + return SeekStatus.FOUND; + } else if (cmp > 0) { + //System.out.println(" NOT_FOUND term=" + term.utf8ToString()); + return SeekStatus.NOT_FOUND; + } + + // The purpose of the terms dict index is to seek + // the enum to the closest index term before the + // term we are looking for. So, we should never + // cross another index term (besides the first + // one) while we are scanning: + assert indexIsCurrent; + } + + indexIsCurrent = false; + //System.out.println(" END"); + return SeekStatus.END; + } + + @Override + public BytesRef next() throws IOException { + //System.out.println("BTR.next() seekPending=" + seekPending + " pendingSeekCount=" + state.termCount); + + // If seek was previously called and the term was cached, + // usually caller is just going to pull a D/&PEnum or get + // docFreq, etc. 
But, if they then call next(), + // this method catches up all internal state so next() + // works properly: + if (seekPending) { + assert !indexIsCurrent; + in.seek(state.blockFilePointer); + final int pendingSeekCount = state.termCount; + boolean result = nextBlock(); + + final long savOrd = state.ord; + + // Block must exist since seek(TermState) was called w/ a + // TermState previously returned by this enum when positioned + // on a real term: + assert result; + + while(state.termCount < pendingSeekCount) { + BytesRef nextResult = _next(); + assert nextResult != null; + } + seekPending = false; + state.ord = savOrd; + } + return _next(); + } + + /* Decodes only the term bytes of the next term. If caller then asks for + metadata, ie docFreq, totalTermFreq or pulls a D/&PEnum, we then (lazily) + decode all metadata up to the current term. */ + private BytesRef _next() throws IOException { + //System.out.println("BTR._next this=" + this + " termCount=" + state.termCount + " (vs " + state.blockTermCount + ")"); + if (state.termCount == state.blockTermCount) { + if (!nextBlock()) { + //System.out.println(" eof"); + indexIsCurrent = false; + return null; + } + } + + // TODO: cutover to something better for these ints! simple64? 
+ final int suffix = termSuffixesReader.readVInt(); + //System.out.println(" suffix=" + suffix); + + term.length = termBlockPrefix + suffix; + if (term.bytes.length < term.length) { + term.grow(term.length); + } + termSuffixesReader.readBytes(term.bytes, termBlockPrefix, suffix); + state.termCount++; + + // NOTE: meaningless in the non-ord case + state.ord++; + + //System.out.println(" return term=" + fieldInfo.name + ":" + term.utf8ToString() + " " + term); + return term; + } + + @Override + public BytesRef term() { + return term; + } + + @Override + public int docFreq() throws IOException { + //System.out.println("BTR.docFreq"); + decodeMetaData(); + //System.out.println(" return " + state.docFreq); + return state.docFreq; + } + + @Override + public long totalTermFreq() throws IOException { + decodeMetaData(); + return state.totalTermFreq; + } + + @Override + public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException { + //System.out.println("BTR.docs this=" + this); + decodeMetaData(); + //System.out.println(" state.docFreq=" + state.docFreq); + final DocsEnum docsEnum = postingsReader.docs(fieldInfo, state, skipDocs, reuse); + assert docsEnum != null; + return docsEnum; + } + + @Override + public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException { + //System.out.println("BTR.d&p this=" + this); + decodeMetaData(); + if (fieldInfo.omitTermFreqAndPositions) { + return null; + } else { + DocsAndPositionsEnum dpe = postingsReader.docsAndPositions(fieldInfo, state, skipDocs, reuse); + //System.out.println(" return d&pe=" + dpe); + return dpe; + } + } + + @Override + public void seek(BytesRef target, TermState otherState) throws IOException { + //System.out.println("BTR.seek termState target=" + target.utf8ToString() + " " + target + " this=" + this); + assert otherState != null && otherState instanceof BlockTermState; + assert !doOrd || ((BlockTermState) otherState).ord < numTerms; + 
state.copyFrom(otherState); + seekPending = true; + indexIsCurrent = false; + term.copy(target); + } + + @Override + public TermState termState() throws IOException { + //System.out.println("BTR.termState this=" + this); + decodeMetaData(); + TermState ts = (TermState) state.clone(); + //System.out.println(" return ts=" + ts); + return ts; + } + + @Override + public SeekStatus seek(long ord) throws IOException { + //System.out.println("BTR.seek by ord ord=" + ord); + if (indexEnum == null) { + throw new IllegalStateException("terms index was not loaded"); + } + + if (ord >= numTerms) { + state.ord = numTerms-1; + return SeekStatus.END; + } + + // TODO: if ord is in same terms block and + // after current ord, we should avoid this seek just + // like we do in the seek(BytesRef) case + in.seek(indexEnum.seek(ord)); + boolean result = nextBlock(); + + // Block must exist since ord < numTerms: + assert result; + + indexIsCurrent = true; + didIndexNext = false; + blocksSinceSeek = 0; + seekPending = false; + + state.ord = indexEnum.ord()-1; + assert state.ord >= -1: "ord=" + state.ord; + term.copy(indexEnum.term()); + + // Now, scan: + int left = (int) (ord - state.ord); + while(left > 0) { + final BytesRef term = _next(); + assert term != null; + left--; + assert indexIsCurrent; + } + + // always found + return SeekStatus.FOUND; + } + + public long ord() { + if (!doOrd) { + throw new UnsupportedOperationException(); + } + return state.ord; + } + + private void doPendingSeek() { + } + + /* Does initial decode of next block of terms; this + doesn't actually decode the docFreq, totalTermFreq, + postings details (frq/prx offset, etc.) metadata; + it just loads them as byte[] blobs which are then + decoded on-demand if the metadata is ever requested + for any term in this block. This enables terms-only + intensive consumes (eg certain MTQs, respelling) to + not pay the price of decoding metadata they won't + use. 
*/ + private boolean nextBlock() throws IOException { + + // TODO: we still lazy-decode the byte[] for each + // term (the suffix), but, if we decoded + // all N terms up front then seeking could do a fast + // bsearch w/in the block... + + //System.out.println("BTR.nextBlock() fp=" + in.getFilePointer() + " this=" + this); + state.blockFilePointer = in.getFilePointer(); + state.blockTermCount = in.readVInt(); + //System.out.println(" blockTermCount=" + state.blockTermCount); + if (state.blockTermCount == 0) { + return false; + } + termBlockPrefix = in.readVInt(); + + // term suffixes: + int len = in.readVInt(); + if (termSuffixes.length < len) { + termSuffixes = new byte[ArrayUtil.oversize(len, 1)]; + } + //System.out.println(" termSuffixes len=" + len); + in.readBytes(termSuffixes, 0, len); + termSuffixesReader.reset(termSuffixes); + + // docFreq, totalTermFreq + len = in.readVInt(); + if (docFreqBytes.length < len) { + docFreqBytes = new byte[ArrayUtil.oversize(len, 1)]; + } + //System.out.println(" freq bytes len=" + len); + in.readBytes(docFreqBytes, 0, len); + freqReader.reset(docFreqBytes); + metaDataUpto = 0; + + state.termCount = 0; + + postingsReader.readTermsBlock(in, fieldInfo, state); + + blocksSinceSeek++; + indexIsCurrent &= (blocksSinceSeek < indexReader.getDivisor()); + //System.out.println(" indexIsCurrent=" + indexIsCurrent); + + return true; + } + + private void decodeMetaData() throws IOException { + //System.out.println("BTR.decodeMetadata mdUpto=" + metaDataUpto + " vs termCount=" + state.termCount + " state=" + state); + if (!seekPending) { + // lazily catch up on metadata decode: + final int limit = state.termCount; + state.termCount = metaDataUpto; + while (metaDataUpto < limit) { + //System.out.println(" decode"); + // TODO: we could make "tiers" of metadata, ie, + // decode docFreq/totalTF but don't decode postings + // metadata; this way caller could get + // docFreq/totalTF w/o paying decode cost for + // postings + state.docFreq = 
freqReader.readVInt(); + if (!fieldInfo.omitTermFreqAndPositions) { + state.totalTermFreq = state.docFreq + freqReader.readVLong(); + } + postingsReader.nextTerm(fieldInfo, state); + metaDataUpto++; + state.termCount++; + } + } else { + //System.out.println(" skip! seekPending"); + } + } + } + } +} diff --git a/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsWriter.java b/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsWriter.java new file mode 100644 index 00000000000..c60b42506ed --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsWriter.java @@ -0,0 +1,316 @@ +package org.apache.lucene.index.codecs; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; + +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.RAMOutputStream; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.CodecUtil; +import org.apache.lucene.util.RamUsageEstimator; + +// TODO: currently we encode all terms between two indexed +// terms as a block; but, we could decouple the two, ie +// allow several blocks in between two indexed terms + +/** + * Writes terms dict, block-encoding (column stride) each + * term's metadata for each set of terms between two + * index terms. + * + * @lucene.experimental + */ + +public class BlockTermsWriter extends FieldsConsumer { + + final static String CODEC_NAME = "BLOCK_TERMS_DICT"; + + // Initial format + public static final int VERSION_START = 0; + + public static final int VERSION_CURRENT = VERSION_START; + + /** Extension of terms file */ + static final String TERMS_EXTENSION = "tib"; + + protected final IndexOutput out; + final PostingsWriterBase postingsWriter; + final FieldInfos fieldInfos; + FieldInfo currentField; + private final TermsIndexWriterBase termsIndexWriter; + private final List fields = new ArrayList(); + private final Comparator termComp; + private final String segment; + + public BlockTermsWriter( + TermsIndexWriterBase termsIndexWriter, + SegmentWriteState state, + PostingsWriterBase postingsWriter, + Comparator termComp) throws IOException + { + final String termsFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, TERMS_EXTENSION); + this.termsIndexWriter = termsIndexWriter; + this.termComp = termComp; + out = state.directory.createOutput(termsFileName); + fieldInfos = 
state.fieldInfos; + writeHeader(out); + currentField = null; + this.postingsWriter = postingsWriter; + segment = state.segmentName; + + //System.out.println("BTW.init seg=" + state.segmentName); + + postingsWriter.start(out); // have consumer write its format/header + } + + protected void writeHeader(IndexOutput out) throws IOException { + CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT); + + out.writeLong(0); // leave space for end index pointer + } + + @Override + public TermsConsumer addField(FieldInfo field) throws IOException { + //System.out.println("\nBTW.addField seg=" + segment + " field=" + field.name); + assert currentField == null || currentField.name.compareTo(field.name) < 0; + currentField = field; + TermsIndexWriterBase.FieldWriter fieldIndexWriter = termsIndexWriter.addField(field, out.getFilePointer()); + final TermsWriter terms = new TermsWriter(fieldIndexWriter, field, postingsWriter); + fields.add(terms); + return terms; + } + + @Override + public void close() throws IOException { + + try { + + int nonZeroCount = 0; + for(TermsWriter field : fields) { + if (field.numTerms > 0) { + nonZeroCount++; + } + } + + final long dirStart = out.getFilePointer(); + + out.writeVInt(nonZeroCount); + for(TermsWriter field : fields) { + if (field.numTerms > 0) { + out.writeVInt(field.fieldInfo.number); + out.writeVLong(field.numTerms); + out.writeVLong(field.termsStartPointer); + if (!field.fieldInfo.omitTermFreqAndPositions) { + out.writeVLong(field.sumTotalTermFreq); + } + } + } + writeTrailer(dirStart); + } finally { + try { + out.close(); + } finally { + try { + postingsWriter.close(); + } finally { + termsIndexWriter.close(); + } + } + } + } + + protected void writeTrailer(long dirStart) throws IOException { + // TODO Auto-generated method stub + out.seek(CodecUtil.headerLength(CODEC_NAME)); + out.writeLong(dirStart); + } + + private static class TermEntry { + public final BytesRef term = new BytesRef(); + public TermStats stats; + } + + class 
TermsWriter extends TermsConsumer { + private final FieldInfo fieldInfo; + private final PostingsWriterBase postingsWriter; + private final long termsStartPointer; + private long numTerms; + private final TermsIndexWriterBase.FieldWriter fieldIndexWriter; + long sumTotalTermFreq; + private final BytesRef lastTerm = new BytesRef(); + + private TermEntry[] pendingTerms; + + private int pendingCount; + + TermsWriter( + TermsIndexWriterBase.FieldWriter fieldIndexWriter, + FieldInfo fieldInfo, + PostingsWriterBase postingsWriter) + { + this.fieldInfo = fieldInfo; + this.fieldIndexWriter = fieldIndexWriter; + pendingTerms = new TermEntry[32]; + for(int i=0;i getComparator() { + return termComp; + } + + @Override + public PostingsConsumer startTerm(BytesRef text) throws IOException { + //System.out.println("BTW.startTerm seg=" + segment + " term=" + fieldInfo.name + ":" + text.utf8ToString() + " " + text); + postingsWriter.startTerm(); + return postingsWriter; + } + + private final BytesRef lastPrevTerm = new BytesRef(); + + @Override + public void finishTerm(BytesRef text, TermStats stats) throws IOException { + + assert stats.docFreq > 0; + //System.out.println("BTW.finishTerm seg=" + segment + " term=" + fieldInfo.name + ":" + text.utf8ToString() + " " + text + " df=" + stats.docFreq); + + final boolean isIndexTerm = fieldIndexWriter.checkIndexTerm(text, stats); + + if (isIndexTerm) { + if (pendingCount > 0) { + // Instead of writing each term, live, we gather terms + // in RAM in a pending buffer, and then write the + // entire block in between index terms: + flushBlock(); + } + fieldIndexWriter.add(text, stats, out.getFilePointer()); + } + + if (pendingTerms.length == pendingCount) { + final TermEntry[] newArray = new TermEntry[ArrayUtil.oversize(pendingCount+1, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; + System.arraycopy(pendingTerms, 0, newArray, 0, pendingCount); + for(int i=pendingCount;i 0) { + flushBlock(); + } + // EOF marker: + out.writeVInt(0); + + 
this.sumTotalTermFreq = sumTotalTermFreq; + fieldIndexWriter.finish(out.getFilePointer()); + } + + private int sharedPrefix(BytesRef term1, BytesRef term2) { + assert term1.offset == 0; + assert term2.offset == 0; + int pos1 = 0; + int pos1End = pos1 + Math.min(term1.length, term2.length); + int pos2 = 0; + while(pos1 < pos1End) { + if (term1.bytes[pos1] != term2.bytes[pos2]) { + return pos1; + } + pos1++; + pos2++; + } + return pos1; + } + + private final RAMOutputStream bytesWriter = new RAMOutputStream(); + + private void flushBlock() throws IOException { + //System.out.println("BTW.flushBlock pendingCount=" + pendingCount); + + // First pass: compute common prefix for all terms + // in the block, against term before first term in + // this block: + int commonPrefix = sharedPrefix(lastPrevTerm, pendingTerms[0].term); + for(int termCount=1;termCount attClass, String key, Object value); + +} diff --git a/lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestSimpleAttributeImpl.java b/lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestSimpleAttributeImpl.java new file mode 100644 index 00000000000..b8e9a0df7e2 --- /dev/null +++ b/lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestSimpleAttributeImpl.java @@ -0,0 +1,46 @@ +package org.apache.lucene.analysis.tokenattributes; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util._TestUtil; +import org.apache.lucene.util.LuceneTestCase; + +import java.util.Collections; +import java.util.HashMap; + +public class TestSimpleAttributeImpl extends LuceneTestCase { + + // this checks using reflection API if the defaults are correct + public void testAttributes() { + _TestUtil.assertAttributeReflection(new PositionIncrementAttributeImpl(), + Collections.singletonMap(PositionIncrementAttribute.class.getName()+"#positionIncrement", 1)); + _TestUtil.assertAttributeReflection(new FlagsAttributeImpl(), + Collections.singletonMap(FlagsAttribute.class.getName()+"#flags", 0)); + _TestUtil.assertAttributeReflection(new TypeAttributeImpl(), + Collections.singletonMap(TypeAttribute.class.getName()+"#type", TypeAttribute.DEFAULT_TYPE)); + _TestUtil.assertAttributeReflection(new PayloadAttributeImpl(), + Collections.singletonMap(PayloadAttribute.class.getName()+"#payload", null)); + _TestUtil.assertAttributeReflection(new KeywordAttributeImpl(), + Collections.singletonMap(KeywordAttribute.class.getName()+"#keyword", false)); + _TestUtil.assertAttributeReflection(new OffsetAttributeImpl(), new HashMap() {{ + put(OffsetAttribute.class.getName()+"#startOffset", 0); + put(OffsetAttribute.class.getName()+"#endOffset", 0); + }}); + } + +} diff --git a/solr/contrib/analysis-extras/CHANGES.txt b/solr/contrib/analysis-extras/CHANGES.txt new file mode 100644 index 00000000000..e512decaa99 --- /dev/null +++ b/solr/contrib/analysis-extras/CHANGES.txt @@ -0,0 +1,30 @@ + Apache Solr - Analysis Extras + Release Notes + 
+Introduction +------------ +The analysis-extras plugin provides additional analyzers that rely +upon large dependencies/dictionaries. + +It includes integration with ICU for multilingual support, and +analyzers for Chinese and Polish. + + +$Id$ +================== Release 4.0-dev ================== + +(No Changes) + +================== Release 3.1-dev ================== + +* SOLR-2210: Add icu-based tokenizer and filters to contrib/analysis-extras (rmuir) + +* SOLR-1336: Add SmartChinese (word segmentation for Simplified Chinese) + tokenizer and filters to contrib/analysis-extras (rmuir) + +* SOLR-2211,LUCENE-2763: Added UAX29URLEmailTokenizerFactory, which implements + UAX#29, a unicode algorithm with good results for most languages, as well as + URL and E-mail tokenization according to the relevant RFCs. + (Tom Burton-West via rmuir) + +* SOLR-2237: Added StempelPolishStemFilterFactory to contrib/analysis-extras (rmuir) diff --git a/solr/src/java/org/apache/solr/search/SolrSortField.java b/solr/src/java/org/apache/solr/search/SolrSortField.java new file mode 100644 index 00000000000..8b21e4357bd --- /dev/null +++ b/solr/src/java/org/apache/solr/search/SolrSortField.java @@ -0,0 +1,31 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +package org.apache.solr.search; + +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.SortField; + +import java.io.IOException; + +/**@lucene.internal + * + */ +public interface SolrSortField { + public SortField weight(IndexSearcher searcher) throws IOException; +} diff --git a/solr/src/test/org/apache/solr/response/TestPHPSerializedResponseWriter.java b/solr/src/test/org/apache/solr/response/TestPHPSerializedResponseWriter.java new file mode 100644 index 00000000000..d67e1fb8cb8 --- /dev/null +++ b/solr/src/test/org/apache/solr/response/TestPHPSerializedResponseWriter.java @@ -0,0 +1,107 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.response; + +import java.io.IOException; +import java.io.StringWriter; +import java.util.Arrays; +import java.util.LinkedHashMap; + +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.response.PHPSerializedResponseWriter; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.response.QueryResponseWriter; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.SolrDocumentList; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * Basic PHPS tests based on JSONWriterTest + * + */ +public class TestPHPSerializedResponseWriter extends SolrTestCaseJ4 { + @BeforeClass + public static void beforeClass() throws Exception { + initCore("solrconfig.xml","schema.xml"); + } + + @Test + public void testSimple() throws IOException { + SolrQueryRequest req = req("dummy"); + SolrQueryResponse rsp = new SolrQueryResponse(); + QueryResponseWriter w = new PHPSerializedResponseWriter(); + + StringWriter buf = new StringWriter(); + rsp.add("data1", "hello"); + rsp.add("data2", 42); + rsp.add("data3", true); + w.write(buf, req, rsp); + assertEquals("a:3:{s:5:\"data1\";s:5:\"hello\";s:5:\"data2\";i:42;s:5:\"data3\";b:1;}", + buf.toString()); + req.close(); + } + + + @Test + public void testSolrDocuments() throws IOException { + SolrQueryRequest req = req("q","*:*"); + SolrQueryResponse rsp = new SolrQueryResponse(); + QueryResponseWriter w = new PHPSerializedResponseWriter(); + StringWriter buf = new StringWriter(); + + SolrDocument d = new SolrDocument(); + + SolrDocument d1 = d; + d.addField("id","1"); + d.addField("data1","hello"); + d.addField("data2",42); + d.addField("data3",true); + + // multivalued fields: + + // extremely odd edge case: value is a map + + // we use LinkedHashMap because we are doing a string comparison + // later and we need predictible ordering + LinkedHashMap nl = new 
LinkedHashMap(); + nl.put("data4.1", "hashmap"); + nl.put("data4.2", "hello"); + d.addField("data4",nl); + // array value + d.addField("data5",Arrays.asList("data5.1", "data5.2", "data5.3")); + + // adding one more document to test array indexes + d = new SolrDocument(); + SolrDocument d2 = d; + d.addField("id","2"); + + SolrDocumentList sdl = new SolrDocumentList(); + sdl.add(d1); + sdl.add(d2); + rsp.add("response", sdl); + + w.write(buf, req, rsp); + assertEquals("a:1:{s:8:\"response\";a:3:{s:8:\"numFound\";i:0;s:5:\"start\";i:0;s:4:\"docs\";a:2:{i:0;a:6:{s:2:\"id\";s:1:\"1\";s:5:\"data1\";s:5:\"hello\";s:5:\"data2\";i:42;s:5:\"data3\";b:1;s:5:\"data4\";a:2:{s:7:\"data4.1\";s:7:\"hashmap\";s:7:\"data4.2\";s:5:\"hello\";}s:5:\"data5\";a:3:{i:0;s:7:\"data5.1\";i:1;s:7:\"data5.2\";i:2;s:7:\"data5.3\";}}i:1;a:1:{s:2:\"id\";s:1:\"2\";}}}}", + buf.toString()); + req.close(); + } + +}