mirror of https://github.com/apache/lucene.git
Add BaseKnnVectorsFormatTestCase.testRecall() and fix old codecs (#13910)
* Add BaseKnnVectorsFormatTestCase.testRecall() and fix map ord to doc in Lucene90HnswVectorsReader
This commit is contained in:
parent
1faf33a02a
commit
3983fa2c8d
|
@ -260,7 +260,7 @@ public final class Lucene90HnswVectorsReader extends KnnVectorsReader {
|
|||
int node = results.topNode();
|
||||
float minSimilarity = results.topScore();
|
||||
results.pop();
|
||||
knnCollector.collect(node, minSimilarity);
|
||||
knnCollector.collect(vectorValues.ordToDoc(node), minSimilarity);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -236,7 +236,7 @@ public final class Lucene91HnswGraphBuilder {
|
|||
// extract all the Neighbors from the queue into an array; these will now be
|
||||
// sorted from worst to best
|
||||
for (int i = 0; i < candidateCount; i++) {
|
||||
float similarity = candidates.minCompetitiveSimilarity();
|
||||
float similarity = candidates.minimumScore();
|
||||
scratch.add(candidates.popNode(), similarity);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -555,7 +555,7 @@ public class HnswGraphBuilder implements HnswBuilder {
|
|||
return queue.nodes();
|
||||
}
|
||||
|
||||
float minimumScore() {
|
||||
public float minimumScore() {
|
||||
return queue.topScore();
|
||||
}
|
||||
|
||||
|
|
|
@ -102,6 +102,11 @@ public class TestLucene99ScalarQuantizedVectorsFormat extends BaseKnnVectorsForm
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void testRecall() {
|
||||
// ignore this test since this class always returns no results from search
|
||||
}
|
||||
|
||||
public void testQuantizedVectorsWriteAndRead() throws Exception {
|
||||
// create lucene directory with codec
|
||||
int numVectors = 1 + random().nextInt(50);
|
||||
|
|
|
@ -562,7 +562,6 @@ public class TestKnnGraph extends LuceneTestCase {
|
|||
String idString = Integer.toString(id);
|
||||
doc.add(new StringField("id", idString, Field.Store.YES));
|
||||
doc.add(new SortedDocValuesField("id", new BytesRef(idString)));
|
||||
// XSSystem.out.println("add " + idString + " " + Arrays.toString(vector));
|
||||
iw.updateDocument(new Term("id", idString), doc);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -16,15 +16,21 @@
|
|||
*/
|
||||
package org.apache.lucene.tests.index;
|
||||
|
||||
import static com.carrotsearch.randomizedtesting.RandomizedTest.randomIntBetween;
|
||||
import static java.nio.charset.StandardCharsets.UTF_8;
|
||||
import static org.apache.lucene.index.VectorSimilarityFunction.DOT_PRODUCT;
|
||||
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.atomic.AtomicReference;
|
||||
|
@ -70,6 +76,10 @@ import org.apache.lucene.index.Term;
|
|||
import org.apache.lucene.index.VectorEncoding;
|
||||
import org.apache.lucene.index.VectorSimilarityFunction;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.KnnFloatVectorQuery;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
|
@ -1906,4 +1916,168 @@ public abstract class BaseKnnVectorsFormatTestCase extends BaseIndexFileFormatTe
|
|||
|
||||
IOUtils.close(reader, w2, dir1, dir2);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test that the query is a viable approximation to exact search. This test is designed to uncover
|
||||
* gross failures only, not to represent the true expected recall.
|
||||
*/
|
||||
public void testRecall() throws IOException {
|
||||
VectorSimilarityFunction[] functions = {
|
||||
VectorSimilarityFunction.EUCLIDEAN,
|
||||
VectorSimilarityFunction.COSINE,
|
||||
VectorSimilarityFunction.DOT_PRODUCT,
|
||||
VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT
|
||||
};
|
||||
for (VectorSimilarityFunction similarity : functions) {
|
||||
assertRecall(similarity, 0.5, 1.0);
|
||||
}
|
||||
}
|
||||
|
||||
protected void assertRecall(VectorSimilarityFunction similarity, double min, double max)
|
||||
throws IOException {
|
||||
int dim = 16;
|
||||
int recalled = 0;
|
||||
try (Directory indexStore = getKnownIndexStore("field", dim, similarity);
|
||||
IndexReader reader = DirectoryReader.open(indexStore)) {
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
float[] queryEmbedding = new float[dim];
|
||||
// indexed 421 lines from LICENSE.txt
|
||||
// indexed 157 lines from NOTICE.txt
|
||||
int topK = 10;
|
||||
int numQueries = 578;
|
||||
String[] testQueries = {
|
||||
"Apache Lucene",
|
||||
"Apache License",
|
||||
"TERMS AND CONDITIONS",
|
||||
"Copyright 2001",
|
||||
"Permission is hereby",
|
||||
"Copyright © 2003",
|
||||
"The dictionary comes from Morfologik project",
|
||||
"The levenshtein automata tables"
|
||||
};
|
||||
for (String queryString : testQueries) {
|
||||
computeLineEmbedding(queryString, queryEmbedding);
|
||||
|
||||
// pass match-all "filter" to force full traversal, bypassing graph
|
||||
KnnFloatVectorQuery exactQuery =
|
||||
new KnnFloatVectorQuery("field", queryEmbedding, 1000, new MatchAllDocsQuery());
|
||||
assertEquals(numQueries, searcher.count(exactQuery)); // Same for exact search
|
||||
|
||||
KnnFloatVectorQuery query = new KnnFloatVectorQuery("field", queryEmbedding, topK);
|
||||
assertEquals(10, searcher.count(query)); // Expect some results without timeout
|
||||
TopDocs results = searcher.search(query, topK);
|
||||
Set<Integer> resultDocs = new HashSet<>();
|
||||
int i = 0;
|
||||
for (ScoreDoc scoreDoc : results.scoreDocs) {
|
||||
if (VERBOSE) {
|
||||
System.out.println(
|
||||
"result "
|
||||
+ i++
|
||||
+ ": "
|
||||
+ reader.storedFields().document(scoreDoc.doc)
|
||||
+ " "
|
||||
+ scoreDoc);
|
||||
}
|
||||
resultDocs.add(scoreDoc.doc);
|
||||
}
|
||||
TopDocs expected = searcher.search(exactQuery, topK);
|
||||
i = 0;
|
||||
for (ScoreDoc scoreDoc : expected.scoreDocs) {
|
||||
if (VERBOSE) {
|
||||
System.out.println(
|
||||
"expected "
|
||||
+ i++
|
||||
+ ": "
|
||||
+ reader.storedFields().document(scoreDoc.doc)
|
||||
+ " "
|
||||
+ scoreDoc);
|
||||
}
|
||||
if (resultDocs.contains(scoreDoc.doc)) {
|
||||
++recalled;
|
||||
}
|
||||
}
|
||||
}
|
||||
int totalResults = testQueries.length * topK;
|
||||
assertTrue(
|
||||
"Average recall for "
|
||||
+ similarity
|
||||
+ " should be at least "
|
||||
+ (totalResults * min)
|
||||
+ " / "
|
||||
+ totalResults
|
||||
+ ", got "
|
||||
+ recalled,
|
||||
recalled >= (int) (totalResults * min));
|
||||
assertTrue(
|
||||
"Average recall for "
|
||||
+ similarity
|
||||
+ " should be no more than "
|
||||
+ (totalResults * max)
|
||||
+ " / "
|
||||
+ totalResults
|
||||
+ ", got "
|
||||
+ recalled,
|
||||
recalled <= (int) (totalResults * max));
|
||||
}
|
||||
}
|
||||
|
||||
/** Creates a new directory and adds documents with the given vectors as kNN vector fields */
|
||||
Directory getKnownIndexStore(
|
||||
String field, int dimension, VectorSimilarityFunction vectorSimilarityFunction)
|
||||
throws IOException {
|
||||
Directory indexStore = newDirectory(random());
|
||||
IndexWriter writer = new IndexWriter(indexStore, newIndexWriterConfig());
|
||||
float[] scratch = new float[dimension];
|
||||
for (String file : List.of("LICENSE.txt", "NOTICE.txt")) {
|
||||
try (InputStream in = BaseKnnVectorsFormatTestCase.class.getResourceAsStream(file);
|
||||
BufferedReader reader = new BufferedReader(new InputStreamReader(in, UTF_8))) {
|
||||
String line;
|
||||
int lineNo = -1;
|
||||
while ((line = reader.readLine()) != null) {
|
||||
line = line.strip();
|
||||
if (line.isEmpty()) {
|
||||
continue;
|
||||
}
|
||||
++lineNo;
|
||||
Document doc = new Document();
|
||||
doc.add(
|
||||
new KnnFloatVectorField(
|
||||
field, computeLineEmbedding(line, scratch), vectorSimilarityFunction));
|
||||
doc.add(new StoredField("text", line));
|
||||
doc.add(new StringField("id", file + "." + lineNo, Field.Store.YES));
|
||||
writer.addDocument(doc);
|
||||
if (random().nextBoolean()) {
|
||||
// Add some documents without a vector
|
||||
addDocuments(writer, "id" + lineNo + ".", randomIntBetween(1, 5));
|
||||
}
|
||||
}
|
||||
// System.out.println("indexed " + (lineNo + 1) + " lines from " + file);
|
||||
}
|
||||
}
|
||||
// Add some documents without a vector nor an id
|
||||
addDocuments(writer, null, 5);
|
||||
writer.close();
|
||||
return indexStore;
|
||||
}
|
||||
|
||||
private float[] computeLineEmbedding(String line, float[] vector) {
|
||||
Arrays.fill(vector, 0);
|
||||
for (int i = 0; i < line.length(); i++) {
|
||||
char c = line.charAt(i);
|
||||
vector[i % vector.length] += c / ((float) (i + 1) / vector.length);
|
||||
}
|
||||
VectorUtil.l2normalize(vector, false);
|
||||
return vector;
|
||||
}
|
||||
|
||||
private void addDocuments(IndexWriter writer, String idBase, int count) throws IOException {
|
||||
for (int i = 0; i < count; i++) {
|
||||
Document doc = new Document();
|
||||
doc.add(new StringField("other", "value", Field.Store.NO));
|
||||
if (idBase != null) {
|
||||
doc.add(new StringField("id", idBase + i, Field.Store.YES));
|
||||
}
|
||||
writer.addDocument(doc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,507 @@
|
|||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
|
||||
|
||||
Some code in core/src/java/org/apache/lucene/util/UnicodeUtil.java was
|
||||
derived from unicode conversion examples available at
|
||||
http://www.unicode.org/Public/PROGRAMS/CVTUTF. Here is the copyright
|
||||
from those sources:
|
||||
|
||||
/*
|
||||
* Copyright 2001-2004 Unicode, Inc.
|
||||
*
|
||||
* Disclaimer
|
||||
*
|
||||
* This source code is provided as is by Unicode, Inc. No claims are
|
||||
* made as to fitness for any particular purpose. No warranties of any
|
||||
* kind are expressed or implied. The recipient agrees to determine
|
||||
* applicability of information provided. If this file has been
|
||||
* purchased on magnetic or optical media from Unicode, Inc., the
|
||||
* sole remedy for any claim will be exchange of defective media
|
||||
* within 90 days of receipt.
|
||||
*
|
||||
* Limitations on Rights to Redistribute This Code
|
||||
*
|
||||
* Unicode, Inc. hereby grants the right to freely use the information
|
||||
* supplied in this file in the creation of products supporting the
|
||||
* Unicode Standard, and to make copies of this file in any form
|
||||
* for internal or external distribution as long as this notice
|
||||
* remains attached.
|
||||
*/
|
||||
|
||||
|
||||
Some code in core/src/java/org/apache/lucene/util/ArrayUtil.java was
|
||||
derived from Python 2.4.2 sources available at
|
||||
http://www.python.org. Full license is here:
|
||||
|
||||
http://www.python.org/download/releases/2.4.2/license/
|
||||
|
||||
Some code in core/src/java/org/apache/lucene/util/UnicodeUtil.java was
|
||||
derived from Python 3.1.2 sources available at
|
||||
http://www.python.org. Full license is here:
|
||||
|
||||
http://www.python.org/download/releases/3.1.2/license/
|
||||
|
||||
Some code in core/src/java/org/apache/lucene/util/automaton was
|
||||
derived from Brics automaton sources available at
|
||||
www.brics.dk/automaton/. Here is the copyright from those sources:
|
||||
|
||||
/*
|
||||
* Copyright (c) 2001-2009 Anders Moeller
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the author may not be used to endorse or promote products
|
||||
* derived from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
The levenshtein automata tables in core/src/java/org/apache/lucene/util/automaton
|
||||
were automatically generated with the moman/finenight FSA package.
|
||||
Here is the copyright for those sources:
|
||||
|
||||
# Copyright (c) 2010, Jean-Philippe Barrette-LaPierre, <jpb@rrette.com>
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person
|
||||
# obtaining a copy of this software and associated documentation
|
||||
# files (the "Software"), to deal in the Software without
|
||||
# restriction, including without limitation the rights to use,
|
||||
# copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following
|
||||
# conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be
|
||||
# included in all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
# OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
Some code in core/src/java/org/apache/lucene/util/UnicodeUtil.java was
|
||||
derived from ICU (http://www.icu-project.org)
|
||||
The full license is available here:
|
||||
https://github.com/unicode-org/icu/blob/main/icu4c/LICENSE
|
||||
|
||||
/*
|
||||
* Copyright (C) 1999-2010, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, and/or sell copies of the
|
||||
* Software, and to permit persons to whom the Software is furnished to do so,
|
||||
* provided that the above copyright notice(s) and this permission notice appear
|
||||
* in all copies of the Software and that both the above copyright notice(s) and
|
||||
* this permission notice appear in supporting documentation.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
|
||||
* IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE
|
||||
* LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR
|
||||
* ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
|
||||
* IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
|
||||
* OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*
|
||||
* Except as contained in this notice, the name of a copyright holder shall not
|
||||
* be used in advertising or otherwise to promote the sale, use or other
|
||||
* dealings in this Software without prior written authorization of the
|
||||
* copyright holder.
|
||||
*/
|
||||
|
||||
The following license applies to the Snowball stemmers:
|
||||
|
||||
Copyright (c) 2001, Dr Martin Porter
|
||||
Copyright (c) 2002, Richard Boulton
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holders nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
The following license applies to the KStemmer:
|
||||
|
||||
Copyright © 2003,
|
||||
Center for Intelligent Information Retrieval,
|
||||
University of Massachusetts, Amherst.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
3. The names "Center for Intelligent Information Retrieval" and
|
||||
"University of Massachusetts" must not be used to endorse or promote products
|
||||
derived from this software without prior written permission. To obtain
|
||||
permission, contact info@ciir.cs.umass.edu.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF MASSACHUSETTS AND OTHER CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
|
||||
GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
SUCH DAMAGE.
|
||||
|
||||
The following license applies to the Morfologik project:
|
||||
|
||||
Copyright (c) 2006 Dawid Weiss
|
||||
Copyright (c) 2007-2011 Dawid Weiss, Marcin Miłkowski
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Morfologik nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
---
|
||||
|
||||
The dictionary comes from Morfologik project. Morfologik uses data from
|
||||
Polish ispell/myspell dictionary hosted at http://www.sjp.pl/slownik/en/ and
|
||||
is licensed under the terms of (inter alia) LGPL and Creative Commons
|
||||
ShareAlike. The part-of-speech tags were added in Morfologik project and
|
||||
are not found in the data from sjp.pl. The tagset is similar to IPI PAN
|
||||
tagset.
|
||||
|
||||
---
|
||||
|
||||
The following license applies to the Morfeusz project,
|
||||
used by org.apache.lucene.analysis.morfologik.
|
||||
|
||||
BSD-licensed dictionary of Polish (SGJP)
|
||||
http://sgjp.pl/morfeusz/
|
||||
|
||||
Copyright © 2011 Zygmunt Saloni, Włodzimierz Gruszczyński,
|
||||
Marcin Woliński, Robert Wołosz
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the
|
||||
distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDERS “AS IS” AND ANY EXPRESS
|
||||
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
||||
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
||||
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
|
||||
IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
---
|
||||
|
||||
core/src/java/org/apache/lucene/util/compress/LZ4.java is a Java
|
||||
implementation of the LZ4 (https://github.com/lz4/lz4/tree/dev/lib)
|
||||
compression format for Lucene's DataInput/DataOutput abstractions.
|
||||
|
||||
LZ4 Library
|
||||
Copyright (c) 2011-2016, Yann Collet
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice, this
|
||||
list of conditions and the following disclaimer in the documentation and/or
|
||||
other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
|
@ -0,0 +1,197 @@
|
|||
Apache Lucene
|
||||
Copyright 2001-2022 The Apache Software Foundation
|
||||
|
||||
This product includes software developed at
|
||||
The Apache Software Foundation (http://www.apache.org/).
|
||||
|
||||
Includes software from other Apache Software Foundation projects,
|
||||
including, but not limited to:
|
||||
- Apache Jakarta Regexp
|
||||
- Apache Commons
|
||||
- Apache Xerces
|
||||
|
||||
ICU4J (under analysis/icu) is licensed under an MIT-style license
|
||||
and Copyright (c) 1995-2008 International Business Machines Corporation and others
|
||||
|
||||
Some data files (under analysis/icu/src/data) are derived from Unicode data such
|
||||
as the Unicode Character Database. See http://unicode.org/copyright.html for more
|
||||
details.
|
||||
|
||||
Brics Automaton (under core/src/java/org/apache/lucene/util/automaton) is
|
||||
BSD-licensed, created by Anders Møller. See http://www.brics.dk/automaton/
|
||||
|
||||
The levenshtein automata tables (under core/src/java/org/apache/lucene/util/automaton) were
|
||||
automatically generated with the moman/finenight FSA library, created by
|
||||
Jean-Philippe Barrette-LaPierre. This library is available under an MIT license,
|
||||
see http://sites.google.com/site/rrettesite/moman and
|
||||
http://bitbucket.org/jpbarrette/moman/overview/
|
||||
|
||||
The class org.apache.lucene.util.WeakIdentityMap was derived from
|
||||
the Apache CXF project and is Apache License 2.0.
|
||||
|
||||
The class org.apache.lucene.util.compress.LZ4 is a Java rewrite of the LZ4
|
||||
compression library (https://github.com/lz4/lz4/tree/dev/lib) that is licensed
|
||||
under the 2-clause BSD license.
|
||||
(https://opensource.org/licenses/bsd-license.php)
|
||||
|
||||
The Google Code Prettify is Apache License 2.0.
|
||||
See http://code.google.com/p/google-code-prettify/
|
||||
|
||||
This product includes code (JaspellTernarySearchTrie) from Java Spelling Checking
|
||||
Package (jaspell): http://jaspell.sourceforge.net/
|
||||
License: The BSD License (http://www.opensource.org/licenses/bsd-license.php)
|
||||
|
||||
The snowball stemmers in
|
||||
analysis/common/src/java/net/sf/snowball
|
||||
were developed by Martin Porter and Richard Boulton.
|
||||
The snowball stopword lists in
|
||||
analysis/common/src/resources/org/apache/lucene/analysis/snowball
|
||||
were developed by Martin Porter and Richard Boulton.
|
||||
The full snowball package is available from
|
||||
https://snowballstem.org/
|
||||
|
||||
The KStem stemmer in
|
||||
analysis/common/src/org/apache/lucene/analysis/en
|
||||
was developed by Bob Krovetz and Sergio Guzman-Lara (CIIR-UMass Amherst)
|
||||
under the BSD-license.
|
||||
|
||||
The Arabic, Persian, Romanian, Bulgarian, Hindi and Bengali analyzers (common) come with a default
|
||||
stopword list that is BSD-licensed and was created by Jacques Savoy. These files reside in:
|
||||
analysis/common/src/resources/org/apache/lucene/analysis/ar/stopwords.txt,
|
||||
analysis/common/src/resources/org/apache/lucene/analysis/fa/stopwords.txt,
|
||||
analysis/common/src/resources/org/apache/lucene/analysis/ro/stopwords.txt,
|
||||
analysis/common/src/resources/org/apache/lucene/analysis/bg/stopwords.txt,
|
||||
analysis/common/src/resources/org/apache/lucene/analysis/hi/stopwords.txt,
|
||||
analysis/common/src/resources/org/apache/lucene/analysis/bn/stopwords.txt
|
||||
See http://members.unine.ch/jacques.savoy/clef/index.html.
|
||||
|
||||
The German, Spanish, Finnish, French, Hungarian, Italian, Portuguese, Russian and Swedish light stemmers
|
||||
(common) are based on BSD-licensed reference implementations created by Jacques Savoy and
|
||||
Ljiljana Dolamic. These files reside in:
|
||||
analysis/common/src/java/org/apache/lucene/analysis/de/GermanLightStemmer.java
|
||||
analysis/common/src/java/org/apache/lucene/analysis/de/GermanMinimalStemmer.java
|
||||
analysis/common/src/java/org/apache/lucene/analysis/es/SpanishLightStemmer.java
|
||||
analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishLightStemmer.java
|
||||
analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemmer.java
|
||||
analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchMinimalStemmer.java
|
||||
analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemmer.java
|
||||
analysis/common/src/java/org/apache/lucene/analysis/it/ItalianLightStemmer.java
|
||||
analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemmer.java
|
||||
analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemmer.java
|
||||
analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemmer.java
|
||||
|
||||
The Stempel analyzer (stempel) includes BSD-licensed software developed
|
||||
by the Egothor project http://egothor.sf.net/, created by Leo Galambos, Martin Kvapil,
|
||||
and Edmond Nolan.
|
||||
|
||||
The Polish analyzer (stempel) comes with a default
|
||||
stopword list that is BSD-licensed and was created by the Carrot2 project. The file resides
|
||||
in stempel/src/resources/org/apache/lucene/analysis/pl/stopwords.txt.
|
||||
See https://github.com/carrot2/carrot2.
|
||||
|
||||
The SmartChineseAnalyzer source code (smartcn) was
|
||||
provided by Xiaoping Gao and copyright 2009 by www.imdict.net.
|
||||
|
||||
WordBreakTestUnicode_*.java (under modules/analysis/common/src/test/)
|
||||
is derived from Unicode data such as the Unicode Character Database.
|
||||
See http://unicode.org/copyright.html for more details.
|
||||
|
||||
The Morfologik analyzer (morfologik) includes BSD-licensed software
|
||||
developed by Dawid Weiss and Marcin Miłkowski
|
||||
(https://github.com/morfologik/morfologik-stemming) and uses
|
||||
data from the BSD-licensed dictionary of Polish (SGJP, http://sgjp.pl/morfeusz/).
|
||||
|
||||
===========================================================================
|
||||
Kuromoji Japanese Morphological Analyzer - Apache Lucene Integration
|
||||
===========================================================================
|
||||
|
||||
This software includes a binary and/or source version of data from
|
||||
|
||||
mecab-ipadic-2.7.0-20070801
|
||||
|
||||
which can be obtained from
|
||||
|
||||
http://atilika.com/releases/mecab-ipadic/mecab-ipadic-2.7.0-20070801.tar.gz
|
||||
|
||||
or
|
||||
|
||||
http://jaist.dl.sourceforge.net/project/mecab/mecab-ipadic/2.7.0-20070801/mecab-ipadic-2.7.0-20070801.tar.gz
|
||||
|
||||
===========================================================================
|
||||
mecab-ipadic-2.7.0-20070801 Notice
|
||||
===========================================================================
|
||||
|
||||
Nara Institute of Science and Technology (NAIST),
|
||||
the copyright holders, disclaims all warranties with regard to this
|
||||
software, including all implied warranties of merchantability and
|
||||
fitness, in no event shall NAIST be liable for
|
||||
any special, indirect or consequential damages or any damages
|
||||
whatsoever resulting from loss of use, data or profits, whether in an
|
||||
action of contract, negligence or other tortuous action, arising out
|
||||
of or in connection with the use or performance of this software.
|
||||
|
||||
A large portion of the dictionary entries
|
||||
originate from ICOT Free Software. The following conditions for ICOT
|
||||
Free Software applies to the current dictionary as well.
|
||||
|
||||
Each User may also freely distribute the Program, whether in its
|
||||
original form or modified, to any third party or parties, PROVIDED
|
||||
that the provisions of Section 3 ("NO WARRANTY") will ALWAYS appear
|
||||
on, or be attached to, the Program, which is distributed substantially
|
||||
in the same form as set out herein and that such intended
|
||||
distribution, if actually made, will neither violate or otherwise
|
||||
contravene any of the laws and regulations of the countries having
|
||||
jurisdiction over the User or the intended distribution itself.
|
||||
|
||||
NO WARRANTY
|
||||
|
||||
The program was produced on an experimental basis in the course of the
|
||||
research and development conducted during the project and is provided
|
||||
to users as so produced on an experimental basis. Accordingly, the
|
||||
program is provided without any warranty whatsoever, whether express,
|
||||
implied, statutory or otherwise. The term "warranty" used herein
|
||||
includes, but is not limited to, any warranty of the quality,
|
||||
performance, merchantability and fitness for a particular purpose of
|
||||
the program and the nonexistence of any infringement or violation of
|
||||
any right of any third party.
|
||||
|
||||
Each user of the program will agree and understand, and be deemed to
|
||||
have agreed and understood, that there is no warranty whatsoever for
|
||||
the program and, accordingly, the entire risk arising from or
|
||||
otherwise connected with the program is assumed by the user.
|
||||
|
||||
Therefore, neither ICOT, the copyright holder, or any other
|
||||
organization that participated in or was otherwise related to the
|
||||
development of the program and their respective officials, directors,
|
||||
officers and other employees shall be held liable for any and all
|
||||
damages, including, without limitation, general, special, incidental
|
||||
and consequential damages, arising out of or otherwise in connection
|
||||
with the use or inability to use the program or any product, material
|
||||
or result produced or otherwise obtained by using the program,
|
||||
regardless of whether they have been advised of, or otherwise had
|
||||
knowledge of, the possibility of such damages at any time during the
|
||||
project or thereafter. Each user will be deemed to have agreed to the
|
||||
foregoing by his or her commencement of use of the program. The term
|
||||
"use" as used herein includes, but is not limited to, the use,
|
||||
modification, copying and distribution of the program and the
|
||||
production of secondary products from the program.
|
||||
|
||||
In the case where the program, whether in its original form or
|
||||
modified, was distributed or delivered to or received by a user from
|
||||
any person, organization or entity other than ICOT, unless it makes or
|
||||
grants independently of ICOT any specific warranty to the user in
|
||||
writing, such person, organization or entity, will also be exempted
|
||||
from and not be held liable to the user for any such damages as noted
|
||||
above as far as the program is concerned.
|
||||
|
||||
===========================================================================
|
||||
Nori Korean Morphological Analyzer - Apache Lucene Integration
|
||||
===========================================================================
|
||||
|
||||
This software includes a binary and/or source version of data from
|
||||
|
||||
mecab-ko-dic-2.1.1-20180720
|
||||
|
||||
which can be obtained from
|
||||
|
||||
https://bitbucket.org/eunjeon/mecab-ko-dic/downloads/mecab-ko-dic-2.1.1-20180720.tar.gz
|
Loading…
Reference in New Issue