diff --git a/lucene/MIGRATE.txt b/lucene/MIGRATE.txt index 7de0f47d4de..d9cfefb2582 100644 --- a/lucene/MIGRATE.txt +++ b/lucene/MIGRATE.txt @@ -1,90 +1,16 @@ # Apache Lucene Migration Guide ## Query.hashCode and Query.equals are now abstract methods (LUCENE-7277) + Any custom query subclasses should redeclare equivalence relationship according to the subclass's details. See code patterns used in existing core Lucene query classes for details. -## The way how number of document calculated is changed (LUCENE-6711) -The number of documents (numDocs) is used to calculate term specificity (idf) and average document length (avdl). -Prior to LUCENE-6711, collectionStats.maxDoc() was used for the statistics. -Now, collectionStats.docCount() is used whenever possible, if not maxDocs() is used. +## CompressionTools removed (LUCENE-7322) -Assume that a collection contains 100 documents, and 50 of them have "keywords" field. -In this example, maxDocs is 100 while docCount is 50 for the "keywords" field. -The total number of tokens for "keywords" field is divided by docCount to obtain avdl. -Therefore, docCount which is the total number of documents that have at least one term for the field, is a more precise metric for optional fields. +Per-field compression has been superseded by codec-level compression, which has +the benefit of being able to compress several fields, or even documents at once, +yielding better compression ratios. In case you would still like to compress on +top of the codec, you can do it on the application side by using the utility +classes from the java.util.zip package. -DefaultSimilarity does not leverage avdl, so this change would have relatively minor change in the result list. -Because relative idf values of terms will remain same. -However, when combined with other factors such as term frequency, relative ranking of documents could change. -Some Similarity implementations (such as the ones instantiated with NormalizationH2 and BM25) take account into avdl and would have notable change in ranked list. -Especially if you have a collection of documents with varying lengths. -Because NormalizationH2 tends to punish documents longer than avdl. - -## FunctionValues.exist() Behavior Changes due to ValueSource bug fixes (LUCENE-5961) - -Bugs fixed in several ValueSource functions may result in different behavior in -situations where some documents do not have values for fields wrapped in other -ValueSources. Users who want to preserve the previous behavior may need to wrap -their ValueSources in a "DefFunction" along with a ConstValueSource of "0.0". - -## Removal of Filter and FilteredQuery (LUCENE-6301,LUCENE-6583) - -Filter and FilteredQuery have been removed. Regular queries can be used instead -of filters as they have been optimized for the filtering case. And you can -construct a BooleanQuery with one MUST clause for the query, and one FILTER -clause for the filter in order to have similar behaviour to FilteredQuery. - -## PhraseQuery and BooleanQuery made immutable (LUCENE-6531 LUCENE-6570) - -PhraseQuery and BooleanQuery are now immutable and have a builder API to help -construct them. For instance a BooleanQuery that used to be constructed like -this: - - BooleanQuery bq = new BooleanQuery(); - bq.add(q1, Occur.SHOULD); - bq.add(q2, Occur.SHOULD); - bq.add(q3, Occur.MUST); - bq.setMinimumNumberShouldMatch(1); - -can now be constructed this way using its builder: - - BooleanQuery bq = new BooleanQuery.Builder() - .add(q1, Occur.SHOULD) - .add(q2, Occur.SHOULD) - .add(q3, Occur.SHOULD) - .setMinimumNumberShouldMatch(1) - .build(); - -## AttributeImpl now requires that reflectWith() is implemented (LUCENE-6651) - -AttributeImpl removed the default, reflection-based implementation of -reflectWith(AtrributeReflector). The method was made abstract. If you have -implemented your own attribute, make sure to add the required method sigature. -See the Javadocs for an example. - -## Query.setBoost() and Query.clone() are removed (LUCENE-6590) - -Query.setBoost has been removed. In order to apply a boost to a Query, you now -need to wrap it inside a BoostQuery. For instance, - - Query q = ...; - float boost = ...; - q = new BoostQuery(q, boost); - -would be equivalent to the following code with the old setBoost API: - - Query q = ...; - float boost = ...; - q.setBoost(q.getBoost() * boost); - -# PointValues replaces NumericField (LUCENE-6917) - -PointValues provides faster indexing and searching, a smaller -index size, and less heap used at search time. See org.apache.lucene.index.PointValues -for an introduction. - -Legacy numeric encodings from previous versions of Lucene are -deprecated as LegacyIntField, LegacyFloatField, LegacyLongField, and LegacyDoubleField, -and can be searched with LegacyNumericRangeQuery. diff --git a/lucene/core/src/java/org/apache/lucene/document/CompressionTools.java b/lucene/core/src/java/org/apache/lucene/document/CompressionTools.java deleted file mode 100644 index 7ea3583a8d0..00000000000 --- a/lucene/core/src/java/org/apache/lucene/document/CompressionTools.java +++ /dev/null @@ -1,150 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.lucene.document; - - -import java.util.zip.Deflater; -import java.util.zip.Inflater; -import java.util.zip.DataFormatException; -import java.io.ByteArrayOutputStream; - -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.UnicodeUtil; - -/** Simple utility class providing static methods to - * compress and decompress binary data for stored fields. - * This class uses java.util.zip.Deflater and Inflater - * classes to compress and decompress. - */ - -public class CompressionTools { - - // Export only static methods - private CompressionTools() {} - - /** Compresses the specified byte range using the - * specified compressionLevel (constants are defined in - * java.util.zip.Deflater). */ - public static byte[] compress(byte[] value, int offset, int length, int compressionLevel) { - - /* Create an expandable byte array to hold the compressed data. - * You cannot use an array that's the same size as the orginal because - * there is no guarantee that the compressed data will be smaller than - * the uncompressed data. */ - ByteArrayOutputStream bos = new ByteArrayOutputStream(length); - - Deflater compressor = new Deflater(); - - try { - compressor.setLevel(compressionLevel); - compressor.setInput(value, offset, length); - compressor.finish(); - - // Compress the data - final byte[] buf = new byte[1024]; - while (!compressor.finished()) { - int count = compressor.deflate(buf); - bos.write(buf, 0, count); - } - } finally { - compressor.end(); - } - - return bos.toByteArray(); - } - - /** Compresses the specified byte range, with default BEST_COMPRESSION level */ - public static byte[] compress(byte[] value, int offset, int length) { - return compress(value, offset, length, Deflater.BEST_COMPRESSION); - } - - /** Compresses all bytes in the array, with default BEST_COMPRESSION level */ - public static byte[] compress(byte[] value) { - return compress(value, 0, value.length, Deflater.BEST_COMPRESSION); - } - - /** Compresses the String value, with default BEST_COMPRESSION level */ - public static byte[] compressString(String value) { - return compressString(value, Deflater.BEST_COMPRESSION); - } - - /** Compresses the String value using the specified - * compressionLevel (constants are defined in - * java.util.zip.Deflater). */ - public static byte[] compressString(String value, int compressionLevel) { - byte[] b = new byte[UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR * value.length()]; - final int len = UnicodeUtil.UTF16toUTF8(value, 0, value.length(), b); - return compress(b, 0, len, compressionLevel); - } - - /** Decompress the byte array previously returned by - * compress (referenced by the provided BytesRef) */ - public static byte[] decompress(BytesRef bytes) throws DataFormatException { - return decompress(bytes.bytes, bytes.offset, bytes.length); - } - - /** Decompress the byte array previously returned by - * compress */ - public static byte[] decompress(byte[] value) throws DataFormatException { - return decompress(value, 0, value.length); - } - - /** Decompress the byte array previously returned by - * compress */ - public static byte[] decompress(byte[] value, int offset, int length) throws DataFormatException { - // Create an expandable byte array to hold the decompressed data - ByteArrayOutputStream bos = new ByteArrayOutputStream(length); - - Inflater decompressor = new Inflater(); - - try { - decompressor.setInput(value, offset, length); - - // Decompress the data - final byte[] buf = new byte[1024]; - while (!decompressor.finished()) { - int count = decompressor.inflate(buf); - bos.write(buf, 0, count); - } - } finally { - decompressor.end(); - } - - return bos.toByteArray(); - } - - /** Decompress the byte array previously returned by - * compressString back into a String */ - public static String decompressString(byte[] value) throws DataFormatException { - return decompressString(value, 0, value.length); - } - - /** Decompress the byte array previously returned by - * compressString back into a String */ - public static String decompressString(byte[] value, int offset, int length) throws DataFormatException { - final byte[] bytes = decompress(value, offset, length); - final char[] result = new char[bytes.length]; - final int len = UnicodeUtil.UTF8toUTF16(bytes, 0, bytes.length, result); - return new String(result, 0, len); - } - - /** Decompress the byte array (referenced by the provided BytesRef) - * previously returned by compressString back into a String */ - public static String decompressString(BytesRef bytes) throws DataFormatException { - return decompressString(bytes.bytes, bytes.offset, bytes.length); - } -} diff --git a/lucene/core/src/test/org/apache/lucene/document/TestBinaryDocument.java b/lucene/core/src/test/org/apache/lucene/document/TestBinaryDocument.java index 7d04f3b2643..16395b3a1b1 100644 --- a/lucene/core/src/test/org/apache/lucene/document/TestBinaryDocument.java +++ b/lucene/core/src/test/org/apache/lucene/document/TestBinaryDocument.java @@ -74,33 +74,4 @@ public class TestBinaryDocument extends LuceneTestCase { reader.close(); dir.close(); } - - public void testCompressionTools() throws Exception { - StoredField binaryFldCompressed = new StoredField("binaryCompressed", CompressionTools.compress(binaryValCompressed.getBytes(StandardCharsets.UTF_8))); - StoredField stringFldCompressed = new StoredField("stringCompressed", CompressionTools.compressString(binaryValCompressed)); - - Document doc = new Document(); - - doc.add(binaryFldCompressed); - doc.add(stringFldCompressed); - - /** add the doc to a ram index */ - Directory dir = newDirectory(); - RandomIndexWriter writer = new RandomIndexWriter(random(), dir); - writer.addDocument(doc); - - /** open a reader and fetch the document */ - IndexReader reader = writer.getReader(); - Document docFromReader = reader.document(0); - assertTrue(docFromReader != null); - - /** fetch the binary compressed field and compare its content with the original one */ - String binaryFldCompressedTest = new String(CompressionTools.decompress(docFromReader.getBinaryValue("binaryCompressed")), StandardCharsets.UTF_8); - assertTrue(binaryFldCompressedTest.equals(binaryValCompressed)); - assertTrue(CompressionTools.decompressString(docFromReader.getBinaryValue("stringCompressed")).equals(binaryValCompressed)); - - writer.close(); - reader.close(); - dir.close(); - } }