mirror of https://github.com/apache/lucene.git
merge trunk
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5969@1631928 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
commit
45d882f470
|
@ -2,7 +2,7 @@
|
||||||
<library name="JUnit">
|
<library name="JUnit">
|
||||||
<CLASSES>
|
<CLASSES>
|
||||||
<root url="jar://$PROJECT_DIR$/lucene/test-framework/lib/junit-4.10.jar!/" />
|
<root url="jar://$PROJECT_DIR$/lucene/test-framework/lib/junit-4.10.jar!/" />
|
||||||
<root url="jar://$PROJECT_DIR$/lucene/test-framework/lib/randomizedtesting-runner-2.1.6.jar!/" />
|
<root url="jar://$PROJECT_DIR$/lucene/test-framework/lib/randomizedtesting-runner-2.1.9.jar!/" />
|
||||||
</CLASSES>
|
</CLASSES>
|
||||||
<JAVADOC />
|
<JAVADOC />
|
||||||
<SOURCES />
|
<SOURCES />
|
||||||
|
|
|
@ -151,6 +151,15 @@ API Changes
|
||||||
to return an instance optimized for merging instead of searching.
|
to return an instance optimized for merging instead of searching.
|
||||||
(Mike McCandless, Robert Muir)
|
(Mike McCandless, Robert Muir)
|
||||||
|
|
||||||
|
* LUCENE-5992: Remove FieldInfos from SegmentInfosWriter.write API. (Robert Muir, Mike McCandless)
|
||||||
|
|
||||||
|
* LUCENE-5998: Simplify Field/SegmentInfoFormat to read+write methods.
|
||||||
|
(Robert Muir)
|
||||||
|
|
||||||
|
* LUCENE-6000: Removed StandardTokenizerInterface. Tokenizers now use
|
||||||
|
their jflex impl directly.
|
||||||
|
(Ryan Ernst)
|
||||||
|
|
||||||
Bug Fixes
|
Bug Fixes
|
||||||
|
|
||||||
* LUCENE-5650: Enforce read-only access to any path outside the temporary
|
* LUCENE-5650: Enforce read-only access to any path outside the temporary
|
||||||
|
@ -169,6 +178,8 @@ Bug Fixes
|
||||||
not have the regular "spinlock" of DirectoryReader.open. It now implements
|
not have the regular "spinlock" of DirectoryReader.open. It now implements
|
||||||
Closeable and you must close it to release the lock. (Mike McCandless, Robert Muir)
|
Closeable and you must close it to release the lock. (Mike McCandless, Robert Muir)
|
||||||
|
|
||||||
|
* LUCENE-5980: Don't let document length overflow. (Robert Muir)
|
||||||
|
|
||||||
Documentation
|
Documentation
|
||||||
|
|
||||||
* LUCENE-5392: Add/improve analysis package documentation to reflect
|
* LUCENE-5392: Add/improve analysis package documentation to reflect
|
||||||
|
@ -187,6 +198,12 @@ Tests
|
||||||
index files to use Version.toString() in filename.
|
index files to use Version.toString() in filename.
|
||||||
(Ryan Ernst)
|
(Ryan Ernst)
|
||||||
|
|
||||||
|
* LUCENE-6002: Monster tests no longer fail. Most of them now have an 80 hour
|
||||||
|
timeout, effectively removing the timeout. The tests that operate near the 2
|
||||||
|
billion limit now use IndexWriter.MAX_DOCS instead of Integer.MAX_VALUE.
|
||||||
|
Some of the slow Monster tests now explicitly choose the default codec.
|
||||||
|
(Mike McCandless, Shawn Heisey)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
|
|
||||||
* LUCENE-5960: Use a more efficient bitset, not a Set<Integer>, to
|
* LUCENE-5960: Use a more efficient bitset, not a Set<Integer>, to
|
||||||
|
@ -206,6 +223,9 @@ Optimizations
|
||||||
per-segment/per-producer, and norms and doc values merging no longer cause
|
per-segment/per-producer, and norms and doc values merging no longer cause
|
||||||
RAM spikes for latent fields. (Mike McCandless, Robert Muir)
|
RAM spikes for latent fields. (Mike McCandless, Robert Muir)
|
||||||
|
|
||||||
|
* LUCENE-5983: CachingWrapperFilter now uses a new DocIdSet implementation
|
||||||
|
called RoaringDocIdSet instead of WAH8DocIdSet. (Adrien Grand)
|
||||||
|
|
||||||
Build
|
Build
|
||||||
|
|
||||||
* LUCENE-5909: Smoke tester now has better command line parsing and
|
* LUCENE-5909: Smoke tester now has better command line parsing and
|
||||||
|
@ -216,6 +236,8 @@ Build
|
||||||
* LUCENE-5962: Rename diffSources.py to createPatch.py and make it work with all text file types.
|
* LUCENE-5962: Rename diffSources.py to createPatch.py and make it work with all text file types.
|
||||||
(Ryan Ernst)
|
(Ryan Ernst)
|
||||||
|
|
||||||
|
* LUCENE-5995: Upgrade ICU to 54.1 (Robert Muir)
|
||||||
|
|
||||||
Other
|
Other
|
||||||
|
|
||||||
* LUCENE-5563: Removed sep layout: which has fallen behind on features and doesn't
|
* LUCENE-5563: Removed sep layout: which has fallen behind on features and doesn't
|
||||||
|
@ -1250,6 +1272,10 @@ New Features
|
||||||
approximate value of the diameter of the earth at the given latitude.
|
approximate value of the diameter of the earth at the given latitude.
|
||||||
(Adrien Grand)
|
(Adrien Grand)
|
||||||
|
|
||||||
|
* LUCENE-5979: FilteredQuery uses the cost API to decide on whether to use
|
||||||
|
random-access or leap-frog to intersect the filter with the query.
|
||||||
|
(Adrien Grand)
|
||||||
|
|
||||||
Build
|
Build
|
||||||
|
|
||||||
* LUCENE-5217,LUCENE-5420: Maven config: get dependencies from Ant+Ivy config;
|
* LUCENE-5217,LUCENE-5420: Maven config: get dependencies from Ant+Ivy config;
|
||||||
|
|
|
@ -49,7 +49,7 @@ import org.apache.lucene.util.AttributeFactory;
|
||||||
|
|
||||||
public final class ClassicTokenizer extends Tokenizer {
|
public final class ClassicTokenizer extends Tokenizer {
|
||||||
/** A private instance of the JFlex-constructed scanner */
|
/** A private instance of the JFlex-constructed scanner */
|
||||||
private StandardTokenizerInterface scanner;
|
private ClassicTokenizerImpl scanner;
|
||||||
|
|
||||||
public static final int ALPHANUM = 0;
|
public static final int ALPHANUM = 0;
|
||||||
public static final int APOSTROPHE = 1;
|
public static final int APOSTROPHE = 1;
|
||||||
|
@ -135,7 +135,7 @@ public final class ClassicTokenizer extends Tokenizer {
|
||||||
while(true) {
|
while(true) {
|
||||||
int tokenType = scanner.getNextToken();
|
int tokenType = scanner.getNextToken();
|
||||||
|
|
||||||
if (tokenType == StandardTokenizerInterface.YYEOF) {
|
if (tokenType == ClassicTokenizerImpl.YYEOF) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -33,7 +33,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
* This class implements the classic lucene StandardTokenizer up until 3.0
|
* This class implements the classic lucene StandardTokenizer up until 3.0
|
||||||
*/
|
*/
|
||||||
|
|
||||||
class ClassicTokenizerImpl implements StandardTokenizerInterface {
|
class ClassicTokenizerImpl {
|
||||||
|
|
||||||
/** This character denotes the end of file */
|
/** This character denotes the end of file */
|
||||||
public static final int YYEOF = -1;
|
public static final int YYEOF = -1;
|
||||||
|
|
|
@ -33,7 +33,6 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
%%
|
%%
|
||||||
|
|
||||||
%class ClassicTokenizerImpl
|
%class ClassicTokenizerImpl
|
||||||
%implements StandardTokenizerInterface
|
|
||||||
%unicode 3.0
|
%unicode 3.0
|
||||||
%integer
|
%integer
|
||||||
%function getNextToken
|
%function getNextToken
|
||||||
|
|
|
@ -39,7 +39,7 @@ import org.apache.lucene.util.AttributeFactory;
|
||||||
|
|
||||||
public final class StandardTokenizer extends Tokenizer {
|
public final class StandardTokenizer extends Tokenizer {
|
||||||
/** A private instance of the JFlex-constructed scanner */
|
/** A private instance of the JFlex-constructed scanner */
|
||||||
private StandardTokenizerInterface scanner;
|
private StandardTokenizerImpl scanner;
|
||||||
|
|
||||||
// TODO: how can we remove these old types?!
|
// TODO: how can we remove these old types?!
|
||||||
public static final int ALPHANUM = 0;
|
public static final int ALPHANUM = 0;
|
||||||
|
@ -150,7 +150,7 @@ public final class StandardTokenizer extends Tokenizer {
|
||||||
while(true) {
|
while(true) {
|
||||||
int tokenType = scanner.getNextToken();
|
int tokenType = scanner.getNextToken();
|
||||||
|
|
||||||
if (tokenType == StandardTokenizerInterface.YYEOF) {
|
if (tokenType == StandardTokenizerImpl.YYEOF) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -39,7 +39,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
* </ul>
|
* </ul>
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public final class StandardTokenizerImpl implements StandardTokenizerInterface {
|
public final class StandardTokenizerImpl {
|
||||||
|
|
||||||
/** This character denotes the end of file */
|
/** This character denotes the end of file */
|
||||||
public static final int YYEOF = -1;
|
public static final int YYEOF = -1;
|
||||||
|
@ -804,7 +804,7 @@ public final class StandardTokenizerImpl implements StandardTokenizerInterface {
|
||||||
if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
|
if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
|
||||||
zzAtEOF = true;
|
zzAtEOF = true;
|
||||||
{
|
{
|
||||||
return StandardTokenizerInterface.YYEOF;
|
return YYEOF;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
|
|
@ -43,7 +43,6 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
%final
|
%final
|
||||||
%public
|
%public
|
||||||
%class StandardTokenizerImpl
|
%class StandardTokenizerImpl
|
||||||
%implements StandardTokenizerInterface
|
|
||||||
%function getNextToken
|
%function getNextToken
|
||||||
%char
|
%char
|
||||||
%buffer 255
|
%buffer 255
|
||||||
|
@ -118,7 +117,7 @@ ComplexContextEx = \p{LB:Complex_Context}
|
||||||
// UAX#29 WB1. sot ÷
|
// UAX#29 WB1. sot ÷
|
||||||
// WB2. ÷ eot
|
// WB2. ÷ eot
|
||||||
//
|
//
|
||||||
<<EOF>> { return StandardTokenizerInterface.YYEOF; }
|
<<EOF>> { return YYEOF; }
|
||||||
|
|
||||||
// UAX#29 WB8. Numeric × Numeric
|
// UAX#29 WB8. Numeric × Numeric
|
||||||
// WB11. Numeric (MidNum | MidNumLet | Single_Quote) × Numeric
|
// WB11. Numeric (MidNum | MidNumLet | Single_Quote) × Numeric
|
||||||
|
|
|
@ -1,74 +0,0 @@
|
||||||
package org.apache.lucene.analysis.standard;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
|
||||||
|
|
||||||
import java.io.Reader;
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Internal interface for supporting versioned grammars.
|
|
||||||
* @lucene.internal
|
|
||||||
*/
|
|
||||||
public interface StandardTokenizerInterface {
|
|
||||||
|
|
||||||
/** This token type, as returned from {@link #getNextToken()}, denotes the end of file */
|
|
||||||
public static final int YYEOF = -1;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Copies the matched text into the CharTermAttribute
|
|
||||||
*/
|
|
||||||
public void getText(CharTermAttribute t);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns the current position.
|
|
||||||
*/
|
|
||||||
public int yychar();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Resets the scanner to read from a new input stream.
|
|
||||||
* Does not close the old reader.
|
|
||||||
*
|
|
||||||
* All internal variables are reset, the old input stream
|
|
||||||
* <b>cannot</b> be reused (internal buffer is discarded and lost).
|
|
||||||
* Lexical state is set to <tt>ZZ_INITIAL</tt>.
|
|
||||||
*
|
|
||||||
* @param reader the new input stream
|
|
||||||
*/
|
|
||||||
public void yyreset(Reader reader);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns the length of the matched text region.
|
|
||||||
*/
|
|
||||||
public int yylength();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Resumes scanning until the next regular expression is matched,
|
|
||||||
* the end of input is encountered or an I/O-Error occurs.
|
|
||||||
*
|
|
||||||
* @return the next token, {@link #YYEOF} on end of stream
|
|
||||||
* @exception IOException if any I/O-Error occurs
|
|
||||||
*/
|
|
||||||
public int getNextToken() throws IOException;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Sets the scanner buffer size in chars
|
|
||||||
*/
|
|
||||||
public void setBufferSize(int numChars);
|
|
||||||
}
|
|
|
@ -47,7 +47,7 @@ import org.apache.lucene.util.AttributeFactory;
|
||||||
|
|
||||||
public final class UAX29URLEmailTokenizer extends Tokenizer {
|
public final class UAX29URLEmailTokenizer extends Tokenizer {
|
||||||
/** A private instance of the JFlex-constructed scanner */
|
/** A private instance of the JFlex-constructed scanner */
|
||||||
private final StandardTokenizerInterface scanner;
|
private final UAX29URLEmailTokenizerImpl scanner;
|
||||||
|
|
||||||
public static final int ALPHANUM = 0;
|
public static final int ALPHANUM = 0;
|
||||||
public static final int NUM = 1;
|
public static final int NUM = 1;
|
||||||
|
@ -108,7 +108,7 @@ public final class UAX29URLEmailTokenizer extends Tokenizer {
|
||||||
this.scanner = getScanner();
|
this.scanner = getScanner();
|
||||||
}
|
}
|
||||||
|
|
||||||
private StandardTokenizerInterface getScanner() {
|
private UAX29URLEmailTokenizerImpl getScanner() {
|
||||||
return new UAX29URLEmailTokenizerImpl(input);
|
return new UAX29URLEmailTokenizerImpl(input);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -127,7 +127,7 @@ public final class UAX29URLEmailTokenizer extends Tokenizer {
|
||||||
while(true) {
|
while(true) {
|
||||||
int tokenType = scanner.getNextToken();
|
int tokenType = scanner.getNextToken();
|
||||||
|
|
||||||
if (tokenType == StandardTokenizerInterface.YYEOF) {
|
if (tokenType == UAX29URLEmailTokenizerImpl.YYEOF) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -42,7 +42,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
* </ul>
|
* </ul>
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public final class UAX29URLEmailTokenizerImpl implements StandardTokenizerInterface {
|
public final class UAX29URLEmailTokenizerImpl {
|
||||||
|
|
||||||
/** This character denotes the end of file */
|
/** This character denotes the end of file */
|
||||||
public static final int YYEOF = -1;
|
public static final int YYEOF = -1;
|
||||||
|
@ -7204,11 +7204,11 @@ public final class UAX29URLEmailTokenizerImpl implements StandardTokenizerInterf
|
||||||
zzAtEOF = true;
|
zzAtEOF = true;
|
||||||
switch (zzLexicalState) {
|
switch (zzLexicalState) {
|
||||||
case YYINITIAL: {
|
case YYINITIAL: {
|
||||||
return StandardTokenizerInterface.YYEOF;
|
return YYEOF;
|
||||||
}
|
}
|
||||||
case 2910: break;
|
case 2910: break;
|
||||||
case AVOID_BAD_URL: {
|
case AVOID_BAD_URL: {
|
||||||
return StandardTokenizerInterface.YYEOF;
|
return YYEOF;
|
||||||
}
|
}
|
||||||
case 2911: break;
|
case 2911: break;
|
||||||
default:
|
default:
|
||||||
|
|
|
@ -46,7 +46,6 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
%final
|
%final
|
||||||
%public
|
%public
|
||||||
%class UAX29URLEmailTokenizerImpl
|
%class UAX29URLEmailTokenizerImpl
|
||||||
%implements StandardTokenizerInterface
|
|
||||||
%function getNextToken
|
%function getNextToken
|
||||||
%char
|
%char
|
||||||
%xstate AVOID_BAD_URL
|
%xstate AVOID_BAD_URL
|
||||||
|
@ -208,7 +207,7 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNameStrict} | {EMAILbracketedHost})
|
||||||
// UAX#29 WB1. sot ÷
|
// UAX#29 WB1. sot ÷
|
||||||
// WB2. ÷ eot
|
// WB2. ÷ eot
|
||||||
//
|
//
|
||||||
<<EOF>> { return StandardTokenizerInterface.YYEOF; }
|
<<EOF>> { return YYEOF; }
|
||||||
|
|
||||||
{URL} { yybegin(YYINITIAL); return URL_TYPE; }
|
{URL} { yybegin(YYINITIAL); return URL_TYPE; }
|
||||||
|
|
||||||
|
|
|
@ -35,7 +35,7 @@ import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
* It is very similar to the snowball portuguese algorithm but not exactly the same.
|
* It is very similar to the snowball portuguese algorithm but not exactly the same.
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public class TestBrazilianStemmer extends BaseTokenStreamTestCase {
|
public class TestBrazilianAnalyzer extends BaseTokenStreamTestCase {
|
||||||
|
|
||||||
public void testWithSnowballExamples() throws Exception {
|
public void testWithSnowballExamples() throws Exception {
|
||||||
check("boa", "boa");
|
check("boa", "boa");
|
|
@ -30,7 +30,7 @@ import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
* The code states that it uses the snowball algorithm, but tests reveal some differences.
|
* The code states that it uses the snowball algorithm, but tests reveal some differences.
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public class TestDutchStemmer extends BaseTokenStreamTestCase {
|
public class TestDutchAnalyzer extends BaseTokenStreamTestCase {
|
||||||
|
|
||||||
public void testWithSnowballExamples() throws Exception {
|
public void testWithSnowballExamples() throws Exception {
|
||||||
check("lichaamsziek", "lichaamsziek");
|
check("lichaamsziek", "lichaamsziek");
|
|
@ -1,4 +1,4 @@
|
||||||
package org.apache.lucene.analysis.core;
|
package org.apache.lucene.analysis.standard;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
|
@ -1,4 +1,4 @@
|
||||||
package org.apache.lucene.analysis.core;
|
package org.apache.lucene.analysis.standard;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
@ -27,8 +27,6 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||||
import org.apache.lucene.analysis.MockGraphTokenFilter;
|
import org.apache.lucene.analysis.MockGraphTokenFilter;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
|
||||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
|
||||||
import org.apache.lucene.util.TestUtil;
|
import org.apache.lucene.util.TestUtil;
|
||||||
|
|
||||||
public class TestStandardAnalyzer extends BaseTokenStreamTestCase {
|
public class TestStandardAnalyzer extends BaseTokenStreamTestCase {
|
|
@ -1,4 +1,4 @@
|
||||||
package org.apache.lucene.analysis.core;
|
package org.apache.lucene.analysis.standard;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
@ -1,4 +1,4 @@
|
||||||
package org.apache.lucene.analysis.core;
|
package org.apache.lucene.analysis.standard;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||||
|
@ -6,11 +6,11 @@ import org.apache.lucene.analysis.TokenFilter;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer;
|
import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer;
|
||||||
|
import org.apache.lucene.analysis.standard.WordBreakTestUnicode_6_3_0;
|
||||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||||
import org.apache.lucene.util.TestUtil;
|
import org.apache.lucene.util.TestUtil;
|
||||||
|
|
||||||
import java.io.BufferedReader;
|
import java.io.BufferedReader;
|
||||||
import java.io.FileInputStream;
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStreamReader;
|
import java.io.InputStreamReader;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
|
@ -20,7 +20,6 @@ import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
import java.util.regex.Pattern;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
@ -1,4 +1,4 @@
|
||||||
package org.apache.lucene.analysis.core;
|
package org.apache.lucene.analysis.standard;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
@ -1,50 +0,0 @@
|
||||||
#
|
|
||||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
# contributor license agreements. See the NOTICE file distributed with
|
|
||||||
# this work for additional information regarding copyright ownership.
|
|
||||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
# (the "License"); you may not use this file except in compliance with
|
|
||||||
# the License. You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# Parses Myanmar text, with syllable as token.
|
|
||||||
#
|
|
||||||
|
|
||||||
$Cons = [[:Other_Letter:]&[:Myanmar:]];
|
|
||||||
$Virama = [\u1039];
|
|
||||||
$Asat = [\u103A];
|
|
||||||
|
|
||||||
$WordJoin = [:Line_Break=Word_Joiner:];
|
|
||||||
|
|
||||||
#
|
|
||||||
# default numerical definitions
|
|
||||||
#
|
|
||||||
$Extend = [\p{Word_Break = Extend}];
|
|
||||||
$Format = [\p{Word_Break = Format}];
|
|
||||||
$MidNumLet = [\p{Word_Break = MidNumLet}];
|
|
||||||
$MidNum = [\p{Word_Break = MidNum}];
|
|
||||||
$Numeric = [\p{Word_Break = Numeric}];
|
|
||||||
$ExtendNumLet = [\p{Word_Break = ExtendNumLet}];
|
|
||||||
$MidNumLetEx = $MidNumLet ($Extend | $Format)*;
|
|
||||||
$MidNumEx = $MidNum ($Extend | $Format)*;
|
|
||||||
$NumericEx = $Numeric ($Extend | $Format)*;
|
|
||||||
$ExtendNumLetEx = $ExtendNumLet ($Extend | $Format)*;
|
|
||||||
|
|
||||||
$ConsEx = $Cons ($Extend | $Format)*;
|
|
||||||
$AsatEx = $Cons $Asat ($Virama $ConsEx)? ($Extend | $Format)*;
|
|
||||||
$MyanmarSyllableEx = $ConsEx ($Virama $ConsEx)? ($AsatEx)*;
|
|
||||||
$MyanmarJoinedSyllableEx = $MyanmarSyllableEx ($WordJoin $MyanmarSyllableEx)*;
|
|
||||||
|
|
||||||
!!forward;
|
|
||||||
$MyanmarJoinedSyllableEx {200};
|
|
||||||
|
|
||||||
# default numeric rules
|
|
||||||
$NumericEx $ExtendNumLetEx? (($MidNumEx | $MidNumLetEx)? $NumericEx $ExtendNumLetEx?)* {100};
|
|
|
@ -49,6 +49,7 @@
|
||||||
2E17>002D
|
2E17>002D
|
||||||
2E1A>002D
|
2E1A>002D
|
||||||
2E3A..2E3B>002D
|
2E3A..2E3B>002D
|
||||||
|
2E40>002D
|
||||||
301C>002D
|
301C>002D
|
||||||
3030>002D
|
3030>002D
|
||||||
30A0>002D
|
30A0>002D
|
||||||
|
|
|
@ -102,6 +102,7 @@
|
||||||
1939..193B>
|
1939..193B>
|
||||||
1A75..1A7C>
|
1A75..1A7C>
|
||||||
1A7F>
|
1A7F>
|
||||||
|
1AB0..1ABD>
|
||||||
1B34>
|
1B34>
|
||||||
1B44>
|
1B44>
|
||||||
1B6B..1B73>
|
1B6B..1B73>
|
||||||
|
@ -111,8 +112,10 @@
|
||||||
1CD0..1CE8>
|
1CD0..1CE8>
|
||||||
1CED>
|
1CED>
|
||||||
1CF4>
|
1CF4>
|
||||||
|
1CF8..1CF9>
|
||||||
1D2C..1D6A>
|
1D2C..1D6A>
|
||||||
1DC4..1DCF>
|
1DC4..1DCF>
|
||||||
|
1DF5>
|
||||||
1DFD..1DFF>
|
1DFD..1DFF>
|
||||||
1FBD>
|
1FBD>
|
||||||
1FBF..1FC1>
|
1FBF..1FC1>
|
||||||
|
@ -128,6 +131,7 @@
|
||||||
A66F>
|
A66F>
|
||||||
A67C..A67D>
|
A67C..A67D>
|
||||||
A67F>
|
A67F>
|
||||||
|
A69C..A69D>
|
||||||
A6F0..A6F1>
|
A6F0..A6F1>
|
||||||
A717..A721>
|
A717..A721>
|
||||||
A788>
|
A788>
|
||||||
|
@ -138,27 +142,43 @@ A92B..A92E>
|
||||||
A953>
|
A953>
|
||||||
A9B3>
|
A9B3>
|
||||||
A9C0>
|
A9C0>
|
||||||
AA7B>
|
A9E5>
|
||||||
|
AA7B..AA7D>
|
||||||
AABF..AAC2>
|
AABF..AAC2>
|
||||||
AAF6>
|
AAF6>
|
||||||
|
AB5B..AB5F>
|
||||||
ABEC..ABED>
|
ABEC..ABED>
|
||||||
FB1E>
|
FB1E>
|
||||||
FE20..FE26>
|
FE20..FE2D>
|
||||||
FF3E>
|
FF3E>
|
||||||
FF40>
|
FF40>
|
||||||
FF70>
|
FF70>
|
||||||
FF9E..FF9F>
|
FF9E..FF9F>
|
||||||
FFE3>
|
FFE3>
|
||||||
|
102E0>
|
||||||
|
10AE5..10AE6>
|
||||||
110B9..110BA>
|
110B9..110BA>
|
||||||
11133..11134>
|
11133..11134>
|
||||||
|
11173>
|
||||||
111C0>
|
111C0>
|
||||||
|
11235..11236>
|
||||||
|
112E9..112EA>
|
||||||
|
1133C>
|
||||||
|
1134D>
|
||||||
|
11366..1136C>
|
||||||
|
11370..11374>
|
||||||
|
114C2..114C3>
|
||||||
|
115BF..115C0>
|
||||||
|
1163F>
|
||||||
116B6..116B7>
|
116B6..116B7>
|
||||||
|
16AF0..16AF4>
|
||||||
16F8F..16F9F>
|
16F8F..16F9F>
|
||||||
1D167..1D169>
|
1D167..1D169>
|
||||||
1D16D..1D172>
|
1D16D..1D172>
|
||||||
1D17B..1D182>
|
1D17B..1D182>
|
||||||
1D185..1D18B>
|
1D185..1D18B>
|
||||||
1D1AA..1D1AD>
|
1D1AA..1D1AD>
|
||||||
|
1E8D0..1E8D6>
|
||||||
|
|
||||||
# Latin script "composed" that do not further decompose, so decompose here
|
# Latin script "composed" that do not further decompose, so decompose here
|
||||||
# These are from AsciiFoldingFilter
|
# These are from AsciiFoldingFilter
|
||||||
|
|
|
@ -151,6 +151,16 @@
|
||||||
0D6D>0037 # MALAYALAM DIGIT SEVEN
|
0D6D>0037 # MALAYALAM DIGIT SEVEN
|
||||||
0D6E>0038 # MALAYALAM DIGIT EIGHT
|
0D6E>0038 # MALAYALAM DIGIT EIGHT
|
||||||
0D6F>0039 # MALAYALAM DIGIT NINE
|
0D6F>0039 # MALAYALAM DIGIT NINE
|
||||||
|
0DE6>0030 # SINHALA LITH DIGIT ZERO
|
||||||
|
0DE7>0031 # SINHALA LITH DIGIT ONE
|
||||||
|
0DE8>0032 # SINHALA LITH DIGIT TWO
|
||||||
|
0DE9>0033 # SINHALA LITH DIGIT THREE
|
||||||
|
0DEA>0034 # SINHALA LITH DIGIT FOUR
|
||||||
|
0DEB>0035 # SINHALA LITH DIGIT FIVE
|
||||||
|
0DEC>0036 # SINHALA LITH DIGIT SIX
|
||||||
|
0DED>0037 # SINHALA LITH DIGIT SEVEN
|
||||||
|
0DEE>0038 # SINHALA LITH DIGIT EIGHT
|
||||||
|
0DEF>0039 # SINHALA LITH DIGIT NINE
|
||||||
0E50>0030 # THAI DIGIT ZERO
|
0E50>0030 # THAI DIGIT ZERO
|
||||||
0E51>0031 # THAI DIGIT ONE
|
0E51>0031 # THAI DIGIT ONE
|
||||||
0E52>0032 # THAI DIGIT TWO
|
0E52>0032 # THAI DIGIT TWO
|
||||||
|
@ -388,6 +398,16 @@ A9D6>0036 # JAVANESE DIGIT SIX
|
||||||
A9D7>0037 # JAVANESE DIGIT SEVEN
|
A9D7>0037 # JAVANESE DIGIT SEVEN
|
||||||
A9D8>0038 # JAVANESE DIGIT EIGHT
|
A9D8>0038 # JAVANESE DIGIT EIGHT
|
||||||
A9D9>0039 # JAVANESE DIGIT NINE
|
A9D9>0039 # JAVANESE DIGIT NINE
|
||||||
|
A9F0>0030 # MYANMAR TAI LAING DIGIT ZERO
|
||||||
|
A9F1>0031 # MYANMAR TAI LAING DIGIT ONE
|
||||||
|
A9F2>0032 # MYANMAR TAI LAING DIGIT TWO
|
||||||
|
A9F3>0033 # MYANMAR TAI LAING DIGIT THREE
|
||||||
|
A9F4>0034 # MYANMAR TAI LAING DIGIT FOUR
|
||||||
|
A9F5>0035 # MYANMAR TAI LAING DIGIT FIVE
|
||||||
|
A9F6>0036 # MYANMAR TAI LAING DIGIT SIX
|
||||||
|
A9F7>0037 # MYANMAR TAI LAING DIGIT SEVEN
|
||||||
|
A9F8>0038 # MYANMAR TAI LAING DIGIT EIGHT
|
||||||
|
A9F9>0039 # MYANMAR TAI LAING DIGIT NINE
|
||||||
AA50>0030 # CHAM DIGIT ZERO
|
AA50>0030 # CHAM DIGIT ZERO
|
||||||
AA51>0031 # CHAM DIGIT ONE
|
AA51>0031 # CHAM DIGIT ONE
|
||||||
AA52>0032 # CHAM DIGIT TWO
|
AA52>0032 # CHAM DIGIT TWO
|
||||||
|
@ -480,6 +500,36 @@ ABF9>0039 # MEETEI MAYEK DIGIT NINE
|
||||||
111D7>0037 # SHARADA DIGIT SEVEN
|
111D7>0037 # SHARADA DIGIT SEVEN
|
||||||
111D8>0038 # SHARADA DIGIT EIGHT
|
111D8>0038 # SHARADA DIGIT EIGHT
|
||||||
111D9>0039 # SHARADA DIGIT NINE
|
111D9>0039 # SHARADA DIGIT NINE
|
||||||
|
112F0>0030 # KHUDAWADI DIGIT ZERO
|
||||||
|
112F1>0031 # KHUDAWADI DIGIT ONE
|
||||||
|
112F2>0032 # KHUDAWADI DIGIT TWO
|
||||||
|
112F3>0033 # KHUDAWADI DIGIT THREE
|
||||||
|
112F4>0034 # KHUDAWADI DIGIT FOUR
|
||||||
|
112F5>0035 # KHUDAWADI DIGIT FIVE
|
||||||
|
112F6>0036 # KHUDAWADI DIGIT SIX
|
||||||
|
112F7>0037 # KHUDAWADI DIGIT SEVEN
|
||||||
|
112F8>0038 # KHUDAWADI DIGIT EIGHT
|
||||||
|
112F9>0039 # KHUDAWADI DIGIT NINE
|
||||||
|
114D0>0030 # TIRHUTA DIGIT ZERO
|
||||||
|
114D1>0031 # TIRHUTA DIGIT ONE
|
||||||
|
114D2>0032 # TIRHUTA DIGIT TWO
|
||||||
|
114D3>0033 # TIRHUTA DIGIT THREE
|
||||||
|
114D4>0034 # TIRHUTA DIGIT FOUR
|
||||||
|
114D5>0035 # TIRHUTA DIGIT FIVE
|
||||||
|
114D6>0036 # TIRHUTA DIGIT SIX
|
||||||
|
114D7>0037 # TIRHUTA DIGIT SEVEN
|
||||||
|
114D8>0038 # TIRHUTA DIGIT EIGHT
|
||||||
|
114D9>0039 # TIRHUTA DIGIT NINE
|
||||||
|
11650>0030 # MODI DIGIT ZERO
|
||||||
|
11651>0031 # MODI DIGIT ONE
|
||||||
|
11652>0032 # MODI DIGIT TWO
|
||||||
|
11653>0033 # MODI DIGIT THREE
|
||||||
|
11654>0034 # MODI DIGIT FOUR
|
||||||
|
11655>0035 # MODI DIGIT FIVE
|
||||||
|
11656>0036 # MODI DIGIT SIX
|
||||||
|
11657>0037 # MODI DIGIT SEVEN
|
||||||
|
11658>0038 # MODI DIGIT EIGHT
|
||||||
|
11659>0039 # MODI DIGIT NINE
|
||||||
116C0>0030 # TAKRI DIGIT ZERO
|
116C0>0030 # TAKRI DIGIT ZERO
|
||||||
116C1>0031 # TAKRI DIGIT ONE
|
116C1>0031 # TAKRI DIGIT ONE
|
||||||
116C2>0032 # TAKRI DIGIT TWO
|
116C2>0032 # TAKRI DIGIT TWO
|
||||||
|
@ -490,4 +540,34 @@ ABF9>0039 # MEETEI MAYEK DIGIT NINE
|
||||||
116C7>0037 # TAKRI DIGIT SEVEN
|
116C7>0037 # TAKRI DIGIT SEVEN
|
||||||
116C8>0038 # TAKRI DIGIT EIGHT
|
116C8>0038 # TAKRI DIGIT EIGHT
|
||||||
116C9>0039 # TAKRI DIGIT NINE
|
116C9>0039 # TAKRI DIGIT NINE
|
||||||
|
118E0>0030 # WARANG CITI DIGIT ZERO
|
||||||
|
118E1>0031 # WARANG CITI DIGIT ONE
|
||||||
|
118E2>0032 # WARANG CITI DIGIT TWO
|
||||||
|
118E3>0033 # WARANG CITI DIGIT THREE
|
||||||
|
118E4>0034 # WARANG CITI DIGIT FOUR
|
||||||
|
118E5>0035 # WARANG CITI DIGIT FIVE
|
||||||
|
118E6>0036 # WARANG CITI DIGIT SIX
|
||||||
|
118E7>0037 # WARANG CITI DIGIT SEVEN
|
||||||
|
118E8>0038 # WARANG CITI DIGIT EIGHT
|
||||||
|
118E9>0039 # WARANG CITI DIGIT NINE
|
||||||
|
16A60>0030 # MRO DIGIT ZERO
|
||||||
|
16A61>0031 # MRO DIGIT ONE
|
||||||
|
16A62>0032 # MRO DIGIT TWO
|
||||||
|
16A63>0033 # MRO DIGIT THREE
|
||||||
|
16A64>0034 # MRO DIGIT FOUR
|
||||||
|
16A65>0035 # MRO DIGIT FIVE
|
||||||
|
16A66>0036 # MRO DIGIT SIX
|
||||||
|
16A67>0037 # MRO DIGIT SEVEN
|
||||||
|
16A68>0038 # MRO DIGIT EIGHT
|
||||||
|
16A69>0039 # MRO DIGIT NINE
|
||||||
|
16B50>0030 # PAHAWH HMONG DIGIT ZERO
|
||||||
|
16B51>0031 # PAHAWH HMONG DIGIT ONE
|
||||||
|
16B52>0032 # PAHAWH HMONG DIGIT TWO
|
||||||
|
16B53>0033 # PAHAWH HMONG DIGIT THREE
|
||||||
|
16B54>0034 # PAHAWH HMONG DIGIT FOUR
|
||||||
|
16B55>0035 # PAHAWH HMONG DIGIT FIVE
|
||||||
|
16B56>0036 # PAHAWH HMONG DIGIT SIX
|
||||||
|
16B57>0037 # PAHAWH HMONG DIGIT SEVEN
|
||||||
|
16B58>0038 # PAHAWH HMONG DIGIT EIGHT
|
||||||
|
16B59>0039 # PAHAWH HMONG DIGIT NINE
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
# Copyright (C) 1999-2013, International Business Machines
|
# Copyright (C) 1999-2014, International Business Machines
|
||||||
# Corporation and others. All Rights Reserved.
|
# Corporation and others. All Rights Reserved.
|
||||||
#
|
#
|
||||||
# file name: nfc.txt
|
# file name: nfc.txt
|
||||||
|
@ -7,7 +7,7 @@
|
||||||
#
|
#
|
||||||
# Complete data for Unicode NFC normalization.
|
# Complete data for Unicode NFC normalization.
|
||||||
|
|
||||||
* Unicode 6.3.0
|
* Unicode 7.0.0
|
||||||
|
|
||||||
# Canonical_Combining_Class (ccc) values
|
# Canonical_Combining_Class (ccc) values
|
||||||
0300..0314:230
|
0300..0314:230
|
||||||
|
@ -142,7 +142,7 @@
|
||||||
08F6:220
|
08F6:220
|
||||||
08F7..08F8:230
|
08F7..08F8:230
|
||||||
08F9..08FA:220
|
08F9..08FA:220
|
||||||
08FB..08FE:230
|
08FB..08FF:230
|
||||||
093C:7
|
093C:7
|
||||||
094D:9
|
094D:9
|
||||||
0951:230
|
0951:230
|
||||||
|
@ -199,6 +199,10 @@
|
||||||
1A60:9
|
1A60:9
|
||||||
1A75..1A7C:230
|
1A75..1A7C:230
|
||||||
1A7F:220
|
1A7F:220
|
||||||
|
1AB0..1AB4:230
|
||||||
|
1AB5..1ABA:220
|
||||||
|
1ABB..1ABC:230
|
||||||
|
1ABD:220
|
||||||
1B34:7
|
1B34:7
|
||||||
1B44:9
|
1B44:9
|
||||||
1B6B:230
|
1B6B:230
|
||||||
|
@ -217,6 +221,7 @@
|
||||||
1CE2..1CE8:1
|
1CE2..1CE8:1
|
||||||
1CED:220
|
1CED:220
|
||||||
1CF4:230
|
1CF4:230
|
||||||
|
1CF8..1CF9:230
|
||||||
1DC0..1DC1:230
|
1DC0..1DC1:230
|
||||||
1DC2:220
|
1DC2:220
|
||||||
1DC3..1DC9:230
|
1DC3..1DC9:230
|
||||||
|
@ -226,7 +231,7 @@
|
||||||
1DCE:214
|
1DCE:214
|
||||||
1DCF:220
|
1DCF:220
|
||||||
1DD0:202
|
1DD0:202
|
||||||
1DD1..1DE6:230
|
1DD1..1DF5:230
|
||||||
1DFC:233
|
1DFC:233
|
||||||
1DFD:220
|
1DFD:220
|
||||||
1DFE:230
|
1DFE:230
|
||||||
|
@ -274,21 +279,44 @@ AAF6:9
|
||||||
ABED:9
|
ABED:9
|
||||||
FB1E:26
|
FB1E:26
|
||||||
FE20..FE26:230
|
FE20..FE26:230
|
||||||
|
FE27..FE2D:220
|
||||||
101FD:220
|
101FD:220
|
||||||
|
102E0:220
|
||||||
|
10376..1037A:230
|
||||||
10A0D:220
|
10A0D:220
|
||||||
10A0F:230
|
10A0F:230
|
||||||
10A38:230
|
10A38:230
|
||||||
10A39:1
|
10A39:1
|
||||||
10A3A:220
|
10A3A:220
|
||||||
10A3F:9
|
10A3F:9
|
||||||
|
10AE5:230
|
||||||
|
10AE6:220
|
||||||
11046:9
|
11046:9
|
||||||
|
1107F:9
|
||||||
110B9:9
|
110B9:9
|
||||||
110BA:7
|
110BA:7
|
||||||
11100..11102:230
|
11100..11102:230
|
||||||
11133..11134:9
|
11133..11134:9
|
||||||
|
11173:7
|
||||||
111C0:9
|
111C0:9
|
||||||
|
11235:9
|
||||||
|
11236:7
|
||||||
|
112E9:7
|
||||||
|
112EA:9
|
||||||
|
1133C:7
|
||||||
|
1134D:9
|
||||||
|
11366..1136C:230
|
||||||
|
11370..11374:230
|
||||||
|
114C2:9
|
||||||
|
114C3:7
|
||||||
|
115BF:9
|
||||||
|
115C0:7
|
||||||
|
1163F:9
|
||||||
116B6:9
|
116B6:9
|
||||||
116B7:7
|
116B7:7
|
||||||
|
16AF0..16AF4:1
|
||||||
|
16B30..16B36:230
|
||||||
|
1BC9E:1
|
||||||
1D165..1D166:216
|
1D165..1D166:216
|
||||||
1D167..1D169:1
|
1D167..1D169:1
|
||||||
1D16D:226
|
1D16D:226
|
||||||
|
@ -298,6 +326,7 @@ FE20..FE26:230
|
||||||
1D18A..1D18B:220
|
1D18A..1D18B:220
|
||||||
1D1AA..1D1AD:230
|
1D1AA..1D1AD:230
|
||||||
1D242..1D244:230
|
1D242..1D244:230
|
||||||
|
1E8D0..1E8D6:220
|
||||||
|
|
||||||
# Canonical decomposition mappings
|
# Canonical decomposition mappings
|
||||||
00C0>0041 0300 # one-way: diacritic 0300
|
00C0>0041 0300 # one-way: diacritic 0300
|
||||||
|
@ -1798,6 +1827,13 @@ FB4E>05E4 05BF
|
||||||
110AB>110A5 110BA # one-way: diacritic 110BA
|
110AB>110A5 110BA # one-way: diacritic 110BA
|
||||||
1112E=11131 11127
|
1112E=11131 11127
|
||||||
1112F=11132 11127
|
1112F=11132 11127
|
||||||
|
1134B=11347 1133E
|
||||||
|
1134C=11347 11357
|
||||||
|
114BB=114B9 114BA
|
||||||
|
114BC=114B9 114B0
|
||||||
|
114BE=114B9 114BD
|
||||||
|
115BA=115B8 115AF
|
||||||
|
115BB=115B9 115AF
|
||||||
1D15E>1D157 1D165
|
1D15E>1D157 1D165
|
||||||
1D15F>1D158 1D165
|
1D15F>1D158 1D165
|
||||||
1D160>1D15F 1D16E
|
1D160>1D15F 1D16E
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
# Copyright (C) 1999-2013, International Business Machines
|
# Copyright (C) 1999-2014, International Business Machines
|
||||||
# Corporation and others. All Rights Reserved.
|
# Corporation and others. All Rights Reserved.
|
||||||
#
|
#
|
||||||
# file name: nfkc.txt
|
# file name: nfkc.txt
|
||||||
|
@ -11,7 +11,7 @@
|
||||||
# to NFKC one-way mappings.
|
# to NFKC one-way mappings.
|
||||||
# Use this file as the second gennorm2 input file after nfc.txt.
|
# Use this file as the second gennorm2 input file after nfc.txt.
|
||||||
|
|
||||||
* Unicode 6.3.0
|
* Unicode 7.0.0
|
||||||
|
|
||||||
00A0>0020
|
00A0>0020
|
||||||
00A8>0020 0308
|
00A8>0020 0308
|
||||||
|
@ -1361,9 +1361,15 @@
|
||||||
33FD>0033 0030 65E5
|
33FD>0033 0030 65E5
|
||||||
33FE>0033 0031 65E5
|
33FE>0033 0031 65E5
|
||||||
33FF>0067 0061 006C
|
33FF>0067 0061 006C
|
||||||
|
A69C>044A
|
||||||
|
A69D>044C
|
||||||
A770>A76F
|
A770>A76F
|
||||||
A7F8>0126
|
A7F8>0126
|
||||||
A7F9>0153
|
A7F9>0153
|
||||||
|
AB5C>A727
|
||||||
|
AB5D>AB37
|
||||||
|
AB5E>026B
|
||||||
|
AB5F>AB52
|
||||||
FB00>0066 0066
|
FB00>0066 0066
|
||||||
FB01>0066 0069
|
FB01>0066 0069
|
||||||
FB02>0066 006C
|
FB02>0066 006C
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
# Unicode Character Database
|
# Unicode Character Database
|
||||||
# Copyright (c) 1991-2013 Unicode, Inc.
|
# Copyright (c) 1991-2014 Unicode, Inc.
|
||||||
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
# For terms of use, see http://www.unicode.org/terms_of_use.html
|
||||||
# For documentation, see http://www.unicode.org/reports/tr44/
|
# For documentation, see http://www.unicode.org/reports/tr44/
|
||||||
#
|
#
|
||||||
|
@ -12,7 +12,7 @@
|
||||||
# and reformatted into syntax for the gennorm2 Normalizer2 data generator tool.
|
# and reformatted into syntax for the gennorm2 Normalizer2 data generator tool.
|
||||||
# Use this file as the third gennorm2 input file after nfc.txt and nfkc.txt.
|
# Use this file as the third gennorm2 input file after nfc.txt and nfkc.txt.
|
||||||
|
|
||||||
* Unicode 6.3.0
|
* Unicode 7.0.0
|
||||||
|
|
||||||
0041>0061
|
0041>0061
|
||||||
0042>0062
|
0042>0062
|
||||||
|
@ -286,6 +286,7 @@
|
||||||
0376>0377
|
0376>0377
|
||||||
037A>0020 03B9
|
037A>0020 03B9
|
||||||
037E>003B
|
037E>003B
|
||||||
|
037F>03F3
|
||||||
0384>0020 0301
|
0384>0020 0301
|
||||||
0385>0020 0308 0301
|
0385>0020 0308 0301
|
||||||
0386>03AC
|
0386>03AC
|
||||||
|
@ -498,6 +499,10 @@
|
||||||
0522>0523
|
0522>0523
|
||||||
0524>0525
|
0524>0525
|
||||||
0526>0527
|
0526>0527
|
||||||
|
0528>0529
|
||||||
|
052A>052B
|
||||||
|
052C>052D
|
||||||
|
052E>052F
|
||||||
0531>0561
|
0531>0561
|
||||||
0532>0562
|
0532>0562
|
||||||
0533>0563
|
0533>0563
|
||||||
|
@ -2308,6 +2313,10 @@ A690>A691
|
||||||
A692>A693
|
A692>A693
|
||||||
A694>A695
|
A694>A695
|
||||||
A696>A697
|
A696>A697
|
||||||
|
A698>A699
|
||||||
|
A69A>A69B
|
||||||
|
A69C>044A
|
||||||
|
A69D>044C
|
||||||
A722>A723
|
A722>A723
|
||||||
A724>A725
|
A724>A725
|
||||||
A726>A727
|
A726>A727
|
||||||
|
@ -2359,14 +2368,28 @@ A78B>A78C
|
||||||
A78D>0265
|
A78D>0265
|
||||||
A790>A791
|
A790>A791
|
||||||
A792>A793
|
A792>A793
|
||||||
|
A796>A797
|
||||||
|
A798>A799
|
||||||
|
A79A>A79B
|
||||||
|
A79C>A79D
|
||||||
|
A79E>A79F
|
||||||
A7A0>A7A1
|
A7A0>A7A1
|
||||||
A7A2>A7A3
|
A7A2>A7A3
|
||||||
A7A4>A7A5
|
A7A4>A7A5
|
||||||
A7A6>A7A7
|
A7A6>A7A7
|
||||||
A7A8>A7A9
|
A7A8>A7A9
|
||||||
A7AA>0266
|
A7AA>0266
|
||||||
|
A7AB>025C
|
||||||
|
A7AC>0261
|
||||||
|
A7AD>026C
|
||||||
|
A7B0>029E
|
||||||
|
A7B1>0287
|
||||||
A7F8>0127
|
A7F8>0127
|
||||||
A7F9>0153
|
A7F9>0153
|
||||||
|
AB5C>A727
|
||||||
|
AB5D>AB37
|
||||||
|
AB5E>026B
|
||||||
|
AB5F>AB52
|
||||||
F900>8C48
|
F900>8C48
|
||||||
F901>66F4
|
F901>66F4
|
||||||
F902>8ECA
|
F902>8ECA
|
||||||
|
@ -3743,6 +3766,39 @@ FFF0..FFF8>
|
||||||
10425>1044D
|
10425>1044D
|
||||||
10426>1044E
|
10426>1044E
|
||||||
10427>1044F
|
10427>1044F
|
||||||
|
118A0>118C0
|
||||||
|
118A1>118C1
|
||||||
|
118A2>118C2
|
||||||
|
118A3>118C3
|
||||||
|
118A4>118C4
|
||||||
|
118A5>118C5
|
||||||
|
118A6>118C6
|
||||||
|
118A7>118C7
|
||||||
|
118A8>118C8
|
||||||
|
118A9>118C9
|
||||||
|
118AA>118CA
|
||||||
|
118AB>118CB
|
||||||
|
118AC>118CC
|
||||||
|
118AD>118CD
|
||||||
|
118AE>118CE
|
||||||
|
118AF>118CF
|
||||||
|
118B0>118D0
|
||||||
|
118B1>118D1
|
||||||
|
118B2>118D2
|
||||||
|
118B3>118D3
|
||||||
|
118B4>118D4
|
||||||
|
118B5>118D5
|
||||||
|
118B6>118D6
|
||||||
|
118B7>118D7
|
||||||
|
118B8>118D8
|
||||||
|
118B9>118D9
|
||||||
|
118BA>118DA
|
||||||
|
118BB>118DB
|
||||||
|
118BC>118DC
|
||||||
|
118BD>118DD
|
||||||
|
118BE>118DE
|
||||||
|
118BF>118DF
|
||||||
|
1BCA0..1BCA3>
|
||||||
1D15E>1D157 1D165
|
1D15E>1D157 1D165
|
||||||
1D15F>1D158 1D165
|
1D15F>1D158 1D165
|
||||||
1D160>1D158 1D165 1D16E
|
1D160>1D158 1D165 1D16E
|
||||||
|
|
|
@ -35,8 +35,8 @@ import com.ibm.icu.util.ULocale;
|
||||||
* ({@link BreakIterator#getWordInstance(ULocale) BreakIterator.getWordInstance(ULocale.ROOT)}),
|
* ({@link BreakIterator#getWordInstance(ULocale) BreakIterator.getWordInstance(ULocale.ROOT)}),
|
||||||
* but with the following tailorings:
|
* but with the following tailorings:
|
||||||
* <ul>
|
* <ul>
|
||||||
* <li>Thai, Lao, and CJK text is broken into words with a dictionary.
|
* <li>Thai, Lao, Myanmar, and CJK text is broken into words with a dictionary.
|
||||||
* <li>Myanmar, and Khmer text is broken into syllables
|
* <li>Khmer text is broken into syllables
|
||||||
* based on custom BreakIterator rules.
|
* based on custom BreakIterator rules.
|
||||||
* </ul>
|
* </ul>
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
|
@ -67,8 +67,6 @@ public class DefaultICUTokenizerConfig extends ICUTokenizerConfig {
|
||||||
readBreakIterator("Default.brk");
|
readBreakIterator("Default.brk");
|
||||||
private static final BreakIterator khmerBreakIterator =
|
private static final BreakIterator khmerBreakIterator =
|
||||||
readBreakIterator("Khmer.brk");
|
readBreakIterator("Khmer.brk");
|
||||||
private static final BreakIterator myanmarBreakIterator =
|
|
||||||
readBreakIterator("Myanmar.brk");
|
|
||||||
|
|
||||||
// TODO: deprecate this boolean? you only care if you are doing super-expert stuff...
|
// TODO: deprecate this boolean? you only care if you are doing super-expert stuff...
|
||||||
private final boolean cjkAsWords;
|
private final boolean cjkAsWords;
|
||||||
|
@ -94,7 +92,6 @@ public class DefaultICUTokenizerConfig extends ICUTokenizerConfig {
|
||||||
public BreakIterator getBreakIterator(int script) {
|
public BreakIterator getBreakIterator(int script) {
|
||||||
switch(script) {
|
switch(script) {
|
||||||
case UScript.KHMER: return (BreakIterator)khmerBreakIterator.clone();
|
case UScript.KHMER: return (BreakIterator)khmerBreakIterator.clone();
|
||||||
case UScript.MYANMAR: return (BreakIterator)myanmarBreakIterator.clone();
|
|
||||||
case UScript.JAPANESE: return (BreakIterator)cjkBreakIterator.clone();
|
case UScript.JAPANESE: return (BreakIterator)cjkBreakIterator.clone();
|
||||||
default: return (BreakIterator)defaultBreakIterator.clone();
|
default: return (BreakIterator)defaultBreakIterator.clone();
|
||||||
}
|
}
|
||||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -122,6 +122,10 @@ public class TestICUTokenizer extends BaseTokenStreamTestCase {
|
||||||
assertAnalyzesTo(a, "ພາສາລາວ", new String[] { "ພາສາ", "ລາວ"}, new String[] { "<ALPHANUM>", "<ALPHANUM>" });
|
assertAnalyzesTo(a, "ພາສາລາວ", new String[] { "ພາສາ", "ລາວ"}, new String[] { "<ALPHANUM>", "<ALPHANUM>" });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testMyanmar() throws Exception {
|
||||||
|
assertAnalyzesTo(a, "သက်ဝင်လှုပ်ရှားစေပြီး", new String[] { "သက်ဝင်", "လှုပ်ရှား", "စေ", "ပြီး" });
|
||||||
|
}
|
||||||
|
|
||||||
public void testThai() throws Exception {
|
public void testThai() throws Exception {
|
||||||
assertAnalyzesTo(a, "การที่ได้ต้องแสดงว่างานดี. แล้วเธอจะไปไหน? ๑๒๓๔",
|
assertAnalyzesTo(a, "การที่ได้ต้องแสดงว่างานดี. แล้วเธอจะไปไหน? ๑๒๓๔",
|
||||||
new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี", "แล้ว", "เธอ", "จะ", "ไป", "ไหน", "๑๒๓๔"});
|
new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี", "แล้ว", "เธอ", "จะ", "ไป", "ไหน", "๑๒๓๔"});
|
||||||
|
|
|
@ -63,7 +63,7 @@ import java.util.regex.Pattern;
|
||||||
public class GenerateUTR30DataFiles {
|
public class GenerateUTR30DataFiles {
|
||||||
private static final String ICU_SVN_TAG_URL
|
private static final String ICU_SVN_TAG_URL
|
||||||
= "http://source.icu-project.org/repos/icu/icu/tags";
|
= "http://source.icu-project.org/repos/icu/icu/tags";
|
||||||
private static final String ICU_RELEASE_TAG = "release-52-1";
|
private static final String ICU_RELEASE_TAG = "release-54-1";
|
||||||
private static final String ICU_DATA_NORM2_PATH = "source/data/unidata/norm2";
|
private static final String ICU_DATA_NORM2_PATH = "source/data/unidata/norm2";
|
||||||
private static final String NFC_TXT = "nfc.txt";
|
private static final String NFC_TXT = "nfc.txt";
|
||||||
private static final String NFKC_TXT = "nfkc.txt";
|
private static final String NFKC_TXT = "nfkc.txt";
|
||||||
|
|
|
@ -51,7 +51,7 @@ public class Lucene40DocValuesFormat extends DocValuesFormat {
|
||||||
String filename = IndexFileNames.segmentFileName(state.segmentInfo.name,
|
String filename = IndexFileNames.segmentFileName(state.segmentInfo.name,
|
||||||
"dv",
|
"dv",
|
||||||
Lucene40CompoundFormat.COMPOUND_FILE_EXTENSION);
|
Lucene40CompoundFormat.COMPOUND_FILE_EXTENSION);
|
||||||
return new Lucene40DocValuesReader(state, filename, Lucene40FieldInfosReader.LEGACY_DV_TYPE_KEY);
|
return new Lucene40DocValuesReader(state, filename, Lucene40FieldInfosFormat.LEGACY_DV_TYPE_KEY);
|
||||||
}
|
}
|
||||||
|
|
||||||
// constants for VAR_INTS
|
// constants for VAR_INTS
|
||||||
|
|
|
@ -24,7 +24,7 @@ import java.util.concurrent.atomic.AtomicLong;
|
||||||
|
|
||||||
import org.apache.lucene.codecs.CodecUtil;
|
import org.apache.lucene.codecs.CodecUtil;
|
||||||
import org.apache.lucene.codecs.DocValuesProducer;
|
import org.apache.lucene.codecs.DocValuesProducer;
|
||||||
import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosReader.LegacyDocValuesType;
|
import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat.LegacyDocValuesType;
|
||||||
import org.apache.lucene.index.BinaryDocValues;
|
import org.apache.lucene.index.BinaryDocValues;
|
||||||
import org.apache.lucene.index.CorruptIndexException;
|
import org.apache.lucene.index.CorruptIndexException;
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
|
|
|
@ -18,10 +18,22 @@ package org.apache.lucene.codecs.lucene40;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.apache.lucene.codecs.CodecUtil;
|
||||||
import org.apache.lucene.codecs.FieldInfosFormat;
|
import org.apache.lucene.codecs.FieldInfosFormat;
|
||||||
import org.apache.lucene.codecs.FieldInfosReader;
|
import org.apache.lucene.index.CorruptIndexException;
|
||||||
import org.apache.lucene.codecs.FieldInfosWriter;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
|
import org.apache.lucene.index.FieldInfos;
|
||||||
|
import org.apache.lucene.index.IndexFileNames;
|
||||||
|
import org.apache.lucene.index.SegmentInfo;
|
||||||
|
import org.apache.lucene.index.FieldInfo.DocValuesType;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.store.IOContext;
|
||||||
|
import org.apache.lucene.store.IndexInput;
|
||||||
|
import org.apache.lucene.util.IOUtils;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Lucene 4.0 Field Infos format.
|
* Lucene 4.0 Field Infos format.
|
||||||
|
@ -29,22 +41,119 @@ import org.apache.lucene.codecs.FieldInfosWriter;
|
||||||
*/
|
*/
|
||||||
@Deprecated
|
@Deprecated
|
||||||
public class Lucene40FieldInfosFormat extends FieldInfosFormat {
|
public class Lucene40FieldInfosFormat extends FieldInfosFormat {
|
||||||
private final FieldInfosReader reader = new Lucene40FieldInfosReader();
|
|
||||||
|
|
||||||
/** Sole constructor. */
|
/** Sole constructor. */
|
||||||
public Lucene40FieldInfosFormat() {
|
public Lucene40FieldInfosFormat() {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public final FieldInfosReader getFieldInfosReader() throws IOException {
|
public final FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext iocontext) throws IOException {
|
||||||
return reader;
|
final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, "", Lucene40FieldInfosFormat.FIELD_INFOS_EXTENSION);
|
||||||
}
|
IndexInput input = directory.openInput(fileName, iocontext);
|
||||||
|
|
||||||
|
boolean success = false;
|
||||||
|
try {
|
||||||
|
CodecUtil.checkHeader(input, Lucene40FieldInfosFormat.CODEC_NAME,
|
||||||
|
Lucene40FieldInfosFormat.FORMAT_START,
|
||||||
|
Lucene40FieldInfosFormat.FORMAT_CURRENT);
|
||||||
|
|
||||||
@Override
|
final int size = input.readVInt(); //read in the size
|
||||||
public FieldInfosWriter getFieldInfosWriter() throws IOException {
|
FieldInfo infos[] = new FieldInfo[size];
|
||||||
throw new UnsupportedOperationException("this codec can only be used for reading");
|
|
||||||
|
for (int i = 0; i < size; i++) {
|
||||||
|
String name = input.readString();
|
||||||
|
final int fieldNumber = input.readVInt();
|
||||||
|
byte bits = input.readByte();
|
||||||
|
boolean isIndexed = (bits & Lucene40FieldInfosFormat.IS_INDEXED) != 0;
|
||||||
|
boolean storeTermVector = (bits & Lucene40FieldInfosFormat.STORE_TERMVECTOR) != 0;
|
||||||
|
boolean omitNorms = (bits & Lucene40FieldInfosFormat.OMIT_NORMS) != 0;
|
||||||
|
boolean storePayloads = (bits & Lucene40FieldInfosFormat.STORE_PAYLOADS) != 0;
|
||||||
|
final IndexOptions indexOptions;
|
||||||
|
if (!isIndexed) {
|
||||||
|
indexOptions = null;
|
||||||
|
} else if ((bits & Lucene40FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS) != 0) {
|
||||||
|
indexOptions = IndexOptions.DOCS_ONLY;
|
||||||
|
} else if ((bits & Lucene40FieldInfosFormat.OMIT_POSITIONS) != 0) {
|
||||||
|
indexOptions = IndexOptions.DOCS_AND_FREQS;
|
||||||
|
} else if ((bits & Lucene40FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS) != 0) {
|
||||||
|
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
|
||||||
|
} else {
|
||||||
|
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||||
|
}
|
||||||
|
|
||||||
|
// LUCENE-3027: past indices were able to write
|
||||||
|
// storePayloads=true when omitTFAP is also true,
|
||||||
|
// which is invalid. We correct that, here:
|
||||||
|
if (isIndexed && indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
|
||||||
|
storePayloads = false;
|
||||||
|
}
|
||||||
|
// DV Types are packed in one byte
|
||||||
|
byte val = input.readByte();
|
||||||
|
final LegacyDocValuesType oldValuesType = getDocValuesType((byte) (val & 0x0F));
|
||||||
|
final LegacyDocValuesType oldNormsType = getDocValuesType((byte) ((val >>> 4) & 0x0F));
|
||||||
|
final Map<String,String> attributes = input.readStringStringMap();;
|
||||||
|
if (oldValuesType.mapping != null) {
|
||||||
|
attributes.put(LEGACY_DV_TYPE_KEY, oldValuesType.name());
|
||||||
|
}
|
||||||
|
if (oldNormsType.mapping != null) {
|
||||||
|
if (oldNormsType.mapping != DocValuesType.NUMERIC) {
|
||||||
|
throw new CorruptIndexException("invalid norm type: " + oldNormsType, input);
|
||||||
|
}
|
||||||
|
attributes.put(LEGACY_NORM_TYPE_KEY, oldNormsType.name());
|
||||||
|
}
|
||||||
|
infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector,
|
||||||
|
omitNorms, storePayloads, indexOptions, oldValuesType.mapping, oldNormsType.mapping, -1, Collections.unmodifiableMap(attributes));
|
||||||
|
}
|
||||||
|
|
||||||
|
CodecUtil.checkEOF(input);
|
||||||
|
FieldInfos fieldInfos = new FieldInfos(infos);
|
||||||
|
success = true;
|
||||||
|
return fieldInfos;
|
||||||
|
} finally {
|
||||||
|
if (success) {
|
||||||
|
input.close();
|
||||||
|
} else {
|
||||||
|
IOUtils.closeWhileHandlingException(input);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static final String LEGACY_DV_TYPE_KEY = Lucene40FieldInfosFormat.class.getSimpleName() + ".dvtype";
|
||||||
|
static final String LEGACY_NORM_TYPE_KEY = Lucene40FieldInfosFormat.class.getSimpleName() + ".normtype";
|
||||||
|
|
||||||
|
// mapping of 4.0 types -> 4.2 types
|
||||||
|
static enum LegacyDocValuesType {
|
||||||
|
NONE(null),
|
||||||
|
VAR_INTS(DocValuesType.NUMERIC),
|
||||||
|
FLOAT_32(DocValuesType.NUMERIC),
|
||||||
|
FLOAT_64(DocValuesType.NUMERIC),
|
||||||
|
BYTES_FIXED_STRAIGHT(DocValuesType.BINARY),
|
||||||
|
BYTES_FIXED_DEREF(DocValuesType.BINARY),
|
||||||
|
BYTES_VAR_STRAIGHT(DocValuesType.BINARY),
|
||||||
|
BYTES_VAR_DEREF(DocValuesType.BINARY),
|
||||||
|
FIXED_INTS_16(DocValuesType.NUMERIC),
|
||||||
|
FIXED_INTS_32(DocValuesType.NUMERIC),
|
||||||
|
FIXED_INTS_64(DocValuesType.NUMERIC),
|
||||||
|
FIXED_INTS_8(DocValuesType.NUMERIC),
|
||||||
|
BYTES_FIXED_SORTED(DocValuesType.SORTED),
|
||||||
|
BYTES_VAR_SORTED(DocValuesType.SORTED);
|
||||||
|
|
||||||
|
final DocValuesType mapping;
|
||||||
|
LegacyDocValuesType(DocValuesType mapping) {
|
||||||
|
this.mapping = mapping;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// decodes a 4.0 type
|
||||||
|
private static LegacyDocValuesType getDocValuesType(byte b) {
|
||||||
|
return LegacyDocValuesType.values()[b];
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void write(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException {
|
||||||
|
throw new UnsupportedOperationException("this codec can only be used for reading");
|
||||||
|
}
|
||||||
|
|
||||||
/** Extension of field infos */
|
/** Extension of field infos */
|
||||||
static final String FIELD_INFOS_EXTENSION = "fnm";
|
static final String FIELD_INFOS_EXTENSION = "fnm";
|
||||||
|
|
||||||
|
|
|
@ -1,151 +0,0 @@
|
||||||
package org.apache.lucene.codecs.lucene40;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
import org.apache.lucene.codecs.CodecUtil;
|
|
||||||
import org.apache.lucene.codecs.FieldInfosReader;
|
|
||||||
import org.apache.lucene.index.CorruptIndexException;
|
|
||||||
import org.apache.lucene.index.FieldInfo;
|
|
||||||
import org.apache.lucene.index.FieldInfos;
|
|
||||||
import org.apache.lucene.index.IndexFileNames;
|
|
||||||
import org.apache.lucene.index.FieldInfo.DocValuesType;
|
|
||||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
|
||||||
import org.apache.lucene.index.SegmentInfo;
|
|
||||||
import org.apache.lucene.store.Directory;
|
|
||||||
import org.apache.lucene.store.IOContext;
|
|
||||||
import org.apache.lucene.store.IndexInput;
|
|
||||||
import org.apache.lucene.util.IOUtils;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Lucene 4.0 FieldInfos reader.
|
|
||||||
* @deprecated Only for reading old 4.0 and 4.1 segments
|
|
||||||
*/
|
|
||||||
@Deprecated
|
|
||||||
final class Lucene40FieldInfosReader extends FieldInfosReader {
|
|
||||||
|
|
||||||
/** Sole constructor. */
|
|
||||||
public Lucene40FieldInfosReader() {
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext iocontext) throws IOException {
|
|
||||||
final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, "", Lucene40FieldInfosFormat.FIELD_INFOS_EXTENSION);
|
|
||||||
IndexInput input = directory.openInput(fileName, iocontext);
|
|
||||||
|
|
||||||
boolean success = false;
|
|
||||||
try {
|
|
||||||
CodecUtil.checkHeader(input, Lucene40FieldInfosFormat.CODEC_NAME,
|
|
||||||
Lucene40FieldInfosFormat.FORMAT_START,
|
|
||||||
Lucene40FieldInfosFormat.FORMAT_CURRENT);
|
|
||||||
|
|
||||||
final int size = input.readVInt(); //read in the size
|
|
||||||
FieldInfo infos[] = new FieldInfo[size];
|
|
||||||
|
|
||||||
for (int i = 0; i < size; i++) {
|
|
||||||
String name = input.readString();
|
|
||||||
final int fieldNumber = input.readVInt();
|
|
||||||
byte bits = input.readByte();
|
|
||||||
boolean isIndexed = (bits & Lucene40FieldInfosFormat.IS_INDEXED) != 0;
|
|
||||||
boolean storeTermVector = (bits & Lucene40FieldInfosFormat.STORE_TERMVECTOR) != 0;
|
|
||||||
boolean omitNorms = (bits & Lucene40FieldInfosFormat.OMIT_NORMS) != 0;
|
|
||||||
boolean storePayloads = (bits & Lucene40FieldInfosFormat.STORE_PAYLOADS) != 0;
|
|
||||||
final IndexOptions indexOptions;
|
|
||||||
if (!isIndexed) {
|
|
||||||
indexOptions = null;
|
|
||||||
} else if ((bits & Lucene40FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS) != 0) {
|
|
||||||
indexOptions = IndexOptions.DOCS_ONLY;
|
|
||||||
} else if ((bits & Lucene40FieldInfosFormat.OMIT_POSITIONS) != 0) {
|
|
||||||
indexOptions = IndexOptions.DOCS_AND_FREQS;
|
|
||||||
} else if ((bits & Lucene40FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS) != 0) {
|
|
||||||
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
|
|
||||||
} else {
|
|
||||||
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
|
||||||
}
|
|
||||||
|
|
||||||
// LUCENE-3027: past indices were able to write
|
|
||||||
// storePayloads=true when omitTFAP is also true,
|
|
||||||
// which is invalid. We correct that, here:
|
|
||||||
if (isIndexed && indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
|
|
||||||
storePayloads = false;
|
|
||||||
}
|
|
||||||
// DV Types are packed in one byte
|
|
||||||
byte val = input.readByte();
|
|
||||||
final LegacyDocValuesType oldValuesType = getDocValuesType((byte) (val & 0x0F));
|
|
||||||
final LegacyDocValuesType oldNormsType = getDocValuesType((byte) ((val >>> 4) & 0x0F));
|
|
||||||
final Map<String,String> attributes = input.readStringStringMap();;
|
|
||||||
if (oldValuesType.mapping != null) {
|
|
||||||
attributes.put(LEGACY_DV_TYPE_KEY, oldValuesType.name());
|
|
||||||
}
|
|
||||||
if (oldNormsType.mapping != null) {
|
|
||||||
if (oldNormsType.mapping != DocValuesType.NUMERIC) {
|
|
||||||
throw new CorruptIndexException("invalid norm type: " + oldNormsType, input);
|
|
||||||
}
|
|
||||||
attributes.put(LEGACY_NORM_TYPE_KEY, oldNormsType.name());
|
|
||||||
}
|
|
||||||
infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector,
|
|
||||||
omitNorms, storePayloads, indexOptions, oldValuesType.mapping, oldNormsType.mapping, -1, Collections.unmodifiableMap(attributes));
|
|
||||||
}
|
|
||||||
|
|
||||||
CodecUtil.checkEOF(input);
|
|
||||||
FieldInfos fieldInfos = new FieldInfos(infos);
|
|
||||||
success = true;
|
|
||||||
return fieldInfos;
|
|
||||||
} finally {
|
|
||||||
if (success) {
|
|
||||||
input.close();
|
|
||||||
} else {
|
|
||||||
IOUtils.closeWhileHandlingException(input);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static final String LEGACY_DV_TYPE_KEY = Lucene40FieldInfosReader.class.getSimpleName() + ".dvtype";
|
|
||||||
static final String LEGACY_NORM_TYPE_KEY = Lucene40FieldInfosReader.class.getSimpleName() + ".normtype";
|
|
||||||
|
|
||||||
// mapping of 4.0 types -> 4.2 types
|
|
||||||
static enum LegacyDocValuesType {
|
|
||||||
NONE(null),
|
|
||||||
VAR_INTS(DocValuesType.NUMERIC),
|
|
||||||
FLOAT_32(DocValuesType.NUMERIC),
|
|
||||||
FLOAT_64(DocValuesType.NUMERIC),
|
|
||||||
BYTES_FIXED_STRAIGHT(DocValuesType.BINARY),
|
|
||||||
BYTES_FIXED_DEREF(DocValuesType.BINARY),
|
|
||||||
BYTES_VAR_STRAIGHT(DocValuesType.BINARY),
|
|
||||||
BYTES_VAR_DEREF(DocValuesType.BINARY),
|
|
||||||
FIXED_INTS_16(DocValuesType.NUMERIC),
|
|
||||||
FIXED_INTS_32(DocValuesType.NUMERIC),
|
|
||||||
FIXED_INTS_64(DocValuesType.NUMERIC),
|
|
||||||
FIXED_INTS_8(DocValuesType.NUMERIC),
|
|
||||||
BYTES_FIXED_SORTED(DocValuesType.SORTED),
|
|
||||||
BYTES_VAR_SORTED(DocValuesType.SORTED);
|
|
||||||
|
|
||||||
final DocValuesType mapping;
|
|
||||||
LegacyDocValuesType(DocValuesType mapping) {
|
|
||||||
this.mapping = mapping;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// decodes a 4.0 type
|
|
||||||
private static LegacyDocValuesType getDocValuesType(byte b) {
|
|
||||||
return LegacyDocValuesType.values()[b];
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -40,7 +40,7 @@ final class Lucene40NormsReader extends NormsProducer {
|
||||||
}
|
}
|
||||||
|
|
||||||
Lucene40NormsReader(SegmentReadState state, String filename) throws IOException {
|
Lucene40NormsReader(SegmentReadState state, String filename) throws IOException {
|
||||||
impl = new Lucene40DocValuesReader(state, filename, Lucene40FieldInfosReader.LEGACY_NORM_TYPE_KEY);
|
impl = new Lucene40DocValuesReader(state, filename, Lucene40FieldInfosFormat.LEGACY_NORM_TYPE_KEY);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -17,10 +17,21 @@ package org.apache.lucene.codecs.lucene40;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.text.ParseException;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.lucene.codecs.CodecUtil;
|
||||||
import org.apache.lucene.codecs.SegmentInfoFormat;
|
import org.apache.lucene.codecs.SegmentInfoFormat;
|
||||||
import org.apache.lucene.codecs.SegmentInfoReader;
|
import org.apache.lucene.index.CorruptIndexException;
|
||||||
import org.apache.lucene.codecs.SegmentInfoWriter;
|
import org.apache.lucene.index.IndexFileNames;
|
||||||
import org.apache.lucene.index.SegmentInfo;
|
import org.apache.lucene.index.SegmentInfo;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.store.IOContext;
|
||||||
|
import org.apache.lucene.store.IndexInput;
|
||||||
|
import org.apache.lucene.util.IOUtils;
|
||||||
|
import org.apache.lucene.util.Version;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Lucene 4.0 Segment info format.
|
* Lucene 4.0 Segment info format.
|
||||||
|
@ -28,19 +39,55 @@ import org.apache.lucene.index.SegmentInfo;
|
||||||
*/
|
*/
|
||||||
@Deprecated
|
@Deprecated
|
||||||
public class Lucene40SegmentInfoFormat extends SegmentInfoFormat {
|
public class Lucene40SegmentInfoFormat extends SegmentInfoFormat {
|
||||||
private final SegmentInfoReader reader = new Lucene40SegmentInfoReader();
|
|
||||||
|
|
||||||
/** Sole constructor. */
|
/** Sole constructor. */
|
||||||
public Lucene40SegmentInfoFormat() {
|
public Lucene40SegmentInfoFormat() {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public final SegmentInfoReader getSegmentInfoReader() {
|
public final SegmentInfo read(Directory dir, String segment, IOContext context) throws IOException {
|
||||||
return reader;
|
final String fileName = IndexFileNames.segmentFileName(segment, "", Lucene40SegmentInfoFormat.SI_EXTENSION);
|
||||||
|
final IndexInput input = dir.openInput(fileName, context);
|
||||||
|
boolean success = false;
|
||||||
|
try {
|
||||||
|
CodecUtil.checkHeader(input, Lucene40SegmentInfoFormat.CODEC_NAME,
|
||||||
|
Lucene40SegmentInfoFormat.VERSION_START,
|
||||||
|
Lucene40SegmentInfoFormat.VERSION_CURRENT);
|
||||||
|
final Version version;
|
||||||
|
try {
|
||||||
|
version = Version.parse(input.readString());
|
||||||
|
} catch (ParseException pe) {
|
||||||
|
throw new CorruptIndexException("unable to parse version string: " + pe.getMessage(), input, pe);
|
||||||
|
}
|
||||||
|
final int docCount = input.readInt();
|
||||||
|
if (docCount < 0) {
|
||||||
|
throw new CorruptIndexException("invalid docCount: " + docCount, input);
|
||||||
|
}
|
||||||
|
final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;
|
||||||
|
final Map<String,String> diagnostics = input.readStringStringMap();
|
||||||
|
input.readStringStringMap(); // read deprecated attributes
|
||||||
|
final Set<String> files = input.readStringSet();
|
||||||
|
|
||||||
|
CodecUtil.checkEOF(input);
|
||||||
|
|
||||||
|
final SegmentInfo si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics, null);
|
||||||
|
si.setFiles(files);
|
||||||
|
|
||||||
|
success = true;
|
||||||
|
|
||||||
|
return si;
|
||||||
|
|
||||||
|
} finally {
|
||||||
|
if (!success) {
|
||||||
|
IOUtils.closeWhileHandlingException(input);
|
||||||
|
} else {
|
||||||
|
input.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public SegmentInfoWriter getSegmentInfoWriter() {
|
public void write(Directory dir, SegmentInfo info, IOContext ioContext) throws IOException {
|
||||||
throw new UnsupportedOperationException("this codec can only be used for reading");
|
throw new UnsupportedOperationException("this codec can only be used for reading");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,88 +0,0 @@
|
||||||
package org.apache.lucene.codecs.lucene40;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.text.ParseException;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
import org.apache.lucene.codecs.CodecUtil;
|
|
||||||
import org.apache.lucene.codecs.SegmentInfoReader;
|
|
||||||
import org.apache.lucene.index.CorruptIndexException;
|
|
||||||
import org.apache.lucene.index.IndexFileNames;
|
|
||||||
import org.apache.lucene.index.SegmentInfo;
|
|
||||||
import org.apache.lucene.store.Directory;
|
|
||||||
import org.apache.lucene.store.IOContext;
|
|
||||||
import org.apache.lucene.store.IndexInput;
|
|
||||||
import org.apache.lucene.util.IOUtils;
|
|
||||||
import org.apache.lucene.util.Version;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Lucene 4.0 SI reader
|
|
||||||
* @deprecated Only for reading old 4.0-4.5 segments
|
|
||||||
*/
|
|
||||||
@Deprecated
|
|
||||||
final class Lucene40SegmentInfoReader extends SegmentInfoReader {
|
|
||||||
|
|
||||||
/** Sole constructor. */
|
|
||||||
public Lucene40SegmentInfoReader() {
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public SegmentInfo read(Directory dir, String segment, IOContext context) throws IOException {
|
|
||||||
final String fileName = IndexFileNames.segmentFileName(segment, "", Lucene40SegmentInfoFormat.SI_EXTENSION);
|
|
||||||
final IndexInput input = dir.openInput(fileName, context);
|
|
||||||
boolean success = false;
|
|
||||||
try {
|
|
||||||
CodecUtil.checkHeader(input, Lucene40SegmentInfoFormat.CODEC_NAME,
|
|
||||||
Lucene40SegmentInfoFormat.VERSION_START,
|
|
||||||
Lucene40SegmentInfoFormat.VERSION_CURRENT);
|
|
||||||
final Version version;
|
|
||||||
try {
|
|
||||||
version = Version.parse(input.readString());
|
|
||||||
} catch (ParseException pe) {
|
|
||||||
throw new CorruptIndexException("unable to parse version string: " + pe.getMessage(), input, pe);
|
|
||||||
}
|
|
||||||
final int docCount = input.readInt();
|
|
||||||
if (docCount < 0) {
|
|
||||||
throw new CorruptIndexException("invalid docCount: " + docCount, input);
|
|
||||||
}
|
|
||||||
final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;
|
|
||||||
final Map<String,String> diagnostics = input.readStringStringMap();
|
|
||||||
input.readStringStringMap(); // read deprecated attributes
|
|
||||||
final Set<String> files = input.readStringSet();
|
|
||||||
|
|
||||||
CodecUtil.checkEOF(input);
|
|
||||||
|
|
||||||
final SegmentInfo si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics, null);
|
|
||||||
si.setFiles(files);
|
|
||||||
|
|
||||||
success = true;
|
|
||||||
|
|
||||||
return si;
|
|
||||||
|
|
||||||
} finally {
|
|
||||||
if (!success) {
|
|
||||||
IOUtils.closeWhileHandlingException(input);
|
|
||||||
} else {
|
|
||||||
input.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -18,10 +18,22 @@ package org.apache.lucene.codecs.lucene42;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.apache.lucene.codecs.CodecUtil;
|
||||||
import org.apache.lucene.codecs.FieldInfosFormat;
|
import org.apache.lucene.codecs.FieldInfosFormat;
|
||||||
import org.apache.lucene.codecs.FieldInfosReader;
|
import org.apache.lucene.index.CorruptIndexException;
|
||||||
import org.apache.lucene.codecs.FieldInfosWriter;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
|
import org.apache.lucene.index.FieldInfos;
|
||||||
|
import org.apache.lucene.index.IndexFileNames;
|
||||||
|
import org.apache.lucene.index.SegmentInfo;
|
||||||
|
import org.apache.lucene.index.FieldInfo.DocValuesType;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.store.IOContext;
|
||||||
|
import org.apache.lucene.store.IndexInput;
|
||||||
|
import org.apache.lucene.util.IOUtils;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Lucene 4.2 Field Infos format.
|
* Lucene 4.2 Field Infos format.
|
||||||
|
@ -29,22 +41,89 @@ import org.apache.lucene.codecs.FieldInfosWriter;
|
||||||
*/
|
*/
|
||||||
@Deprecated
|
@Deprecated
|
||||||
public class Lucene42FieldInfosFormat extends FieldInfosFormat {
|
public class Lucene42FieldInfosFormat extends FieldInfosFormat {
|
||||||
private final FieldInfosReader reader = new Lucene42FieldInfosReader();
|
|
||||||
|
|
||||||
/** Sole constructor. */
|
/** Sole constructor. */
|
||||||
public Lucene42FieldInfosFormat() {
|
public Lucene42FieldInfosFormat() {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public final FieldInfosReader getFieldInfosReader() throws IOException {
|
public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext iocontext) throws IOException {
|
||||||
return reader;
|
final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, "", Lucene42FieldInfosFormat.EXTENSION);
|
||||||
}
|
IndexInput input = directory.openInput(fileName, iocontext);
|
||||||
|
|
||||||
|
boolean success = false;
|
||||||
|
try {
|
||||||
|
CodecUtil.checkHeader(input, Lucene42FieldInfosFormat.CODEC_NAME,
|
||||||
|
Lucene42FieldInfosFormat.FORMAT_START,
|
||||||
|
Lucene42FieldInfosFormat.FORMAT_CURRENT);
|
||||||
|
|
||||||
@Override
|
final int size = input.readVInt(); //read in the size
|
||||||
public FieldInfosWriter getFieldInfosWriter() throws IOException {
|
FieldInfo infos[] = new FieldInfo[size];
|
||||||
throw new UnsupportedOperationException("this codec can only be used for reading");
|
|
||||||
|
for (int i = 0; i < size; i++) {
|
||||||
|
String name = input.readString();
|
||||||
|
final int fieldNumber = input.readVInt();
|
||||||
|
byte bits = input.readByte();
|
||||||
|
boolean isIndexed = (bits & Lucene42FieldInfosFormat.IS_INDEXED) != 0;
|
||||||
|
boolean storeTermVector = (bits & Lucene42FieldInfosFormat.STORE_TERMVECTOR) != 0;
|
||||||
|
boolean omitNorms = (bits & Lucene42FieldInfosFormat.OMIT_NORMS) != 0;
|
||||||
|
boolean storePayloads = (bits & Lucene42FieldInfosFormat.STORE_PAYLOADS) != 0;
|
||||||
|
final IndexOptions indexOptions;
|
||||||
|
if (!isIndexed) {
|
||||||
|
indexOptions = null;
|
||||||
|
} else if ((bits & Lucene42FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS) != 0) {
|
||||||
|
indexOptions = IndexOptions.DOCS_ONLY;
|
||||||
|
} else if ((bits & Lucene42FieldInfosFormat.OMIT_POSITIONS) != 0) {
|
||||||
|
indexOptions = IndexOptions.DOCS_AND_FREQS;
|
||||||
|
} else if ((bits & Lucene42FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS) != 0) {
|
||||||
|
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
|
||||||
|
} else {
|
||||||
|
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||||
|
}
|
||||||
|
|
||||||
|
// DV Types are packed in one byte
|
||||||
|
byte val = input.readByte();
|
||||||
|
final DocValuesType docValuesType = getDocValuesType(input, (byte) (val & 0x0F));
|
||||||
|
final DocValuesType normsType = getDocValuesType(input, (byte) ((val >>> 4) & 0x0F));
|
||||||
|
final Map<String,String> attributes = input.readStringStringMap();
|
||||||
|
infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector,
|
||||||
|
omitNorms, storePayloads, indexOptions, docValuesType, normsType, -1, Collections.unmodifiableMap(attributes));
|
||||||
|
}
|
||||||
|
|
||||||
|
CodecUtil.checkEOF(input);
|
||||||
|
FieldInfos fieldInfos = new FieldInfos(infos);
|
||||||
|
success = true;
|
||||||
|
return fieldInfos;
|
||||||
|
} finally {
|
||||||
|
if (success) {
|
||||||
|
input.close();
|
||||||
|
} else {
|
||||||
|
IOUtils.closeWhileHandlingException(input);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static DocValuesType getDocValuesType(IndexInput input, byte b) throws IOException {
|
||||||
|
if (b == 0) {
|
||||||
|
return null;
|
||||||
|
} else if (b == 1) {
|
||||||
|
return DocValuesType.NUMERIC;
|
||||||
|
} else if (b == 2) {
|
||||||
|
return DocValuesType.BINARY;
|
||||||
|
} else if (b == 3) {
|
||||||
|
return DocValuesType.SORTED;
|
||||||
|
} else if (b == 4) {
|
||||||
|
return DocValuesType.SORTED_SET;
|
||||||
|
} else {
|
||||||
|
throw new CorruptIndexException("invalid docvalues byte: " + b, input);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void write(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException {
|
||||||
|
throw new UnsupportedOperationException("this codec can only be used for reading");
|
||||||
|
}
|
||||||
|
|
||||||
/** Extension of field infos */
|
/** Extension of field infos */
|
||||||
static final String EXTENSION = "fnm";
|
static final String EXTENSION = "fnm";
|
||||||
|
|
||||||
|
|
|
@ -1,122 +0,0 @@
|
||||||
package org.apache.lucene.codecs.lucene42;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
import org.apache.lucene.codecs.CodecUtil;
|
|
||||||
import org.apache.lucene.codecs.FieldInfosReader;
|
|
||||||
import org.apache.lucene.index.CorruptIndexException;
|
|
||||||
import org.apache.lucene.index.FieldInfo;
|
|
||||||
import org.apache.lucene.index.FieldInfos;
|
|
||||||
import org.apache.lucene.index.IndexFileNames;
|
|
||||||
import org.apache.lucene.index.FieldInfo.DocValuesType;
|
|
||||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
|
||||||
import org.apache.lucene.index.SegmentInfo;
|
|
||||||
import org.apache.lucene.store.Directory;
|
|
||||||
import org.apache.lucene.store.IOContext;
|
|
||||||
import org.apache.lucene.store.IndexInput;
|
|
||||||
import org.apache.lucene.util.IOUtils;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Lucene 4.2 FieldInfos reader.
|
|
||||||
*
|
|
||||||
* @deprecated Only for reading old 4.2-4.5 segments
|
|
||||||
*/
|
|
||||||
@Deprecated
|
|
||||||
final class Lucene42FieldInfosReader extends FieldInfosReader {
|
|
||||||
|
|
||||||
/** Sole constructor. */
|
|
||||||
public Lucene42FieldInfosReader() {
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext iocontext) throws IOException {
|
|
||||||
final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, "", Lucene42FieldInfosFormat.EXTENSION);
|
|
||||||
IndexInput input = directory.openInput(fileName, iocontext);
|
|
||||||
|
|
||||||
boolean success = false;
|
|
||||||
try {
|
|
||||||
CodecUtil.checkHeader(input, Lucene42FieldInfosFormat.CODEC_NAME,
|
|
||||||
Lucene42FieldInfosFormat.FORMAT_START,
|
|
||||||
Lucene42FieldInfosFormat.FORMAT_CURRENT);
|
|
||||||
|
|
||||||
final int size = input.readVInt(); //read in the size
|
|
||||||
FieldInfo infos[] = new FieldInfo[size];
|
|
||||||
|
|
||||||
for (int i = 0; i < size; i++) {
|
|
||||||
String name = input.readString();
|
|
||||||
final int fieldNumber = input.readVInt();
|
|
||||||
byte bits = input.readByte();
|
|
||||||
boolean isIndexed = (bits & Lucene42FieldInfosFormat.IS_INDEXED) != 0;
|
|
||||||
boolean storeTermVector = (bits & Lucene42FieldInfosFormat.STORE_TERMVECTOR) != 0;
|
|
||||||
boolean omitNorms = (bits & Lucene42FieldInfosFormat.OMIT_NORMS) != 0;
|
|
||||||
boolean storePayloads = (bits & Lucene42FieldInfosFormat.STORE_PAYLOADS) != 0;
|
|
||||||
final IndexOptions indexOptions;
|
|
||||||
if (!isIndexed) {
|
|
||||||
indexOptions = null;
|
|
||||||
} else if ((bits & Lucene42FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS) != 0) {
|
|
||||||
indexOptions = IndexOptions.DOCS_ONLY;
|
|
||||||
} else if ((bits & Lucene42FieldInfosFormat.OMIT_POSITIONS) != 0) {
|
|
||||||
indexOptions = IndexOptions.DOCS_AND_FREQS;
|
|
||||||
} else if ((bits & Lucene42FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS) != 0) {
|
|
||||||
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
|
|
||||||
} else {
|
|
||||||
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
|
||||||
}
|
|
||||||
|
|
||||||
// DV Types are packed in one byte
|
|
||||||
byte val = input.readByte();
|
|
||||||
final DocValuesType docValuesType = getDocValuesType(input, (byte) (val & 0x0F));
|
|
||||||
final DocValuesType normsType = getDocValuesType(input, (byte) ((val >>> 4) & 0x0F));
|
|
||||||
final Map<String,String> attributes = input.readStringStringMap();
|
|
||||||
infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector,
|
|
||||||
omitNorms, storePayloads, indexOptions, docValuesType, normsType, -1, Collections.unmodifiableMap(attributes));
|
|
||||||
}
|
|
||||||
|
|
||||||
CodecUtil.checkEOF(input);
|
|
||||||
FieldInfos fieldInfos = new FieldInfos(infos);
|
|
||||||
success = true;
|
|
||||||
return fieldInfos;
|
|
||||||
} finally {
|
|
||||||
if (success) {
|
|
||||||
input.close();
|
|
||||||
} else {
|
|
||||||
IOUtils.closeWhileHandlingException(input);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static DocValuesType getDocValuesType(IndexInput input, byte b) throws IOException {
|
|
||||||
if (b == 0) {
|
|
||||||
return null;
|
|
||||||
} else if (b == 1) {
|
|
||||||
return DocValuesType.NUMERIC;
|
|
||||||
} else if (b == 2) {
|
|
||||||
return DocValuesType.BINARY;
|
|
||||||
} else if (b == 3) {
|
|
||||||
return DocValuesType.SORTED;
|
|
||||||
} else if (b == 4) {
|
|
||||||
return DocValuesType.SORTED_SET;
|
|
||||||
} else {
|
|
||||||
throw new CorruptIndexException("invalid docvalues byte: " + b, input);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -18,10 +18,23 @@ package org.apache.lucene.codecs.lucene46;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.apache.lucene.codecs.CodecUtil;
|
||||||
import org.apache.lucene.codecs.FieldInfosFormat;
|
import org.apache.lucene.codecs.FieldInfosFormat;
|
||||||
import org.apache.lucene.codecs.FieldInfosReader;
|
import org.apache.lucene.index.CorruptIndexException;
|
||||||
import org.apache.lucene.codecs.FieldInfosWriter;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
|
import org.apache.lucene.index.FieldInfos;
|
||||||
|
import org.apache.lucene.index.IndexFileNames;
|
||||||
|
import org.apache.lucene.index.SegmentInfo;
|
||||||
|
import org.apache.lucene.index.FieldInfo.DocValuesType;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
|
import org.apache.lucene.store.ChecksumIndexInput;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.store.IOContext;
|
||||||
|
import org.apache.lucene.store.IndexInput;
|
||||||
|
import org.apache.lucene.store.IndexOutput;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Lucene 4.6 Field Infos format.
|
* Lucene 4.6 Field Infos format.
|
||||||
|
@ -29,21 +42,139 @@ import org.apache.lucene.codecs.FieldInfosWriter;
|
||||||
*/
|
*/
|
||||||
@Deprecated
|
@Deprecated
|
||||||
public final class Lucene46FieldInfosFormat extends FieldInfosFormat {
|
public final class Lucene46FieldInfosFormat extends FieldInfosFormat {
|
||||||
private final FieldInfosReader reader = new Lucene46FieldInfosReader();
|
|
||||||
private final FieldInfosWriter writer = new Lucene46FieldInfosWriter();
|
|
||||||
|
|
||||||
/** Sole constructor. */
|
/** Sole constructor. */
|
||||||
public Lucene46FieldInfosFormat() {
|
public Lucene46FieldInfosFormat() {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public final FieldInfosReader getFieldInfosReader() throws IOException {
|
public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext context) throws IOException {
|
||||||
return reader;
|
final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene46FieldInfosFormat.EXTENSION);
|
||||||
|
try (ChecksumIndexInput input = directory.openChecksumInput(fileName, context)) {
|
||||||
|
int codecVersion = CodecUtil.checkHeader(input, Lucene46FieldInfosFormat.CODEC_NAME,
|
||||||
|
Lucene46FieldInfosFormat.FORMAT_START,
|
||||||
|
Lucene46FieldInfosFormat.FORMAT_CURRENT);
|
||||||
|
|
||||||
|
final int size = input.readVInt(); //read in the size
|
||||||
|
FieldInfo infos[] = new FieldInfo[size];
|
||||||
|
|
||||||
|
for (int i = 0; i < size; i++) {
|
||||||
|
String name = input.readString();
|
||||||
|
final int fieldNumber = input.readVInt();
|
||||||
|
if (fieldNumber < 0) {
|
||||||
|
throw new CorruptIndexException("invalid field number for field: " + name + ", fieldNumber=" + fieldNumber, input);
|
||||||
|
}
|
||||||
|
byte bits = input.readByte();
|
||||||
|
boolean isIndexed = (bits & Lucene46FieldInfosFormat.IS_INDEXED) != 0;
|
||||||
|
boolean storeTermVector = (bits & Lucene46FieldInfosFormat.STORE_TERMVECTOR) != 0;
|
||||||
|
boolean omitNorms = (bits & Lucene46FieldInfosFormat.OMIT_NORMS) != 0;
|
||||||
|
boolean storePayloads = (bits & Lucene46FieldInfosFormat.STORE_PAYLOADS) != 0;
|
||||||
|
final IndexOptions indexOptions;
|
||||||
|
if (!isIndexed) {
|
||||||
|
indexOptions = null;
|
||||||
|
} else if ((bits & Lucene46FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS) != 0) {
|
||||||
|
indexOptions = IndexOptions.DOCS_ONLY;
|
||||||
|
} else if ((bits & Lucene46FieldInfosFormat.OMIT_POSITIONS) != 0) {
|
||||||
|
indexOptions = IndexOptions.DOCS_AND_FREQS;
|
||||||
|
} else if ((bits & Lucene46FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS) != 0) {
|
||||||
|
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
|
||||||
|
} else {
|
||||||
|
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||||
|
}
|
||||||
|
|
||||||
|
// DV Types are packed in one byte
|
||||||
|
byte val = input.readByte();
|
||||||
|
final DocValuesType docValuesType = getDocValuesType(input, (byte) (val & 0x0F));
|
||||||
|
final DocValuesType normsType = getDocValuesType(input, (byte) ((val >>> 4) & 0x0F));
|
||||||
|
final long dvGen = input.readLong();
|
||||||
|
final Map<String,String> attributes = input.readStringStringMap();
|
||||||
|
infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector,
|
||||||
|
omitNorms, storePayloads, indexOptions, docValuesType, normsType, dvGen, Collections.unmodifiableMap(attributes));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (codecVersion >= Lucene46FieldInfosFormat.FORMAT_CHECKSUM) {
|
||||||
|
CodecUtil.checkFooter(input);
|
||||||
|
} else {
|
||||||
|
CodecUtil.checkEOF(input);
|
||||||
|
}
|
||||||
|
return new FieldInfos(infos);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static DocValuesType getDocValuesType(IndexInput input, byte b) throws IOException {
|
||||||
|
if (b == 0) {
|
||||||
|
return null;
|
||||||
|
} else if (b == 1) {
|
||||||
|
return DocValuesType.NUMERIC;
|
||||||
|
} else if (b == 2) {
|
||||||
|
return DocValuesType.BINARY;
|
||||||
|
} else if (b == 3) {
|
||||||
|
return DocValuesType.SORTED;
|
||||||
|
} else if (b == 4) {
|
||||||
|
return DocValuesType.SORTED_SET;
|
||||||
|
} else if (b == 5) {
|
||||||
|
return DocValuesType.SORTED_NUMERIC;
|
||||||
|
} else {
|
||||||
|
throw new CorruptIndexException("invalid docvalues byte: " + b, input);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public FieldInfosWriter getFieldInfosWriter() throws IOException {
|
public void write(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException {
|
||||||
return writer;
|
final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene46FieldInfosFormat.EXTENSION);
|
||||||
|
try (IndexOutput output = directory.createOutput(fileName, context)) {
|
||||||
|
CodecUtil.writeHeader(output, Lucene46FieldInfosFormat.CODEC_NAME, Lucene46FieldInfosFormat.FORMAT_CURRENT);
|
||||||
|
output.writeVInt(infos.size());
|
||||||
|
for (FieldInfo fi : infos) {
|
||||||
|
IndexOptions indexOptions = fi.getIndexOptions();
|
||||||
|
byte bits = 0x0;
|
||||||
|
if (fi.hasVectors()) bits |= Lucene46FieldInfosFormat.STORE_TERMVECTOR;
|
||||||
|
if (fi.omitsNorms()) bits |= Lucene46FieldInfosFormat.OMIT_NORMS;
|
||||||
|
if (fi.hasPayloads()) bits |= Lucene46FieldInfosFormat.STORE_PAYLOADS;
|
||||||
|
if (fi.isIndexed()) {
|
||||||
|
bits |= Lucene46FieldInfosFormat.IS_INDEXED;
|
||||||
|
assert indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !fi.hasPayloads();
|
||||||
|
if (indexOptions == IndexOptions.DOCS_ONLY) {
|
||||||
|
bits |= Lucene46FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS;
|
||||||
|
} else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) {
|
||||||
|
bits |= Lucene46FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS;
|
||||||
|
} else if (indexOptions == IndexOptions.DOCS_AND_FREQS) {
|
||||||
|
bits |= Lucene46FieldInfosFormat.OMIT_POSITIONS;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
output.writeString(fi.name);
|
||||||
|
output.writeVInt(fi.number);
|
||||||
|
output.writeByte(bits);
|
||||||
|
|
||||||
|
// pack the DV types in one byte
|
||||||
|
final byte dv = docValuesByte(fi.getDocValuesType());
|
||||||
|
final byte nrm = docValuesByte(fi.getNormType());
|
||||||
|
assert (dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0;
|
||||||
|
byte val = (byte) (0xff & ((nrm << 4) | dv));
|
||||||
|
output.writeByte(val);
|
||||||
|
output.writeLong(fi.getDocValuesGen());
|
||||||
|
output.writeStringStringMap(fi.attributes());
|
||||||
|
}
|
||||||
|
CodecUtil.writeFooter(output);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static byte docValuesByte(DocValuesType type) {
|
||||||
|
if (type == null) {
|
||||||
|
return 0;
|
||||||
|
} else if (type == DocValuesType.NUMERIC) {
|
||||||
|
return 1;
|
||||||
|
} else if (type == DocValuesType.BINARY) {
|
||||||
|
return 2;
|
||||||
|
} else if (type == DocValuesType.SORTED) {
|
||||||
|
return 3;
|
||||||
|
} else if (type == DocValuesType.SORTED_SET) {
|
||||||
|
return 4;
|
||||||
|
} else if (type == DocValuesType.SORTED_NUMERIC) {
|
||||||
|
return 5;
|
||||||
|
} else {
|
||||||
|
throw new AssertionError();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Extension of field infos */
|
/** Extension of field infos */
|
||||||
|
|
|
@ -1,121 +0,0 @@
|
||||||
package org.apache.lucene.codecs.lucene46;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
import org.apache.lucene.codecs.CodecUtil;
|
|
||||||
import org.apache.lucene.codecs.FieldInfosReader;
|
|
||||||
import org.apache.lucene.index.CorruptIndexException;
|
|
||||||
import org.apache.lucene.index.FieldInfo;
|
|
||||||
import org.apache.lucene.index.FieldInfos;
|
|
||||||
import org.apache.lucene.index.IndexFileNames;
|
|
||||||
import org.apache.lucene.index.FieldInfo.DocValuesType;
|
|
||||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
|
||||||
import org.apache.lucene.index.SegmentInfo;
|
|
||||||
import org.apache.lucene.store.ChecksumIndexInput;
|
|
||||||
import org.apache.lucene.store.Directory;
|
|
||||||
import org.apache.lucene.store.IOContext;
|
|
||||||
import org.apache.lucene.store.IndexInput;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Lucene 4.6 FieldInfos reader.
|
|
||||||
*
|
|
||||||
* @deprecated only for old 4.x segments
|
|
||||||
*/
|
|
||||||
@Deprecated
|
|
||||||
final class Lucene46FieldInfosReader extends FieldInfosReader {
|
|
||||||
|
|
||||||
/** Sole constructor. */
|
|
||||||
public Lucene46FieldInfosReader() {
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext context) throws IOException {
|
|
||||||
final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene46FieldInfosFormat.EXTENSION);
|
|
||||||
try (ChecksumIndexInput input = directory.openChecksumInput(fileName, context)) {
|
|
||||||
int codecVersion = CodecUtil.checkHeader(input, Lucene46FieldInfosFormat.CODEC_NAME,
|
|
||||||
Lucene46FieldInfosFormat.FORMAT_START,
|
|
||||||
Lucene46FieldInfosFormat.FORMAT_CURRENT);
|
|
||||||
|
|
||||||
final int size = input.readVInt(); //read in the size
|
|
||||||
FieldInfo infos[] = new FieldInfo[size];
|
|
||||||
|
|
||||||
for (int i = 0; i < size; i++) {
|
|
||||||
String name = input.readString();
|
|
||||||
final int fieldNumber = input.readVInt();
|
|
||||||
if (fieldNumber < 0) {
|
|
||||||
throw new CorruptIndexException("invalid field number for field: " + name + ", fieldNumber=" + fieldNumber, input);
|
|
||||||
}
|
|
||||||
byte bits = input.readByte();
|
|
||||||
boolean isIndexed = (bits & Lucene46FieldInfosFormat.IS_INDEXED) != 0;
|
|
||||||
boolean storeTermVector = (bits & Lucene46FieldInfosFormat.STORE_TERMVECTOR) != 0;
|
|
||||||
boolean omitNorms = (bits & Lucene46FieldInfosFormat.OMIT_NORMS) != 0;
|
|
||||||
boolean storePayloads = (bits & Lucene46FieldInfosFormat.STORE_PAYLOADS) != 0;
|
|
||||||
final IndexOptions indexOptions;
|
|
||||||
if (!isIndexed) {
|
|
||||||
indexOptions = null;
|
|
||||||
} else if ((bits & Lucene46FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS) != 0) {
|
|
||||||
indexOptions = IndexOptions.DOCS_ONLY;
|
|
||||||
} else if ((bits & Lucene46FieldInfosFormat.OMIT_POSITIONS) != 0) {
|
|
||||||
indexOptions = IndexOptions.DOCS_AND_FREQS;
|
|
||||||
} else if ((bits & Lucene46FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS) != 0) {
|
|
||||||
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
|
|
||||||
} else {
|
|
||||||
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
|
||||||
}
|
|
||||||
|
|
||||||
// DV Types are packed in one byte
|
|
||||||
byte val = input.readByte();
|
|
||||||
final DocValuesType docValuesType = getDocValuesType(input, (byte) (val & 0x0F));
|
|
||||||
final DocValuesType normsType = getDocValuesType(input, (byte) ((val >>> 4) & 0x0F));
|
|
||||||
final long dvGen = input.readLong();
|
|
||||||
final Map<String,String> attributes = input.readStringStringMap();
|
|
||||||
infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector,
|
|
||||||
omitNorms, storePayloads, indexOptions, docValuesType, normsType, dvGen, Collections.unmodifiableMap(attributes));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (codecVersion >= Lucene46FieldInfosFormat.FORMAT_CHECKSUM) {
|
|
||||||
CodecUtil.checkFooter(input);
|
|
||||||
} else {
|
|
||||||
CodecUtil.checkEOF(input);
|
|
||||||
}
|
|
||||||
return new FieldInfos(infos);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static DocValuesType getDocValuesType(IndexInput input, byte b) throws IOException {
|
|
||||||
if (b == 0) {
|
|
||||||
return null;
|
|
||||||
} else if (b == 1) {
|
|
||||||
return DocValuesType.NUMERIC;
|
|
||||||
} else if (b == 2) {
|
|
||||||
return DocValuesType.BINARY;
|
|
||||||
} else if (b == 3) {
|
|
||||||
return DocValuesType.SORTED;
|
|
||||||
} else if (b == 4) {
|
|
||||||
return DocValuesType.SORTED_SET;
|
|
||||||
} else if (b == 5) {
|
|
||||||
return DocValuesType.SORTED_NUMERIC;
|
|
||||||
} else {
|
|
||||||
throw new CorruptIndexException("invalid docvalues byte: " + b, input);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,103 +0,0 @@
|
||||||
package org.apache.lucene.codecs.lucene46;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
import org.apache.lucene.codecs.CodecUtil;
|
|
||||||
import org.apache.lucene.codecs.FieldInfosWriter;
|
|
||||||
import org.apache.lucene.index.FieldInfo.DocValuesType;
|
|
||||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
|
||||||
import org.apache.lucene.index.FieldInfo;
|
|
||||||
import org.apache.lucene.index.FieldInfos;
|
|
||||||
import org.apache.lucene.index.IndexFileNames;
|
|
||||||
import org.apache.lucene.index.SegmentInfo;
|
|
||||||
import org.apache.lucene.store.IndexOutput;
|
|
||||||
import org.apache.lucene.store.Directory;
|
|
||||||
import org.apache.lucene.store.IOContext;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Lucene 4.6 FieldInfos writer.
|
|
||||||
*
|
|
||||||
* @deprecated only for old 4.x segments
|
|
||||||
*/
|
|
||||||
@Deprecated
|
|
||||||
final class Lucene46FieldInfosWriter extends FieldInfosWriter {
|
|
||||||
|
|
||||||
/** Sole constructor. */
|
|
||||||
public Lucene46FieldInfosWriter() {
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void write(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException {
|
|
||||||
final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene46FieldInfosFormat.EXTENSION);
|
|
||||||
try (IndexOutput output = directory.createOutput(fileName, context)) {
|
|
||||||
CodecUtil.writeHeader(output, Lucene46FieldInfosFormat.CODEC_NAME, Lucene46FieldInfosFormat.FORMAT_CURRENT);
|
|
||||||
output.writeVInt(infos.size());
|
|
||||||
for (FieldInfo fi : infos) {
|
|
||||||
IndexOptions indexOptions = fi.getIndexOptions();
|
|
||||||
byte bits = 0x0;
|
|
||||||
if (fi.hasVectors()) bits |= Lucene46FieldInfosFormat.STORE_TERMVECTOR;
|
|
||||||
if (fi.omitsNorms()) bits |= Lucene46FieldInfosFormat.OMIT_NORMS;
|
|
||||||
if (fi.hasPayloads()) bits |= Lucene46FieldInfosFormat.STORE_PAYLOADS;
|
|
||||||
if (fi.isIndexed()) {
|
|
||||||
bits |= Lucene46FieldInfosFormat.IS_INDEXED;
|
|
||||||
assert indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !fi.hasPayloads();
|
|
||||||
if (indexOptions == IndexOptions.DOCS_ONLY) {
|
|
||||||
bits |= Lucene46FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS;
|
|
||||||
} else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) {
|
|
||||||
bits |= Lucene46FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS;
|
|
||||||
} else if (indexOptions == IndexOptions.DOCS_AND_FREQS) {
|
|
||||||
bits |= Lucene46FieldInfosFormat.OMIT_POSITIONS;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
output.writeString(fi.name);
|
|
||||||
output.writeVInt(fi.number);
|
|
||||||
output.writeByte(bits);
|
|
||||||
|
|
||||||
// pack the DV types in one byte
|
|
||||||
final byte dv = docValuesByte(fi.getDocValuesType());
|
|
||||||
final byte nrm = docValuesByte(fi.getNormType());
|
|
||||||
assert (dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0;
|
|
||||||
byte val = (byte) (0xff & ((nrm << 4) | dv));
|
|
||||||
output.writeByte(val);
|
|
||||||
output.writeLong(fi.getDocValuesGen());
|
|
||||||
output.writeStringStringMap(fi.attributes());
|
|
||||||
}
|
|
||||||
CodecUtil.writeFooter(output);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static byte docValuesByte(DocValuesType type) {
|
|
||||||
if (type == null) {
|
|
||||||
return 0;
|
|
||||||
} else if (type == DocValuesType.NUMERIC) {
|
|
||||||
return 1;
|
|
||||||
} else if (type == DocValuesType.BINARY) {
|
|
||||||
return 2;
|
|
||||||
} else if (type == DocValuesType.SORTED) {
|
|
||||||
return 3;
|
|
||||||
} else if (type == DocValuesType.SORTED_SET) {
|
|
||||||
return 4;
|
|
||||||
} else if (type == DocValuesType.SORTED_NUMERIC) {
|
|
||||||
return 5;
|
|
||||||
} else {
|
|
||||||
throw new AssertionError();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -17,10 +17,20 @@ package org.apache.lucene.codecs.lucene46;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.text.ParseException;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.lucene.codecs.CodecUtil;
|
||||||
import org.apache.lucene.codecs.SegmentInfoFormat;
|
import org.apache.lucene.codecs.SegmentInfoFormat;
|
||||||
import org.apache.lucene.codecs.SegmentInfoReader;
|
import org.apache.lucene.index.CorruptIndexException;
|
||||||
import org.apache.lucene.codecs.SegmentInfoWriter;
|
import org.apache.lucene.index.IndexFileNames;
|
||||||
import org.apache.lucene.index.SegmentInfo;
|
import org.apache.lucene.index.SegmentInfo;
|
||||||
|
import org.apache.lucene.store.ChecksumIndexInput;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.store.IOContext;
|
||||||
|
import org.apache.lucene.util.Version;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Lucene 4.6 Segment info format.
|
* Lucene 4.6 Segment info format.
|
||||||
|
@ -28,19 +38,48 @@ import org.apache.lucene.index.SegmentInfo;
|
||||||
*/
|
*/
|
||||||
@Deprecated
|
@Deprecated
|
||||||
public class Lucene46SegmentInfoFormat extends SegmentInfoFormat {
|
public class Lucene46SegmentInfoFormat extends SegmentInfoFormat {
|
||||||
private final SegmentInfoReader reader = new Lucene46SegmentInfoReader();
|
|
||||||
|
|
||||||
/** Sole constructor. */
|
/** Sole constructor. */
|
||||||
public Lucene46SegmentInfoFormat() {
|
public Lucene46SegmentInfoFormat() {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public final SegmentInfoReader getSegmentInfoReader() {
|
public SegmentInfo read(Directory dir, String segment, IOContext context) throws IOException {
|
||||||
return reader;
|
final String fileName = IndexFileNames.segmentFileName(segment, "", Lucene46SegmentInfoFormat.SI_EXTENSION);
|
||||||
|
try (ChecksumIndexInput input = dir.openChecksumInput(fileName, context)) {
|
||||||
|
int codecVersion = CodecUtil.checkHeader(input, Lucene46SegmentInfoFormat.CODEC_NAME,
|
||||||
|
Lucene46SegmentInfoFormat.VERSION_START,
|
||||||
|
Lucene46SegmentInfoFormat.VERSION_CURRENT);
|
||||||
|
final Version version;
|
||||||
|
try {
|
||||||
|
version = Version.parse(input.readString());
|
||||||
|
} catch (ParseException pe) {
|
||||||
|
throw new CorruptIndexException("unable to parse version string: " + pe.getMessage(), input, pe);
|
||||||
|
}
|
||||||
|
|
||||||
|
final int docCount = input.readInt();
|
||||||
|
if (docCount < 0) {
|
||||||
|
throw new CorruptIndexException("invalid docCount: " + docCount, input);
|
||||||
|
}
|
||||||
|
final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;
|
||||||
|
final Map<String,String> diagnostics = input.readStringStringMap();
|
||||||
|
final Set<String> files = input.readStringSet();
|
||||||
|
|
||||||
|
if (codecVersion >= Lucene46SegmentInfoFormat.VERSION_CHECKSUM) {
|
||||||
|
CodecUtil.checkFooter(input);
|
||||||
|
} else {
|
||||||
|
CodecUtil.checkEOF(input);
|
||||||
|
}
|
||||||
|
|
||||||
|
final SegmentInfo si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics, null);
|
||||||
|
si.setFiles(files);
|
||||||
|
|
||||||
|
return si;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public SegmentInfoWriter getSegmentInfoWriter() {
|
public void write(Directory dir, SegmentInfo info, IOContext ioContext) throws IOException {
|
||||||
throw new UnsupportedOperationException("this codec can only be used for reading");
|
throw new UnsupportedOperationException("this codec can only be used for reading");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,80 +0,0 @@
|
||||||
package org.apache.lucene.codecs.lucene46;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.text.ParseException;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
import org.apache.lucene.codecs.CodecUtil;
|
|
||||||
import org.apache.lucene.codecs.SegmentInfoReader;
|
|
||||||
import org.apache.lucene.index.CorruptIndexException;
|
|
||||||
import org.apache.lucene.index.IndexFileNames;
|
|
||||||
import org.apache.lucene.index.SegmentInfo;
|
|
||||||
import org.apache.lucene.store.ChecksumIndexInput;
|
|
||||||
import org.apache.lucene.store.Directory;
|
|
||||||
import org.apache.lucene.store.IOContext;
|
|
||||||
import org.apache.lucene.util.Version;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Lucene 4.6 segment infos reader
|
|
||||||
* @deprecated only for old 4.x segments
|
|
||||||
*/
|
|
||||||
@Deprecated
|
|
||||||
final class Lucene46SegmentInfoReader extends SegmentInfoReader {
|
|
||||||
|
|
||||||
/** Sole constructor. */
|
|
||||||
public Lucene46SegmentInfoReader() {
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public SegmentInfo read(Directory dir, String segment, IOContext context) throws IOException {
|
|
||||||
final String fileName = IndexFileNames.segmentFileName(segment, "", Lucene46SegmentInfoFormat.SI_EXTENSION);
|
|
||||||
try (ChecksumIndexInput input = dir.openChecksumInput(fileName, context)) {
|
|
||||||
int codecVersion = CodecUtil.checkHeader(input, Lucene46SegmentInfoFormat.CODEC_NAME,
|
|
||||||
Lucene46SegmentInfoFormat.VERSION_START,
|
|
||||||
Lucene46SegmentInfoFormat.VERSION_CURRENT);
|
|
||||||
final Version version;
|
|
||||||
try {
|
|
||||||
version = Version.parse(input.readString());
|
|
||||||
} catch (ParseException pe) {
|
|
||||||
throw new CorruptIndexException("unable to parse version string: " + pe.getMessage(), input, pe);
|
|
||||||
}
|
|
||||||
|
|
||||||
final int docCount = input.readInt();
|
|
||||||
if (docCount < 0) {
|
|
||||||
throw new CorruptIndexException("invalid docCount: " + docCount, input);
|
|
||||||
}
|
|
||||||
final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;
|
|
||||||
final Map<String,String> diagnostics = input.readStringStringMap();
|
|
||||||
final Set<String> files = input.readStringSet();
|
|
||||||
|
|
||||||
if (codecVersion >= Lucene46SegmentInfoFormat.VERSION_CHECKSUM) {
|
|
||||||
CodecUtil.checkFooter(input);
|
|
||||||
} else {
|
|
||||||
CodecUtil.checkEOF(input);
|
|
||||||
}
|
|
||||||
|
|
||||||
final SegmentInfo si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics, null);
|
|
||||||
si.setFiles(files);
|
|
||||||
|
|
||||||
return si;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -25,7 +25,7 @@ import java.util.TreeSet;
|
||||||
import org.apache.lucene.codecs.CodecUtil;
|
import org.apache.lucene.codecs.CodecUtil;
|
||||||
import org.apache.lucene.codecs.DocValuesConsumer;
|
import org.apache.lucene.codecs.DocValuesConsumer;
|
||||||
import org.apache.lucene.codecs.MissingOrdRemapper;
|
import org.apache.lucene.codecs.MissingOrdRemapper;
|
||||||
import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosReader.LegacyDocValuesType;
|
import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat.LegacyDocValuesType;
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
import org.apache.lucene.index.IndexFileNames;
|
import org.apache.lucene.index.IndexFileNames;
|
||||||
import org.apache.lucene.index.SegmentWriteState;
|
import org.apache.lucene.index.SegmentWriteState;
|
||||||
|
|
|
@ -4,7 +4,6 @@ import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.lucene.codecs.DocValuesFormat;
|
import org.apache.lucene.codecs.DocValuesFormat;
|
||||||
import org.apache.lucene.codecs.FieldInfosFormat;
|
import org.apache.lucene.codecs.FieldInfosFormat;
|
||||||
import org.apache.lucene.codecs.FieldInfosWriter;
|
|
||||||
import org.apache.lucene.codecs.NormsFormat;
|
import org.apache.lucene.codecs.NormsFormat;
|
||||||
import org.apache.lucene.codecs.PostingsFormat;
|
import org.apache.lucene.codecs.PostingsFormat;
|
||||||
import org.apache.lucene.codecs.SegmentInfoFormat;
|
import org.apache.lucene.codecs.SegmentInfoFormat;
|
||||||
|
@ -35,13 +34,7 @@ import org.apache.lucene.codecs.TermVectorsFormat;
|
||||||
@Deprecated
|
@Deprecated
|
||||||
public final class Lucene40RWCodec extends Lucene40Codec {
|
public final class Lucene40RWCodec extends Lucene40Codec {
|
||||||
|
|
||||||
private final FieldInfosFormat fieldInfos = new Lucene40FieldInfosFormat() {
|
private final FieldInfosFormat fieldInfos = new Lucene40RWFieldInfosFormat();
|
||||||
@Override
|
|
||||||
public FieldInfosWriter getFieldInfosWriter() throws IOException {
|
|
||||||
return new Lucene40FieldInfosWriter();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
private final DocValuesFormat docValues = new Lucene40RWDocValuesFormat();
|
private final DocValuesFormat docValues = new Lucene40RWDocValuesFormat();
|
||||||
private final NormsFormat norms = new Lucene40RWNormsFormat();
|
private final NormsFormat norms = new Lucene40RWNormsFormat();
|
||||||
private final StoredFieldsFormat stored = new Lucene40RWStoredFieldsFormat();
|
private final StoredFieldsFormat stored = new Lucene40RWStoredFieldsFormat();
|
||||||
|
|
|
@ -35,6 +35,6 @@ public final class Lucene40RWDocValuesFormat extends Lucene40DocValuesFormat {
|
||||||
String filename = IndexFileNames.segmentFileName(state.segmentInfo.name,
|
String filename = IndexFileNames.segmentFileName(state.segmentInfo.name,
|
||||||
"dv",
|
"dv",
|
||||||
Lucene40CompoundFormat.COMPOUND_FILE_EXTENSION);
|
Lucene40CompoundFormat.COMPOUND_FILE_EXTENSION);
|
||||||
return new Lucene40DocValuesWriter(state, filename, Lucene40FieldInfosReader.LEGACY_DV_TYPE_KEY);
|
return new Lucene40DocValuesWriter(state, filename, Lucene40FieldInfosFormat.LEGACY_DV_TYPE_KEY);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,8 +19,6 @@ package org.apache.lucene.codecs.lucene40;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.lucene.codecs.CodecUtil;
|
import org.apache.lucene.codecs.CodecUtil;
|
||||||
import org.apache.lucene.codecs.FieldInfosWriter;
|
|
||||||
import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosReader.LegacyDocValuesType;
|
|
||||||
import org.apache.lucene.index.FieldInfo.DocValuesType;
|
import org.apache.lucene.index.FieldInfo.DocValuesType;
|
||||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
|
@ -37,10 +35,10 @@ import org.apache.lucene.util.IOUtils;
|
||||||
* @deprecated for test purposes only
|
* @deprecated for test purposes only
|
||||||
*/
|
*/
|
||||||
@Deprecated
|
@Deprecated
|
||||||
public final class Lucene40FieldInfosWriter extends FieldInfosWriter {
|
public final class Lucene40RWFieldInfosFormat extends Lucene40FieldInfosFormat {
|
||||||
|
|
||||||
/** Sole constructor. */
|
/** Sole constructor. */
|
||||||
public Lucene40FieldInfosWriter() {
|
public Lucene40RWFieldInfosFormat() {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -76,8 +74,8 @@ public final class Lucene40FieldInfosWriter extends FieldInfosWriter {
|
||||||
output.writeByte(bits);
|
output.writeByte(bits);
|
||||||
|
|
||||||
// pack the DV types in one byte
|
// pack the DV types in one byte
|
||||||
final byte dv = docValuesByte(fi.getDocValuesType(), fi.getAttribute(Lucene40FieldInfosReader.LEGACY_DV_TYPE_KEY));
|
final byte dv = docValuesByte(fi.getDocValuesType(), fi.getAttribute(LEGACY_DV_TYPE_KEY));
|
||||||
final byte nrm = docValuesByte(fi.getNormType(), fi.getAttribute(Lucene40FieldInfosReader.LEGACY_NORM_TYPE_KEY));
|
final byte nrm = docValuesByte(fi.getNormType(), fi.getAttribute(LEGACY_NORM_TYPE_KEY));
|
||||||
assert (dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0;
|
assert (dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0;
|
||||||
byte val = (byte) (0xff & ((nrm << 4) | dv));
|
byte val = (byte) (0xff & ((nrm << 4) | dv));
|
||||||
output.writeByte(val);
|
output.writeByte(val);
|
|
@ -36,7 +36,7 @@ public final class Lucene40RWNormsFormat extends Lucene40NormsFormat {
|
||||||
String filename = IndexFileNames.segmentFileName(state.segmentInfo.name,
|
String filename = IndexFileNames.segmentFileName(state.segmentInfo.name,
|
||||||
"nrm",
|
"nrm",
|
||||||
Lucene40CompoundFormat.COMPOUND_FILE_EXTENSION);
|
Lucene40CompoundFormat.COMPOUND_FILE_EXTENSION);
|
||||||
final Lucene40DocValuesWriter impl = new Lucene40DocValuesWriter(state, filename, Lucene40FieldInfosReader.LEGACY_NORM_TYPE_KEY);
|
final Lucene40DocValuesWriter impl = new Lucene40DocValuesWriter(state, filename, Lucene40FieldInfosFormat.LEGACY_NORM_TYPE_KEY);
|
||||||
return new NormsConsumer() {
|
return new NormsConsumer() {
|
||||||
@Override
|
@Override
|
||||||
public void addNormsField(FieldInfo field, Iterable<Number> values) throws IOException {
|
public void addNormsField(FieldInfo field, Iterable<Number> values) throws IOException {
|
||||||
|
|
|
@ -17,7 +17,16 @@ package org.apache.lucene.codecs.lucene40;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import org.apache.lucene.codecs.SegmentInfoWriter;
|
import java.io.IOException;
|
||||||
|
import java.util.Collections;
|
||||||
|
|
||||||
|
import org.apache.lucene.codecs.CodecUtil;
|
||||||
|
import org.apache.lucene.index.IndexFileNames;
|
||||||
|
import org.apache.lucene.index.SegmentInfo;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.store.IOContext;
|
||||||
|
import org.apache.lucene.store.IndexOutput;
|
||||||
|
import org.apache.lucene.util.IOUtils;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Read-write version of 4.0 segmentinfo format for testing
|
* Read-write version of 4.0 segmentinfo format for testing
|
||||||
|
@ -27,7 +36,33 @@ import org.apache.lucene.codecs.SegmentInfoWriter;
|
||||||
public final class Lucene40RWSegmentInfoFormat extends Lucene40SegmentInfoFormat {
|
public final class Lucene40RWSegmentInfoFormat extends Lucene40SegmentInfoFormat {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public SegmentInfoWriter getSegmentInfoWriter() {
|
public void write(Directory dir, SegmentInfo si, IOContext ioContext) throws IOException {
|
||||||
return new Lucene40SegmentInfoWriter();
|
final String fileName = IndexFileNames.segmentFileName(si.name, "", Lucene40SegmentInfoFormat.SI_EXTENSION);
|
||||||
|
si.addFile(fileName);
|
||||||
|
|
||||||
|
final IndexOutput output = dir.createOutput(fileName, ioContext);
|
||||||
|
|
||||||
|
boolean success = false;
|
||||||
|
try {
|
||||||
|
CodecUtil.writeHeader(output, Lucene40SegmentInfoFormat.CODEC_NAME, Lucene40SegmentInfoFormat.VERSION_CURRENT);
|
||||||
|
// Write the Lucene version that created this segment, since 3.1
|
||||||
|
output.writeString(si.getVersion().toString());
|
||||||
|
output.writeInt(si.getDocCount());
|
||||||
|
|
||||||
|
output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
|
||||||
|
output.writeStringStringMap(si.getDiagnostics());
|
||||||
|
output.writeStringStringMap(Collections.<String,String>emptyMap());
|
||||||
|
output.writeStringSet(si.files());
|
||||||
|
|
||||||
|
success = true;
|
||||||
|
} finally {
|
||||||
|
if (!success) {
|
||||||
|
IOUtils.closeWhileHandlingException(output);
|
||||||
|
// TODO: why must we do this? do we not get tracking dir wrapper?
|
||||||
|
IOUtils.deleteFilesIgnoringExceptions(si.dir, fileName);
|
||||||
|
} else {
|
||||||
|
output.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,75 +0,0 @@
|
||||||
package org.apache.lucene.codecs.lucene40;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.Collections;
|
|
||||||
|
|
||||||
import org.apache.lucene.codecs.CodecUtil;
|
|
||||||
import org.apache.lucene.codecs.SegmentInfoWriter;
|
|
||||||
import org.apache.lucene.index.FieldInfos;
|
|
||||||
import org.apache.lucene.index.IndexFileNames;
|
|
||||||
import org.apache.lucene.index.SegmentInfo;
|
|
||||||
import org.apache.lucene.store.Directory;
|
|
||||||
import org.apache.lucene.store.IOContext;
|
|
||||||
import org.apache.lucene.store.IndexOutput;
|
|
||||||
import org.apache.lucene.util.IOUtils;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* writer for 4.0 segmentinfos for testing
|
|
||||||
* @deprecated for test purposes only
|
|
||||||
*/
|
|
||||||
@Deprecated
|
|
||||||
public final class Lucene40SegmentInfoWriter extends SegmentInfoWriter {
|
|
||||||
|
|
||||||
/** Sole constructor. */
|
|
||||||
public Lucene40SegmentInfoWriter() {
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Save a single segment's info. */
|
|
||||||
@Override
|
|
||||||
public void write(Directory dir, SegmentInfo si, FieldInfos fis, IOContext ioContext) throws IOException {
|
|
||||||
final String fileName = IndexFileNames.segmentFileName(si.name, "", Lucene40SegmentInfoFormat.SI_EXTENSION);
|
|
||||||
si.addFile(fileName);
|
|
||||||
|
|
||||||
final IndexOutput output = dir.createOutput(fileName, ioContext);
|
|
||||||
|
|
||||||
boolean success = false;
|
|
||||||
try {
|
|
||||||
CodecUtil.writeHeader(output, Lucene40SegmentInfoFormat.CODEC_NAME, Lucene40SegmentInfoFormat.VERSION_CURRENT);
|
|
||||||
// Write the Lucene version that created this segment, since 3.1
|
|
||||||
output.writeString(si.getVersion().toString());
|
|
||||||
output.writeInt(si.getDocCount());
|
|
||||||
|
|
||||||
output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
|
|
||||||
output.writeStringStringMap(si.getDiagnostics());
|
|
||||||
output.writeStringStringMap(Collections.<String,String>emptyMap());
|
|
||||||
output.writeStringSet(si.files());
|
|
||||||
|
|
||||||
success = true;
|
|
||||||
} finally {
|
|
||||||
if (!success) {
|
|
||||||
IOUtils.closeWhileHandlingException(output);
|
|
||||||
// TODO: why must we do this? do we not get tracking dir wrapper?
|
|
||||||
IOUtils.deleteFilesIgnoringExceptions(si.dir, fileName);
|
|
||||||
} else {
|
|
||||||
output.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -0,0 +1,71 @@
|
||||||
|
package org.apache.lucene.codecs.lucene40;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.codecs.Codec;
|
||||||
|
import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat.LegacyDocValuesType;
|
||||||
|
import org.apache.lucene.index.BaseFieldInfoFormatTestCase;
|
||||||
|
import org.apache.lucene.index.FieldInfo;
|
||||||
|
import org.apache.lucene.index.FieldInfo.DocValuesType;
|
||||||
|
|
||||||
|
/** Test Lucene 4.0 FieldInfos Format */
|
||||||
|
public class TestLucene40FieldInfoFormat extends BaseFieldInfoFormatTestCase {
|
||||||
|
private final Codec codec = new Lucene40RWCodec();
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Codec getCodec() {
|
||||||
|
return codec;
|
||||||
|
}
|
||||||
|
|
||||||
|
// we only support these three dv types
|
||||||
|
@Override
|
||||||
|
@Deprecated
|
||||||
|
protected DocValuesType[] getDocValuesTypes() {
|
||||||
|
return new DocValuesType[] {
|
||||||
|
DocValuesType.BINARY,
|
||||||
|
DocValuesType.NUMERIC,
|
||||||
|
DocValuesType.SORTED
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// but we have more internal typing information, previously recorded in fieldinfos.
|
||||||
|
// this is exposed via attributes (so our writer expects them to be set by the dv impl)
|
||||||
|
@Override
|
||||||
|
protected void addAttributes(FieldInfo fi) {
|
||||||
|
DocValuesType dvType = fi.getDocValuesType();
|
||||||
|
if (dvType != null) {
|
||||||
|
switch (dvType) {
|
||||||
|
case BINARY:
|
||||||
|
fi.putAttribute(Lucene40FieldInfosFormat.LEGACY_DV_TYPE_KEY, LegacyDocValuesType.BYTES_FIXED_STRAIGHT.name());
|
||||||
|
break;
|
||||||
|
case NUMERIC:
|
||||||
|
fi.putAttribute(Lucene40FieldInfosFormat.LEGACY_DV_TYPE_KEY, LegacyDocValuesType.FIXED_INTS_32.name());
|
||||||
|
break;
|
||||||
|
case SORTED:
|
||||||
|
fi.putAttribute(Lucene40FieldInfosFormat.LEGACY_DV_TYPE_KEY, LegacyDocValuesType.BYTES_FIXED_SORTED.name());
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw new AssertionError();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fi.getNormType() != null) {
|
||||||
|
fi.putAttribute(Lucene40FieldInfosFormat.LEGACY_NORM_TYPE_KEY, LegacyDocValuesType.FIXED_INTS_8.name());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,58 @@
|
||||||
|
package org.apache.lucene.codecs.lucene40;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.codecs.Codec;
|
||||||
|
import org.apache.lucene.index.BaseSegmentInfoFormatTestCase;
|
||||||
|
import org.apache.lucene.util.Version;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests Lucene40InfoFormat
|
||||||
|
*/
|
||||||
|
public class TestLucene40SegmentInfoFormat extends BaseSegmentInfoFormatTestCase {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Version[] getVersions() {
|
||||||
|
// NOTE: some of these bugfix releases we never actually "wrote",
|
||||||
|
// but staying on the safe side...
|
||||||
|
return new Version[] {
|
||||||
|
Version.LUCENE_4_0_0_ALPHA,
|
||||||
|
Version.LUCENE_4_0_0_BETA,
|
||||||
|
Version.LUCENE_4_0_0,
|
||||||
|
Version.LUCENE_4_1_0,
|
||||||
|
Version.LUCENE_4_2_0,
|
||||||
|
Version.LUCENE_4_2_1,
|
||||||
|
Version.LUCENE_4_3_0,
|
||||||
|
Version.LUCENE_4_3_1,
|
||||||
|
Version.LUCENE_4_4_0,
|
||||||
|
Version.LUCENE_4_5_0,
|
||||||
|
Version.LUCENE_4_5_1,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
@Deprecated
|
||||||
|
protected void assertIDEquals(byte[] expected, byte[] actual) {
|
||||||
|
assertNull(actual); // we don't support IDs
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Codec getCodec() {
|
||||||
|
return new Lucene40RWCodec();
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,17 +1,13 @@
|
||||||
package org.apache.lucene.codecs.lucene41;
|
package org.apache.lucene.codecs.lucene41;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
import org.apache.lucene.codecs.DocValuesFormat;
|
import org.apache.lucene.codecs.DocValuesFormat;
|
||||||
import org.apache.lucene.codecs.FieldInfosFormat;
|
import org.apache.lucene.codecs.FieldInfosFormat;
|
||||||
import org.apache.lucene.codecs.FieldInfosWriter;
|
|
||||||
import org.apache.lucene.codecs.NormsFormat;
|
import org.apache.lucene.codecs.NormsFormat;
|
||||||
import org.apache.lucene.codecs.SegmentInfoFormat;
|
import org.apache.lucene.codecs.SegmentInfoFormat;
|
||||||
import org.apache.lucene.codecs.StoredFieldsFormat;
|
import org.apache.lucene.codecs.StoredFieldsFormat;
|
||||||
import org.apache.lucene.codecs.TermVectorsFormat;
|
import org.apache.lucene.codecs.TermVectorsFormat;
|
||||||
import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat;
|
|
||||||
import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosWriter;
|
|
||||||
import org.apache.lucene.codecs.lucene40.Lucene40RWDocValuesFormat;
|
import org.apache.lucene.codecs.lucene40.Lucene40RWDocValuesFormat;
|
||||||
|
import org.apache.lucene.codecs.lucene40.Lucene40RWFieldInfosFormat;
|
||||||
import org.apache.lucene.codecs.lucene40.Lucene40RWNormsFormat;
|
import org.apache.lucene.codecs.lucene40.Lucene40RWNormsFormat;
|
||||||
import org.apache.lucene.codecs.lucene40.Lucene40RWSegmentInfoFormat;
|
import org.apache.lucene.codecs.lucene40.Lucene40RWSegmentInfoFormat;
|
||||||
import org.apache.lucene.codecs.lucene40.Lucene40RWTermVectorsFormat;
|
import org.apache.lucene.codecs.lucene40.Lucene40RWTermVectorsFormat;
|
||||||
|
@ -40,13 +36,7 @@ import org.apache.lucene.codecs.lucene40.Lucene40RWTermVectorsFormat;
|
||||||
@Deprecated
|
@Deprecated
|
||||||
public final class Lucene41RWCodec extends Lucene41Codec {
|
public final class Lucene41RWCodec extends Lucene41Codec {
|
||||||
private final StoredFieldsFormat fieldsFormat = new Lucene41RWStoredFieldsFormat();
|
private final StoredFieldsFormat fieldsFormat = new Lucene41RWStoredFieldsFormat();
|
||||||
private final FieldInfosFormat fieldInfos = new Lucene40FieldInfosFormat() {
|
private final FieldInfosFormat fieldInfos = new Lucene40RWFieldInfosFormat();
|
||||||
@Override
|
|
||||||
public FieldInfosWriter getFieldInfosWriter() throws IOException {
|
|
||||||
return new Lucene40FieldInfosWriter();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
private final DocValuesFormat docValues = new Lucene40RWDocValuesFormat();
|
private final DocValuesFormat docValues = new Lucene40RWDocValuesFormat();
|
||||||
private final NormsFormat norms = new Lucene40RWNormsFormat();
|
private final NormsFormat norms = new Lucene40RWNormsFormat();
|
||||||
private final TermVectorsFormat vectors = new Lucene40RWTermVectorsFormat();
|
private final TermVectorsFormat vectors = new Lucene40RWTermVectorsFormat();
|
||||||
|
|
|
@ -17,11 +17,8 @@ package org.apache.lucene.codecs.lucene42;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
import org.apache.lucene.codecs.DocValuesFormat;
|
import org.apache.lucene.codecs.DocValuesFormat;
|
||||||
import org.apache.lucene.codecs.FieldInfosFormat;
|
import org.apache.lucene.codecs.FieldInfosFormat;
|
||||||
import org.apache.lucene.codecs.FieldInfosWriter;
|
|
||||||
import org.apache.lucene.codecs.NormsFormat;
|
import org.apache.lucene.codecs.NormsFormat;
|
||||||
import org.apache.lucene.codecs.SegmentInfoFormat;
|
import org.apache.lucene.codecs.SegmentInfoFormat;
|
||||||
import org.apache.lucene.codecs.StoredFieldsFormat;
|
import org.apache.lucene.codecs.StoredFieldsFormat;
|
||||||
|
@ -39,13 +36,7 @@ public final class Lucene42RWCodec extends Lucene42Codec {
|
||||||
private static final DocValuesFormat dv = new Lucene42RWDocValuesFormat();
|
private static final DocValuesFormat dv = new Lucene42RWDocValuesFormat();
|
||||||
private static final NormsFormat norms = new Lucene42RWNormsFormat();
|
private static final NormsFormat norms = new Lucene42RWNormsFormat();
|
||||||
private static final StoredFieldsFormat storedFields = new Lucene41RWStoredFieldsFormat();
|
private static final StoredFieldsFormat storedFields = new Lucene41RWStoredFieldsFormat();
|
||||||
|
private static final FieldInfosFormat fieldInfosFormat = new Lucene42RWFieldInfosFormat();
|
||||||
private final FieldInfosFormat fieldInfosFormat = new Lucene42FieldInfosFormat() {
|
|
||||||
@Override
|
|
||||||
public FieldInfosWriter getFieldInfosWriter() throws IOException {
|
|
||||||
return new Lucene42FieldInfosWriter();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public DocValuesFormat getDocValuesFormatForField(String field) {
|
public DocValuesFormat getDocValuesFormatForField(String field) {
|
||||||
|
|
|
@ -20,7 +20,6 @@ package org.apache.lucene.codecs.lucene42;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.lucene.codecs.CodecUtil;
|
import org.apache.lucene.codecs.CodecUtil;
|
||||||
import org.apache.lucene.codecs.FieldInfosWriter;
|
|
||||||
import org.apache.lucene.index.FieldInfo.DocValuesType;
|
import org.apache.lucene.index.FieldInfo.DocValuesType;
|
||||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
|
@ -37,10 +36,10 @@ import org.apache.lucene.util.IOUtils;
|
||||||
* @deprecated for test purposes only
|
* @deprecated for test purposes only
|
||||||
*/
|
*/
|
||||||
@Deprecated
|
@Deprecated
|
||||||
public final class Lucene42FieldInfosWriter extends FieldInfosWriter {
|
public final class Lucene42RWFieldInfosFormat extends Lucene42FieldInfosFormat {
|
||||||
|
|
||||||
/** Sole constructor. */
|
/** Sole constructor. */
|
||||||
public Lucene42FieldInfosWriter() {
|
public Lucene42RWFieldInfosFormat() {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
|
@ -0,0 +1,44 @@
|
||||||
|
package org.apache.lucene.codecs.lucene42;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.codecs.Codec;
|
||||||
|
import org.apache.lucene.index.BaseFieldInfoFormatTestCase;
|
||||||
|
import org.apache.lucene.index.FieldInfo.DocValuesType;
|
||||||
|
|
||||||
|
/** Test Lucene 4.2 FieldInfos Format */
|
||||||
|
public class TestLucene42FieldInfoFormat extends BaseFieldInfoFormatTestCase {
|
||||||
|
private final Codec codec = new Lucene42RWCodec();
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Codec getCodec() {
|
||||||
|
return codec;
|
||||||
|
}
|
||||||
|
|
||||||
|
// we only support these four dv types
|
||||||
|
@Override
|
||||||
|
@Deprecated
|
||||||
|
protected DocValuesType[] getDocValuesTypes() {
|
||||||
|
return new DocValuesType[] {
|
||||||
|
DocValuesType.BINARY,
|
||||||
|
DocValuesType.NUMERIC,
|
||||||
|
DocValuesType.SORTED,
|
||||||
|
DocValuesType.SORTED_SET
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
|
@ -17,19 +17,15 @@ package org.apache.lucene.codecs.lucene45;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
import org.apache.lucene.codecs.DocValuesFormat;
|
import org.apache.lucene.codecs.DocValuesFormat;
|
||||||
import org.apache.lucene.codecs.FieldInfosFormat;
|
import org.apache.lucene.codecs.FieldInfosFormat;
|
||||||
import org.apache.lucene.codecs.FieldInfosWriter;
|
|
||||||
import org.apache.lucene.codecs.NormsFormat;
|
import org.apache.lucene.codecs.NormsFormat;
|
||||||
import org.apache.lucene.codecs.SegmentInfoFormat;
|
import org.apache.lucene.codecs.SegmentInfoFormat;
|
||||||
import org.apache.lucene.codecs.StoredFieldsFormat;
|
import org.apache.lucene.codecs.StoredFieldsFormat;
|
||||||
import org.apache.lucene.codecs.TermVectorsFormat;
|
import org.apache.lucene.codecs.TermVectorsFormat;
|
||||||
import org.apache.lucene.codecs.lucene40.Lucene40RWSegmentInfoFormat;
|
import org.apache.lucene.codecs.lucene40.Lucene40RWSegmentInfoFormat;
|
||||||
import org.apache.lucene.codecs.lucene41.Lucene41RWStoredFieldsFormat;
|
import org.apache.lucene.codecs.lucene41.Lucene41RWStoredFieldsFormat;
|
||||||
import org.apache.lucene.codecs.lucene42.Lucene42FieldInfosFormat;
|
import org.apache.lucene.codecs.lucene42.Lucene42RWFieldInfosFormat;
|
||||||
import org.apache.lucene.codecs.lucene42.Lucene42FieldInfosWriter;
|
|
||||||
import org.apache.lucene.codecs.lucene42.Lucene42RWNormsFormat;
|
import org.apache.lucene.codecs.lucene42.Lucene42RWNormsFormat;
|
||||||
import org.apache.lucene.codecs.lucene42.Lucene42RWTermVectorsFormat;
|
import org.apache.lucene.codecs.lucene42.Lucene42RWTermVectorsFormat;
|
||||||
|
|
||||||
|
@ -39,12 +35,7 @@ import org.apache.lucene.codecs.lucene42.Lucene42RWTermVectorsFormat;
|
||||||
@SuppressWarnings("deprecation")
|
@SuppressWarnings("deprecation")
|
||||||
public final class Lucene45RWCodec extends Lucene45Codec {
|
public final class Lucene45RWCodec extends Lucene45Codec {
|
||||||
|
|
||||||
private final FieldInfosFormat fieldInfosFormat = new Lucene42FieldInfosFormat() {
|
private static final FieldInfosFormat fieldInfosFormat = new Lucene42RWFieldInfosFormat();
|
||||||
@Override
|
|
||||||
public FieldInfosWriter getFieldInfosWriter() throws IOException {
|
|
||||||
return new Lucene42FieldInfosWriter();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public FieldInfosFormat fieldInfosFormat() {
|
public FieldInfosFormat fieldInfosFormat() {
|
||||||
|
|
|
@ -17,7 +17,16 @@ package org.apache.lucene.codecs.lucene46;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import org.apache.lucene.codecs.SegmentInfoWriter;
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.codecs.CodecUtil;
|
||||||
|
import org.apache.lucene.index.IndexFileNames;
|
||||||
|
import org.apache.lucene.index.SegmentInfo;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.store.IOContext;
|
||||||
|
import org.apache.lucene.store.IndexOutput;
|
||||||
|
import org.apache.lucene.util.IOUtils;
|
||||||
|
import org.apache.lucene.util.Version;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Read-Write version of 4.6 segmentinfo format for testing
|
* Read-Write version of 4.6 segmentinfo format for testing
|
||||||
|
@ -26,7 +35,36 @@ import org.apache.lucene.codecs.SegmentInfoWriter;
|
||||||
@Deprecated
|
@Deprecated
|
||||||
public final class Lucene46RWSegmentInfoFormat extends Lucene46SegmentInfoFormat {
|
public final class Lucene46RWSegmentInfoFormat extends Lucene46SegmentInfoFormat {
|
||||||
@Override
|
@Override
|
||||||
public SegmentInfoWriter getSegmentInfoWriter() {
|
public void write(Directory dir, SegmentInfo si, IOContext ioContext) throws IOException {
|
||||||
return new Lucene46SegmentInfoWriter();
|
final String fileName = IndexFileNames.segmentFileName(si.name, "", Lucene46SegmentInfoFormat.SI_EXTENSION);
|
||||||
|
si.addFile(fileName);
|
||||||
|
|
||||||
|
final IndexOutput output = dir.createOutput(fileName, ioContext);
|
||||||
|
|
||||||
|
boolean success = false;
|
||||||
|
try {
|
||||||
|
CodecUtil.writeHeader(output, Lucene46SegmentInfoFormat.CODEC_NAME, Lucene46SegmentInfoFormat.VERSION_CURRENT);
|
||||||
|
Version version = si.getVersion();
|
||||||
|
if (version.major < 4) {
|
||||||
|
throw new IllegalArgumentException("invalid major version: should be >= 4 but got: " + version.major + " segment=" + si);
|
||||||
|
}
|
||||||
|
// Write the Lucene version that created this segment, since 3.1
|
||||||
|
output.writeString(version.toString());
|
||||||
|
output.writeInt(si.getDocCount());
|
||||||
|
|
||||||
|
output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
|
||||||
|
output.writeStringStringMap(si.getDiagnostics());
|
||||||
|
output.writeStringSet(si.files());
|
||||||
|
CodecUtil.writeFooter(output);
|
||||||
|
success = true;
|
||||||
|
} finally {
|
||||||
|
if (!success) {
|
||||||
|
IOUtils.closeWhileHandlingException(output);
|
||||||
|
// TODO: are we doing this outside of the tracking wrapper? why must SIWriter cleanup like this?
|
||||||
|
IOUtils.deleteFilesIgnoringExceptions(si.dir, fileName);
|
||||||
|
} else {
|
||||||
|
output.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,78 +0,0 @@
|
||||||
package org.apache.lucene.codecs.lucene46;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
import org.apache.lucene.codecs.CodecUtil;
|
|
||||||
import org.apache.lucene.codecs.SegmentInfoWriter;
|
|
||||||
import org.apache.lucene.index.FieldInfos;
|
|
||||||
import org.apache.lucene.index.IndexFileNames;
|
|
||||||
import org.apache.lucene.index.SegmentInfo;
|
|
||||||
import org.apache.lucene.store.Directory;
|
|
||||||
import org.apache.lucene.store.IOContext;
|
|
||||||
import org.apache.lucene.store.IndexOutput;
|
|
||||||
import org.apache.lucene.util.IOUtils;
|
|
||||||
import org.apache.lucene.util.Version;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Writer for 4.0 segmentinfo format for testing
|
|
||||||
* @deprecated for test purposes only
|
|
||||||
*/
|
|
||||||
@Deprecated
|
|
||||||
final class Lucene46SegmentInfoWriter extends SegmentInfoWriter {
|
|
||||||
|
|
||||||
/** Sole constructor. */
|
|
||||||
public Lucene46SegmentInfoWriter() {
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Save a single segment's info. */
|
|
||||||
@Override
|
|
||||||
public void write(Directory dir, SegmentInfo si, FieldInfos fis, IOContext ioContext) throws IOException {
|
|
||||||
final String fileName = IndexFileNames.segmentFileName(si.name, "", Lucene46SegmentInfoFormat.SI_EXTENSION);
|
|
||||||
si.addFile(fileName);
|
|
||||||
|
|
||||||
final IndexOutput output = dir.createOutput(fileName, ioContext);
|
|
||||||
|
|
||||||
boolean success = false;
|
|
||||||
try {
|
|
||||||
CodecUtil.writeHeader(output, Lucene46SegmentInfoFormat.CODEC_NAME, Lucene46SegmentInfoFormat.VERSION_CURRENT);
|
|
||||||
Version version = si.getVersion();
|
|
||||||
if (version.major < 4) {
|
|
||||||
throw new IllegalArgumentException("invalid major version: should be >= 4 but got: " + version.major + " segment=" + si);
|
|
||||||
}
|
|
||||||
// Write the Lucene version that created this segment, since 3.1
|
|
||||||
output.writeString(version.toString());
|
|
||||||
output.writeInt(si.getDocCount());
|
|
||||||
|
|
||||||
output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
|
|
||||||
output.writeStringStringMap(si.getDiagnostics());
|
|
||||||
output.writeStringSet(si.files());
|
|
||||||
CodecUtil.writeFooter(output);
|
|
||||||
success = true;
|
|
||||||
} finally {
|
|
||||||
if (!success) {
|
|
||||||
IOUtils.closeWhileHandlingException(output);
|
|
||||||
// TODO: are we doing this outside of the tracking wrapper? why must SIWriter cleanup like this?
|
|
||||||
IOUtils.deleteFilesIgnoringExceptions(si.dir, fileName);
|
|
||||||
} else {
|
|
||||||
output.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,9 +1,4 @@
|
||||||
package org.apache.lucene.util;
|
package org.apache.lucene.codecs.lucene46;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.BitSet;
|
|
||||||
|
|
||||||
import org.junit.Ignore;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
@ -22,17 +17,17 @@ import org.junit.Ignore;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class TestDocIdBitSet extends BaseDocIdSetTestCase<DocIdBitSet> {
|
import org.apache.lucene.codecs.Codec;
|
||||||
|
import org.apache.lucene.index.BaseFieldInfoFormatTestCase;
|
||||||
|
|
||||||
|
/** Test Lucene 4.6 FieldInfos Format */
|
||||||
|
public class TestLucene46FieldInfoFormat extends BaseFieldInfoFormatTestCase {
|
||||||
|
private final Codec codec = new Lucene46RWCodec();
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public DocIdBitSet copyOf(BitSet bs, int length) throws IOException {
|
protected Codec getCodec() {
|
||||||
return new DocIdBitSet((BitSet) bs.clone());
|
return codec;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
// TODO: we actually didn't support SORTED_NUMERIC initially; it was added in a minor rev.
|
||||||
@Ignore("no access to the internals of this impl")
|
|
||||||
public void testRamBytesUsed() throws IOException {
|
|
||||||
super.testRamBytesUsed();
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
|
@ -0,0 +1,57 @@
|
||||||
|
package org.apache.lucene.codecs.lucene46;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.codecs.Codec;
|
||||||
|
import org.apache.lucene.index.BaseSegmentInfoFormatTestCase;
|
||||||
|
import org.apache.lucene.util.Version;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests Lucene46SegmentInfoFormat
|
||||||
|
*/
|
||||||
|
public class TestLucene46SegmentInfoFormat extends BaseSegmentInfoFormatTestCase {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Version[] getVersions() {
|
||||||
|
// NOTE: some of these bugfix releases we never actually "wrote",
|
||||||
|
// but staying on the safe side...
|
||||||
|
return new Version[] {
|
||||||
|
Version.LUCENE_4_6_0,
|
||||||
|
Version.LUCENE_4_6_1,
|
||||||
|
Version.LUCENE_4_7_0,
|
||||||
|
Version.LUCENE_4_7_1,
|
||||||
|
Version.LUCENE_4_7_2,
|
||||||
|
Version.LUCENE_4_8_0,
|
||||||
|
Version.LUCENE_4_8_1,
|
||||||
|
Version.LUCENE_4_9_0,
|
||||||
|
Version.LUCENE_4_10_0,
|
||||||
|
Version.LUCENE_4_10_1
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
@Deprecated
|
||||||
|
protected void assertIDEquals(byte[] expected, byte[] actual) {
|
||||||
|
assertNull(actual); // we don't support IDs
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Codec getCodec() {
|
||||||
|
return new Lucene46RWCodec();
|
||||||
|
}
|
||||||
|
}
|
|
@ -18,10 +18,26 @@ package org.apache.lucene.codecs.simpletext;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
import org.apache.lucene.codecs.FieldInfosFormat;
|
import org.apache.lucene.codecs.FieldInfosFormat;
|
||||||
import org.apache.lucene.codecs.FieldInfosReader;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
import org.apache.lucene.codecs.FieldInfosWriter;
|
import org.apache.lucene.index.FieldInfos;
|
||||||
|
import org.apache.lucene.index.IndexFileNames;
|
||||||
|
import org.apache.lucene.index.SegmentInfo;
|
||||||
|
import org.apache.lucene.index.FieldInfo.DocValuesType;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
|
import org.apache.lucene.store.ChecksumIndexInput;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.store.IOContext;
|
||||||
|
import org.apache.lucene.store.IndexOutput;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.BytesRefBuilder;
|
||||||
|
import org.apache.lucene.util.IOUtils;
|
||||||
|
import org.apache.lucene.util.StringHelper;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* plaintext field infos format
|
* plaintext field infos format
|
||||||
|
@ -30,16 +46,220 @@ import org.apache.lucene.codecs.FieldInfosWriter;
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
|
public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
|
||||||
private final FieldInfosReader reader = new SimpleTextFieldInfosReader();
|
|
||||||
private final FieldInfosWriter writer = new SimpleTextFieldInfosWriter();
|
/** Extension of field infos */
|
||||||
|
static final String FIELD_INFOS_EXTENSION = "inf";
|
||||||
|
|
||||||
|
static final BytesRef NUMFIELDS = new BytesRef("number of fields ");
|
||||||
|
static final BytesRef NAME = new BytesRef(" name ");
|
||||||
|
static final BytesRef NUMBER = new BytesRef(" number ");
|
||||||
|
static final BytesRef ISINDEXED = new BytesRef(" indexed ");
|
||||||
|
static final BytesRef STORETV = new BytesRef(" term vectors ");
|
||||||
|
static final BytesRef STORETVPOS = new BytesRef(" term vector positions ");
|
||||||
|
static final BytesRef STORETVOFF = new BytesRef(" term vector offsets ");
|
||||||
|
static final BytesRef PAYLOADS = new BytesRef(" payloads ");
|
||||||
|
static final BytesRef NORMS = new BytesRef(" norms ");
|
||||||
|
static final BytesRef NORMS_TYPE = new BytesRef(" norms type ");
|
||||||
|
static final BytesRef DOCVALUES = new BytesRef(" doc values ");
|
||||||
|
static final BytesRef DOCVALUES_GEN = new BytesRef(" doc values gen ");
|
||||||
|
static final BytesRef INDEXOPTIONS = new BytesRef(" index options ");
|
||||||
|
static final BytesRef NUM_ATTS = new BytesRef(" attributes ");
|
||||||
|
static final BytesRef ATT_KEY = new BytesRef(" key ");
|
||||||
|
static final BytesRef ATT_VALUE = new BytesRef(" value ");
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public FieldInfosReader getFieldInfosReader() throws IOException {
|
public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext iocontext) throws IOException {
|
||||||
return reader;
|
final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, FIELD_INFOS_EXTENSION);
|
||||||
|
ChecksumIndexInput input = directory.openChecksumInput(fileName, iocontext);
|
||||||
|
BytesRefBuilder scratch = new BytesRefBuilder();
|
||||||
|
|
||||||
|
boolean success = false;
|
||||||
|
try {
|
||||||
|
|
||||||
|
SimpleTextUtil.readLine(input, scratch);
|
||||||
|
assert StringHelper.startsWith(scratch.get(), NUMFIELDS);
|
||||||
|
final int size = Integer.parseInt(readString(NUMFIELDS.length, scratch));
|
||||||
|
FieldInfo infos[] = new FieldInfo[size];
|
||||||
|
|
||||||
|
for (int i = 0; i < size; i++) {
|
||||||
|
SimpleTextUtil.readLine(input, scratch);
|
||||||
|
assert StringHelper.startsWith(scratch.get(), NAME);
|
||||||
|
String name = readString(NAME.length, scratch);
|
||||||
|
|
||||||
|
SimpleTextUtil.readLine(input, scratch);
|
||||||
|
assert StringHelper.startsWith(scratch.get(), NUMBER);
|
||||||
|
int fieldNumber = Integer.parseInt(readString(NUMBER.length, scratch));
|
||||||
|
|
||||||
|
SimpleTextUtil.readLine(input, scratch);
|
||||||
|
assert StringHelper.startsWith(scratch.get(), ISINDEXED);
|
||||||
|
boolean isIndexed = Boolean.parseBoolean(readString(ISINDEXED.length, scratch));
|
||||||
|
|
||||||
|
final IndexOptions indexOptions;
|
||||||
|
if (isIndexed) {
|
||||||
|
SimpleTextUtil.readLine(input, scratch);
|
||||||
|
assert StringHelper.startsWith(scratch.get(), INDEXOPTIONS);
|
||||||
|
indexOptions = IndexOptions.valueOf(readString(INDEXOPTIONS.length, scratch));
|
||||||
|
} else {
|
||||||
|
indexOptions = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
SimpleTextUtil.readLine(input, scratch);
|
||||||
|
assert StringHelper.startsWith(scratch.get(), STORETV);
|
||||||
|
boolean storeTermVector = Boolean.parseBoolean(readString(STORETV.length, scratch));
|
||||||
|
|
||||||
|
SimpleTextUtil.readLine(input, scratch);
|
||||||
|
assert StringHelper.startsWith(scratch.get(), PAYLOADS);
|
||||||
|
boolean storePayloads = Boolean.parseBoolean(readString(PAYLOADS.length, scratch));
|
||||||
|
|
||||||
|
SimpleTextUtil.readLine(input, scratch);
|
||||||
|
assert StringHelper.startsWith(scratch.get(), NORMS);
|
||||||
|
boolean omitNorms = !Boolean.parseBoolean(readString(NORMS.length, scratch));
|
||||||
|
|
||||||
|
SimpleTextUtil.readLine(input, scratch);
|
||||||
|
assert StringHelper.startsWith(scratch.get(), NORMS_TYPE);
|
||||||
|
String nrmType = readString(NORMS_TYPE.length, scratch);
|
||||||
|
final DocValuesType normsType = docValuesType(nrmType);
|
||||||
|
|
||||||
|
SimpleTextUtil.readLine(input, scratch);
|
||||||
|
assert StringHelper.startsWith(scratch.get(), DOCVALUES);
|
||||||
|
String dvType = readString(DOCVALUES.length, scratch);
|
||||||
|
final DocValuesType docValuesType = docValuesType(dvType);
|
||||||
|
|
||||||
|
SimpleTextUtil.readLine(input, scratch);
|
||||||
|
assert StringHelper.startsWith(scratch.get(), DOCVALUES_GEN);
|
||||||
|
final long dvGen = Long.parseLong(readString(DOCVALUES_GEN.length, scratch));
|
||||||
|
|
||||||
|
SimpleTextUtil.readLine(input, scratch);
|
||||||
|
assert StringHelper.startsWith(scratch.get(), NUM_ATTS);
|
||||||
|
int numAtts = Integer.parseInt(readString(NUM_ATTS.length, scratch));
|
||||||
|
Map<String,String> atts = new HashMap<>();
|
||||||
|
|
||||||
|
for (int j = 0; j < numAtts; j++) {
|
||||||
|
SimpleTextUtil.readLine(input, scratch);
|
||||||
|
assert StringHelper.startsWith(scratch.get(), ATT_KEY);
|
||||||
|
String key = readString(ATT_KEY.length, scratch);
|
||||||
|
|
||||||
|
SimpleTextUtil.readLine(input, scratch);
|
||||||
|
assert StringHelper.startsWith(scratch.get(), ATT_VALUE);
|
||||||
|
String value = readString(ATT_VALUE.length, scratch);
|
||||||
|
atts.put(key, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector,
|
||||||
|
omitNorms, storePayloads, indexOptions, docValuesType, normsType, dvGen, Collections.unmodifiableMap(atts));
|
||||||
|
}
|
||||||
|
|
||||||
|
SimpleTextUtil.checkFooter(input);
|
||||||
|
|
||||||
|
FieldInfos fieldInfos = new FieldInfos(infos);
|
||||||
|
success = true;
|
||||||
|
return fieldInfos;
|
||||||
|
} finally {
|
||||||
|
if (success) {
|
||||||
|
input.close();
|
||||||
|
} else {
|
||||||
|
IOUtils.closeWhileHandlingException(input);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public DocValuesType docValuesType(String dvType) {
|
||||||
|
if ("false".equals(dvType)) {
|
||||||
|
return null;
|
||||||
|
} else {
|
||||||
|
return DocValuesType.valueOf(dvType);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private String readString(int offset, BytesRefBuilder scratch) {
|
||||||
|
return new String(scratch.bytes(), offset, scratch.length()-offset, StandardCharsets.UTF_8);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public FieldInfosWriter getFieldInfosWriter() throws IOException {
|
public void write(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException {
|
||||||
return writer;
|
final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, FIELD_INFOS_EXTENSION);
|
||||||
|
IndexOutput out = directory.createOutput(fileName, context);
|
||||||
|
BytesRefBuilder scratch = new BytesRefBuilder();
|
||||||
|
boolean success = false;
|
||||||
|
try {
|
||||||
|
SimpleTextUtil.write(out, NUMFIELDS);
|
||||||
|
SimpleTextUtil.write(out, Integer.toString(infos.size()), scratch);
|
||||||
|
SimpleTextUtil.writeNewline(out);
|
||||||
|
|
||||||
|
for (FieldInfo fi : infos) {
|
||||||
|
SimpleTextUtil.write(out, NAME);
|
||||||
|
SimpleTextUtil.write(out, fi.name, scratch);
|
||||||
|
SimpleTextUtil.writeNewline(out);
|
||||||
|
|
||||||
|
SimpleTextUtil.write(out, NUMBER);
|
||||||
|
SimpleTextUtil.write(out, Integer.toString(fi.number), scratch);
|
||||||
|
SimpleTextUtil.writeNewline(out);
|
||||||
|
|
||||||
|
SimpleTextUtil.write(out, ISINDEXED);
|
||||||
|
SimpleTextUtil.write(out, Boolean.toString(fi.isIndexed()), scratch);
|
||||||
|
SimpleTextUtil.writeNewline(out);
|
||||||
|
|
||||||
|
if (fi.isIndexed()) {
|
||||||
|
assert fi.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !fi.hasPayloads();
|
||||||
|
SimpleTextUtil.write(out, INDEXOPTIONS);
|
||||||
|
SimpleTextUtil.write(out, fi.getIndexOptions().toString(), scratch);
|
||||||
|
SimpleTextUtil.writeNewline(out);
|
||||||
|
}
|
||||||
|
|
||||||
|
SimpleTextUtil.write(out, STORETV);
|
||||||
|
SimpleTextUtil.write(out, Boolean.toString(fi.hasVectors()), scratch);
|
||||||
|
SimpleTextUtil.writeNewline(out);
|
||||||
|
|
||||||
|
SimpleTextUtil.write(out, PAYLOADS);
|
||||||
|
SimpleTextUtil.write(out, Boolean.toString(fi.hasPayloads()), scratch);
|
||||||
|
SimpleTextUtil.writeNewline(out);
|
||||||
|
|
||||||
|
SimpleTextUtil.write(out, NORMS);
|
||||||
|
SimpleTextUtil.write(out, Boolean.toString(!fi.omitsNorms()), scratch);
|
||||||
|
SimpleTextUtil.writeNewline(out);
|
||||||
|
|
||||||
|
SimpleTextUtil.write(out, NORMS_TYPE);
|
||||||
|
SimpleTextUtil.write(out, getDocValuesType(fi.getNormType()), scratch);
|
||||||
|
SimpleTextUtil.writeNewline(out);
|
||||||
|
|
||||||
|
SimpleTextUtil.write(out, DOCVALUES);
|
||||||
|
SimpleTextUtil.write(out, getDocValuesType(fi.getDocValuesType()), scratch);
|
||||||
|
SimpleTextUtil.writeNewline(out);
|
||||||
|
|
||||||
|
SimpleTextUtil.write(out, DOCVALUES_GEN);
|
||||||
|
SimpleTextUtil.write(out, Long.toString(fi.getDocValuesGen()), scratch);
|
||||||
|
SimpleTextUtil.writeNewline(out);
|
||||||
|
|
||||||
|
Map<String,String> atts = fi.attributes();
|
||||||
|
int numAtts = atts == null ? 0 : atts.size();
|
||||||
|
SimpleTextUtil.write(out, NUM_ATTS);
|
||||||
|
SimpleTextUtil.write(out, Integer.toString(numAtts), scratch);
|
||||||
|
SimpleTextUtil.writeNewline(out);
|
||||||
|
|
||||||
|
if (numAtts > 0) {
|
||||||
|
for (Map.Entry<String,String> entry : atts.entrySet()) {
|
||||||
|
SimpleTextUtil.write(out, ATT_KEY);
|
||||||
|
SimpleTextUtil.write(out, entry.getKey(), scratch);
|
||||||
|
SimpleTextUtil.writeNewline(out);
|
||||||
|
|
||||||
|
SimpleTextUtil.write(out, ATT_VALUE);
|
||||||
|
SimpleTextUtil.write(out, entry.getValue(), scratch);
|
||||||
|
SimpleTextUtil.writeNewline(out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
SimpleTextUtil.writeChecksum(out, scratch);
|
||||||
|
success = true;
|
||||||
|
} finally {
|
||||||
|
if (success) {
|
||||||
|
out.close();
|
||||||
|
} else {
|
||||||
|
IOUtils.closeWhileHandlingException(out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String getDocValuesType(DocValuesType type) {
|
||||||
|
return type == null ? "false" : type.toString();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,157 +0,0 @@
|
||||||
package org.apache.lucene.codecs.simpletext;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.nio.charset.StandardCharsets;
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
import org.apache.lucene.codecs.FieldInfosReader;
|
|
||||||
import org.apache.lucene.index.FieldInfo;
|
|
||||||
import org.apache.lucene.index.FieldInfo.DocValuesType;
|
|
||||||
import org.apache.lucene.index.FieldInfos;
|
|
||||||
import org.apache.lucene.index.IndexFileNames;
|
|
||||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
|
||||||
import org.apache.lucene.index.SegmentInfo;
|
|
||||||
import org.apache.lucene.store.ChecksumIndexInput;
|
|
||||||
import org.apache.lucene.store.Directory;
|
|
||||||
import org.apache.lucene.store.IOContext;
|
|
||||||
import org.apache.lucene.util.BytesRefBuilder;
|
|
||||||
import org.apache.lucene.util.IOUtils;
|
|
||||||
import org.apache.lucene.util.StringHelper;
|
|
||||||
|
|
||||||
import static org.apache.lucene.codecs.simpletext.SimpleTextFieldInfosWriter.*;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* reads plaintext field infos files
|
|
||||||
* <p>
|
|
||||||
* <b><font color="red">FOR RECREATIONAL USE ONLY</font></B>
|
|
||||||
* @lucene.experimental
|
|
||||||
*/
|
|
||||||
public class SimpleTextFieldInfosReader extends FieldInfosReader {
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext iocontext) throws IOException {
|
|
||||||
final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, FIELD_INFOS_EXTENSION);
|
|
||||||
ChecksumIndexInput input = directory.openChecksumInput(fileName, iocontext);
|
|
||||||
BytesRefBuilder scratch = new BytesRefBuilder();
|
|
||||||
|
|
||||||
boolean success = false;
|
|
||||||
try {
|
|
||||||
|
|
||||||
SimpleTextUtil.readLine(input, scratch);
|
|
||||||
assert StringHelper.startsWith(scratch.get(), NUMFIELDS);
|
|
||||||
final int size = Integer.parseInt(readString(NUMFIELDS.length, scratch));
|
|
||||||
FieldInfo infos[] = new FieldInfo[size];
|
|
||||||
|
|
||||||
for (int i = 0; i < size; i++) {
|
|
||||||
SimpleTextUtil.readLine(input, scratch);
|
|
||||||
assert StringHelper.startsWith(scratch.get(), NAME);
|
|
||||||
String name = readString(NAME.length, scratch);
|
|
||||||
|
|
||||||
SimpleTextUtil.readLine(input, scratch);
|
|
||||||
assert StringHelper.startsWith(scratch.get(), NUMBER);
|
|
||||||
int fieldNumber = Integer.parseInt(readString(NUMBER.length, scratch));
|
|
||||||
|
|
||||||
SimpleTextUtil.readLine(input, scratch);
|
|
||||||
assert StringHelper.startsWith(scratch.get(), ISINDEXED);
|
|
||||||
boolean isIndexed = Boolean.parseBoolean(readString(ISINDEXED.length, scratch));
|
|
||||||
|
|
||||||
final IndexOptions indexOptions;
|
|
||||||
if (isIndexed) {
|
|
||||||
SimpleTextUtil.readLine(input, scratch);
|
|
||||||
assert StringHelper.startsWith(scratch.get(), INDEXOPTIONS);
|
|
||||||
indexOptions = IndexOptions.valueOf(readString(INDEXOPTIONS.length, scratch));
|
|
||||||
} else {
|
|
||||||
indexOptions = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
SimpleTextUtil.readLine(input, scratch);
|
|
||||||
assert StringHelper.startsWith(scratch.get(), STORETV);
|
|
||||||
boolean storeTermVector = Boolean.parseBoolean(readString(STORETV.length, scratch));
|
|
||||||
|
|
||||||
SimpleTextUtil.readLine(input, scratch);
|
|
||||||
assert StringHelper.startsWith(scratch.get(), PAYLOADS);
|
|
||||||
boolean storePayloads = Boolean.parseBoolean(readString(PAYLOADS.length, scratch));
|
|
||||||
|
|
||||||
SimpleTextUtil.readLine(input, scratch);
|
|
||||||
assert StringHelper.startsWith(scratch.get(), NORMS);
|
|
||||||
boolean omitNorms = !Boolean.parseBoolean(readString(NORMS.length, scratch));
|
|
||||||
|
|
||||||
SimpleTextUtil.readLine(input, scratch);
|
|
||||||
assert StringHelper.startsWith(scratch.get(), NORMS_TYPE);
|
|
||||||
String nrmType = readString(NORMS_TYPE.length, scratch);
|
|
||||||
final DocValuesType normsType = docValuesType(nrmType);
|
|
||||||
|
|
||||||
SimpleTextUtil.readLine(input, scratch);
|
|
||||||
assert StringHelper.startsWith(scratch.get(), DOCVALUES);
|
|
||||||
String dvType = readString(DOCVALUES.length, scratch);
|
|
||||||
final DocValuesType docValuesType = docValuesType(dvType);
|
|
||||||
|
|
||||||
SimpleTextUtil.readLine(input, scratch);
|
|
||||||
assert StringHelper.startsWith(scratch.get(), DOCVALUES_GEN);
|
|
||||||
final long dvGen = Long.parseLong(readString(DOCVALUES_GEN.length, scratch));
|
|
||||||
|
|
||||||
SimpleTextUtil.readLine(input, scratch);
|
|
||||||
assert StringHelper.startsWith(scratch.get(), NUM_ATTS);
|
|
||||||
int numAtts = Integer.parseInt(readString(NUM_ATTS.length, scratch));
|
|
||||||
Map<String,String> atts = new HashMap<>();
|
|
||||||
|
|
||||||
for (int j = 0; j < numAtts; j++) {
|
|
||||||
SimpleTextUtil.readLine(input, scratch);
|
|
||||||
assert StringHelper.startsWith(scratch.get(), ATT_KEY);
|
|
||||||
String key = readString(ATT_KEY.length, scratch);
|
|
||||||
|
|
||||||
SimpleTextUtil.readLine(input, scratch);
|
|
||||||
assert StringHelper.startsWith(scratch.get(), ATT_VALUE);
|
|
||||||
String value = readString(ATT_VALUE.length, scratch);
|
|
||||||
atts.put(key, value);
|
|
||||||
}
|
|
||||||
|
|
||||||
infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector,
|
|
||||||
omitNorms, storePayloads, indexOptions, docValuesType, normsType, dvGen, Collections.unmodifiableMap(atts));
|
|
||||||
}
|
|
||||||
|
|
||||||
SimpleTextUtil.checkFooter(input);
|
|
||||||
|
|
||||||
FieldInfos fieldInfos = new FieldInfos(infos);
|
|
||||||
success = true;
|
|
||||||
return fieldInfos;
|
|
||||||
} finally {
|
|
||||||
if (success) {
|
|
||||||
input.close();
|
|
||||||
} else {
|
|
||||||
IOUtils.closeWhileHandlingException(input);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public DocValuesType docValuesType(String dvType) {
|
|
||||||
if ("false".equals(dvType)) {
|
|
||||||
return null;
|
|
||||||
} else {
|
|
||||||
return DocValuesType.valueOf(dvType);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private String readString(int offset, BytesRefBuilder scratch) {
|
|
||||||
return new String(scratch.bytes(), offset, scratch.length()-offset, StandardCharsets.UTF_8);
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,151 +0,0 @@
|
||||||
package org.apache.lucene.codecs.simpletext;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
import org.apache.lucene.codecs.FieldInfosWriter;
|
|
||||||
import org.apache.lucene.index.FieldInfo;
|
|
||||||
import org.apache.lucene.index.FieldInfo.DocValuesType;
|
|
||||||
import org.apache.lucene.index.FieldInfos;
|
|
||||||
import org.apache.lucene.index.IndexFileNames;
|
|
||||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
|
||||||
import org.apache.lucene.index.SegmentInfo;
|
|
||||||
import org.apache.lucene.store.Directory;
|
|
||||||
import org.apache.lucene.store.IOContext;
|
|
||||||
import org.apache.lucene.store.IndexOutput;
|
|
||||||
import org.apache.lucene.util.BytesRef;
|
|
||||||
import org.apache.lucene.util.BytesRefBuilder;
|
|
||||||
import org.apache.lucene.util.IOUtils;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* writes plaintext field infos files
|
|
||||||
* <p>
|
|
||||||
* <b><font color="red">FOR RECREATIONAL USE ONLY</font></B>
|
|
||||||
* @lucene.experimental
|
|
||||||
*/
|
|
||||||
public class SimpleTextFieldInfosWriter extends FieldInfosWriter {
|
|
||||||
|
|
||||||
/** Extension of field infos */
|
|
||||||
static final String FIELD_INFOS_EXTENSION = "inf";
|
|
||||||
|
|
||||||
static final BytesRef NUMFIELDS = new BytesRef("number of fields ");
|
|
||||||
static final BytesRef NAME = new BytesRef(" name ");
|
|
||||||
static final BytesRef NUMBER = new BytesRef(" number ");
|
|
||||||
static final BytesRef ISINDEXED = new BytesRef(" indexed ");
|
|
||||||
static final BytesRef STORETV = new BytesRef(" term vectors ");
|
|
||||||
static final BytesRef STORETVPOS = new BytesRef(" term vector positions ");
|
|
||||||
static final BytesRef STORETVOFF = new BytesRef(" term vector offsets ");
|
|
||||||
static final BytesRef PAYLOADS = new BytesRef(" payloads ");
|
|
||||||
static final BytesRef NORMS = new BytesRef(" norms ");
|
|
||||||
static final BytesRef NORMS_TYPE = new BytesRef(" norms type ");
|
|
||||||
static final BytesRef DOCVALUES = new BytesRef(" doc values ");
|
|
||||||
static final BytesRef DOCVALUES_GEN = new BytesRef(" doc values gen ");
|
|
||||||
static final BytesRef INDEXOPTIONS = new BytesRef(" index options ");
|
|
||||||
static final BytesRef NUM_ATTS = new BytesRef(" attributes ");
|
|
||||||
final static BytesRef ATT_KEY = new BytesRef(" key ");
|
|
||||||
final static BytesRef ATT_VALUE = new BytesRef(" value ");
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void write(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException {
|
|
||||||
final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, FIELD_INFOS_EXTENSION);
|
|
||||||
IndexOutput out = directory.createOutput(fileName, context);
|
|
||||||
BytesRefBuilder scratch = new BytesRefBuilder();
|
|
||||||
boolean success = false;
|
|
||||||
try {
|
|
||||||
SimpleTextUtil.write(out, NUMFIELDS);
|
|
||||||
SimpleTextUtil.write(out, Integer.toString(infos.size()), scratch);
|
|
||||||
SimpleTextUtil.writeNewline(out);
|
|
||||||
|
|
||||||
for (FieldInfo fi : infos) {
|
|
||||||
SimpleTextUtil.write(out, NAME);
|
|
||||||
SimpleTextUtil.write(out, fi.name, scratch);
|
|
||||||
SimpleTextUtil.writeNewline(out);
|
|
||||||
|
|
||||||
SimpleTextUtil.write(out, NUMBER);
|
|
||||||
SimpleTextUtil.write(out, Integer.toString(fi.number), scratch);
|
|
||||||
SimpleTextUtil.writeNewline(out);
|
|
||||||
|
|
||||||
SimpleTextUtil.write(out, ISINDEXED);
|
|
||||||
SimpleTextUtil.write(out, Boolean.toString(fi.isIndexed()), scratch);
|
|
||||||
SimpleTextUtil.writeNewline(out);
|
|
||||||
|
|
||||||
if (fi.isIndexed()) {
|
|
||||||
assert fi.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !fi.hasPayloads();
|
|
||||||
SimpleTextUtil.write(out, INDEXOPTIONS);
|
|
||||||
SimpleTextUtil.write(out, fi.getIndexOptions().toString(), scratch);
|
|
||||||
SimpleTextUtil.writeNewline(out);
|
|
||||||
}
|
|
||||||
|
|
||||||
SimpleTextUtil.write(out, STORETV);
|
|
||||||
SimpleTextUtil.write(out, Boolean.toString(fi.hasVectors()), scratch);
|
|
||||||
SimpleTextUtil.writeNewline(out);
|
|
||||||
|
|
||||||
SimpleTextUtil.write(out, PAYLOADS);
|
|
||||||
SimpleTextUtil.write(out, Boolean.toString(fi.hasPayloads()), scratch);
|
|
||||||
SimpleTextUtil.writeNewline(out);
|
|
||||||
|
|
||||||
SimpleTextUtil.write(out, NORMS);
|
|
||||||
SimpleTextUtil.write(out, Boolean.toString(!fi.omitsNorms()), scratch);
|
|
||||||
SimpleTextUtil.writeNewline(out);
|
|
||||||
|
|
||||||
SimpleTextUtil.write(out, NORMS_TYPE);
|
|
||||||
SimpleTextUtil.write(out, getDocValuesType(fi.getNormType()), scratch);
|
|
||||||
SimpleTextUtil.writeNewline(out);
|
|
||||||
|
|
||||||
SimpleTextUtil.write(out, DOCVALUES);
|
|
||||||
SimpleTextUtil.write(out, getDocValuesType(fi.getDocValuesType()), scratch);
|
|
||||||
SimpleTextUtil.writeNewline(out);
|
|
||||||
|
|
||||||
SimpleTextUtil.write(out, DOCVALUES_GEN);
|
|
||||||
SimpleTextUtil.write(out, Long.toString(fi.getDocValuesGen()), scratch);
|
|
||||||
SimpleTextUtil.writeNewline(out);
|
|
||||||
|
|
||||||
Map<String,String> atts = fi.attributes();
|
|
||||||
int numAtts = atts == null ? 0 : atts.size();
|
|
||||||
SimpleTextUtil.write(out, NUM_ATTS);
|
|
||||||
SimpleTextUtil.write(out, Integer.toString(numAtts), scratch);
|
|
||||||
SimpleTextUtil.writeNewline(out);
|
|
||||||
|
|
||||||
if (numAtts > 0) {
|
|
||||||
for (Map.Entry<String,String> entry : atts.entrySet()) {
|
|
||||||
SimpleTextUtil.write(out, ATT_KEY);
|
|
||||||
SimpleTextUtil.write(out, entry.getKey(), scratch);
|
|
||||||
SimpleTextUtil.writeNewline(out);
|
|
||||||
|
|
||||||
SimpleTextUtil.write(out, ATT_VALUE);
|
|
||||||
SimpleTextUtil.write(out, entry.getValue(), scratch);
|
|
||||||
SimpleTextUtil.writeNewline(out);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
SimpleTextUtil.writeChecksum(out, scratch);
|
|
||||||
success = true;
|
|
||||||
} finally {
|
|
||||||
if (success) {
|
|
||||||
out.close();
|
|
||||||
} else {
|
|
||||||
IOUtils.closeWhileHandlingException(out);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static String getDocValuesType(DocValuesType type) {
|
|
||||||
return type == null ? "false" : type.toString();
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -17,9 +17,28 @@ package org.apache.lucene.codecs.simpletext;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.text.ParseException;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.codecs.SegmentInfoFormat;
|
import org.apache.lucene.codecs.SegmentInfoFormat;
|
||||||
import org.apache.lucene.codecs.SegmentInfoReader;
|
import org.apache.lucene.index.CorruptIndexException;
|
||||||
import org.apache.lucene.codecs.SegmentInfoWriter;
|
import org.apache.lucene.index.IndexFileNames;
|
||||||
|
import org.apache.lucene.index.SegmentInfo;
|
||||||
|
import org.apache.lucene.store.ChecksumIndexInput;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.store.IOContext;
|
||||||
|
import org.apache.lucene.store.IndexOutput;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.BytesRefBuilder;
|
||||||
|
import org.apache.lucene.util.IOUtils;
|
||||||
|
import org.apache.lucene.util.StringHelper;
|
||||||
|
import org.apache.lucene.util.Version;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* plain text segments file format.
|
* plain text segments file format.
|
||||||
|
@ -28,18 +47,163 @@ import org.apache.lucene.codecs.SegmentInfoWriter;
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
|
public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
|
||||||
private final SegmentInfoReader reader = new SimpleTextSegmentInfoReader();
|
final static BytesRef SI_VERSION = new BytesRef(" version ");
|
||||||
private final SegmentInfoWriter writer = new SimpleTextSegmentInfoWriter();
|
final static BytesRef SI_DOCCOUNT = new BytesRef(" number of documents ");
|
||||||
|
final static BytesRef SI_USECOMPOUND = new BytesRef(" uses compound file ");
|
||||||
|
final static BytesRef SI_NUM_DIAG = new BytesRef(" diagnostics ");
|
||||||
|
final static BytesRef SI_DIAG_KEY = new BytesRef(" key ");
|
||||||
|
final static BytesRef SI_DIAG_VALUE = new BytesRef(" value ");
|
||||||
|
final static BytesRef SI_NUM_FILES = new BytesRef(" files ");
|
||||||
|
final static BytesRef SI_FILE = new BytesRef(" file ");
|
||||||
|
final static BytesRef SI_ID = new BytesRef(" id ");
|
||||||
|
|
||||||
public static final String SI_EXTENSION = "si";
|
public static final String SI_EXTENSION = "si";
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public SegmentInfoReader getSegmentInfoReader() {
|
public SegmentInfo read(Directory directory, String segmentName, IOContext context) throws IOException {
|
||||||
return reader;
|
BytesRefBuilder scratch = new BytesRefBuilder();
|
||||||
|
String segFileName = IndexFileNames.segmentFileName(segmentName, "", SimpleTextSegmentInfoFormat.SI_EXTENSION);
|
||||||
|
ChecksumIndexInput input = directory.openChecksumInput(segFileName, context);
|
||||||
|
boolean success = false;
|
||||||
|
try {
|
||||||
|
SimpleTextUtil.readLine(input, scratch);
|
||||||
|
assert StringHelper.startsWith(scratch.get(), SI_VERSION);
|
||||||
|
final Version version;
|
||||||
|
try {
|
||||||
|
version = Version.parse(readString(SI_VERSION.length, scratch));
|
||||||
|
} catch (ParseException pe) {
|
||||||
|
throw new CorruptIndexException("unable to parse version string: " + pe.getMessage(), input, pe);
|
||||||
|
}
|
||||||
|
|
||||||
|
SimpleTextUtil.readLine(input, scratch);
|
||||||
|
assert StringHelper.startsWith(scratch.get(), SI_DOCCOUNT);
|
||||||
|
final int docCount = Integer.parseInt(readString(SI_DOCCOUNT.length, scratch));
|
||||||
|
|
||||||
|
SimpleTextUtil.readLine(input, scratch);
|
||||||
|
assert StringHelper.startsWith(scratch.get(), SI_USECOMPOUND);
|
||||||
|
final boolean isCompoundFile = Boolean.parseBoolean(readString(SI_USECOMPOUND.length, scratch));
|
||||||
|
|
||||||
|
SimpleTextUtil.readLine(input, scratch);
|
||||||
|
assert StringHelper.startsWith(scratch.get(), SI_NUM_DIAG);
|
||||||
|
int numDiag = Integer.parseInt(readString(SI_NUM_DIAG.length, scratch));
|
||||||
|
Map<String,String> diagnostics = new HashMap<>();
|
||||||
|
|
||||||
|
for (int i = 0; i < numDiag; i++) {
|
||||||
|
SimpleTextUtil.readLine(input, scratch);
|
||||||
|
assert StringHelper.startsWith(scratch.get(), SI_DIAG_KEY);
|
||||||
|
String key = readString(SI_DIAG_KEY.length, scratch);
|
||||||
|
|
||||||
|
SimpleTextUtil.readLine(input, scratch);
|
||||||
|
assert StringHelper.startsWith(scratch.get(), SI_DIAG_VALUE);
|
||||||
|
String value = readString(SI_DIAG_VALUE.length, scratch);
|
||||||
|
diagnostics.put(key, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
SimpleTextUtil.readLine(input, scratch);
|
||||||
|
assert StringHelper.startsWith(scratch.get(), SI_NUM_FILES);
|
||||||
|
int numFiles = Integer.parseInt(readString(SI_NUM_FILES.length, scratch));
|
||||||
|
Set<String> files = new HashSet<>();
|
||||||
|
|
||||||
|
for (int i = 0; i < numFiles; i++) {
|
||||||
|
SimpleTextUtil.readLine(input, scratch);
|
||||||
|
assert StringHelper.startsWith(scratch.get(), SI_FILE);
|
||||||
|
String fileName = readString(SI_FILE.length, scratch);
|
||||||
|
files.add(fileName);
|
||||||
|
}
|
||||||
|
|
||||||
|
SimpleTextUtil.readLine(input, scratch);
|
||||||
|
assert StringHelper.startsWith(scratch.get(), SI_ID);
|
||||||
|
final byte[] id = Arrays.copyOfRange(scratch.bytes(), SI_ID.length, scratch.length());
|
||||||
|
|
||||||
|
SimpleTextUtil.checkFooter(input);
|
||||||
|
|
||||||
|
SegmentInfo info = new SegmentInfo(directory, version, segmentName, docCount,
|
||||||
|
isCompoundFile, null, diagnostics, id);
|
||||||
|
info.setFiles(files);
|
||||||
|
success = true;
|
||||||
|
return info;
|
||||||
|
} finally {
|
||||||
|
if (!success) {
|
||||||
|
IOUtils.closeWhileHandlingException(input);
|
||||||
|
} else {
|
||||||
|
input.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private String readString(int offset, BytesRefBuilder scratch) {
|
||||||
|
return new String(scratch.bytes(), offset, scratch.length()-offset, StandardCharsets.UTF_8);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public SegmentInfoWriter getSegmentInfoWriter() {
|
public void write(Directory dir, SegmentInfo si, IOContext ioContext) throws IOException {
|
||||||
return writer;
|
|
||||||
|
String segFileName = IndexFileNames.segmentFileName(si.name, "", SimpleTextSegmentInfoFormat.SI_EXTENSION);
|
||||||
|
si.addFile(segFileName);
|
||||||
|
|
||||||
|
boolean success = false;
|
||||||
|
IndexOutput output = dir.createOutput(segFileName, ioContext);
|
||||||
|
|
||||||
|
try {
|
||||||
|
BytesRefBuilder scratch = new BytesRefBuilder();
|
||||||
|
|
||||||
|
SimpleTextUtil.write(output, SI_VERSION);
|
||||||
|
SimpleTextUtil.write(output, si.getVersion().toString(), scratch);
|
||||||
|
SimpleTextUtil.writeNewline(output);
|
||||||
|
|
||||||
|
SimpleTextUtil.write(output, SI_DOCCOUNT);
|
||||||
|
SimpleTextUtil.write(output, Integer.toString(si.getDocCount()), scratch);
|
||||||
|
SimpleTextUtil.writeNewline(output);
|
||||||
|
|
||||||
|
SimpleTextUtil.write(output, SI_USECOMPOUND);
|
||||||
|
SimpleTextUtil.write(output, Boolean.toString(si.getUseCompoundFile()), scratch);
|
||||||
|
SimpleTextUtil.writeNewline(output);
|
||||||
|
|
||||||
|
Map<String,String> diagnostics = si.getDiagnostics();
|
||||||
|
int numDiagnostics = diagnostics == null ? 0 : diagnostics.size();
|
||||||
|
SimpleTextUtil.write(output, SI_NUM_DIAG);
|
||||||
|
SimpleTextUtil.write(output, Integer.toString(numDiagnostics), scratch);
|
||||||
|
SimpleTextUtil.writeNewline(output);
|
||||||
|
|
||||||
|
if (numDiagnostics > 0) {
|
||||||
|
for (Map.Entry<String,String> diagEntry : diagnostics.entrySet()) {
|
||||||
|
SimpleTextUtil.write(output, SI_DIAG_KEY);
|
||||||
|
SimpleTextUtil.write(output, diagEntry.getKey(), scratch);
|
||||||
|
SimpleTextUtil.writeNewline(output);
|
||||||
|
|
||||||
|
SimpleTextUtil.write(output, SI_DIAG_VALUE);
|
||||||
|
SimpleTextUtil.write(output, diagEntry.getValue(), scratch);
|
||||||
|
SimpleTextUtil.writeNewline(output);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Set<String> files = si.files();
|
||||||
|
int numFiles = files == null ? 0 : files.size();
|
||||||
|
SimpleTextUtil.write(output, SI_NUM_FILES);
|
||||||
|
SimpleTextUtil.write(output, Integer.toString(numFiles), scratch);
|
||||||
|
SimpleTextUtil.writeNewline(output);
|
||||||
|
|
||||||
|
if (numFiles > 0) {
|
||||||
|
for(String fileName : files) {
|
||||||
|
SimpleTextUtil.write(output, SI_FILE);
|
||||||
|
SimpleTextUtil.write(output, fileName, scratch);
|
||||||
|
SimpleTextUtil.writeNewline(output);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
SimpleTextUtil.write(output, SI_ID);
|
||||||
|
SimpleTextUtil.write(output, new BytesRef(si.getId()));
|
||||||
|
SimpleTextUtil.writeNewline(output);
|
||||||
|
|
||||||
|
SimpleTextUtil.writeChecksum(output, scratch);
|
||||||
|
success = true;
|
||||||
|
} finally {
|
||||||
|
if (!success) {
|
||||||
|
IOUtils.closeWhileHandlingException(output);
|
||||||
|
IOUtils.deleteFilesIgnoringExceptions(dir, segFileName);
|
||||||
|
} else {
|
||||||
|
output.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,134 +0,0 @@
|
||||||
package org.apache.lucene.codecs.simpletext;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.nio.charset.StandardCharsets;
|
|
||||||
import java.text.ParseException;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
import org.apache.lucene.codecs.SegmentInfoReader;
|
|
||||||
import org.apache.lucene.index.CorruptIndexException;
|
|
||||||
import org.apache.lucene.index.IndexFileNames;
|
|
||||||
import org.apache.lucene.index.SegmentInfo;
|
|
||||||
import org.apache.lucene.store.ChecksumIndexInput;
|
|
||||||
import org.apache.lucene.store.Directory;
|
|
||||||
import org.apache.lucene.store.IOContext;
|
|
||||||
import org.apache.lucene.util.BytesRefBuilder;
|
|
||||||
import org.apache.lucene.util.IOUtils;
|
|
||||||
import org.apache.lucene.util.StringHelper;
|
|
||||||
import org.apache.lucene.util.Version;
|
|
||||||
|
|
||||||
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DIAG_KEY;
|
|
||||||
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DIAG_VALUE;
|
|
||||||
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DOCCOUNT;
|
|
||||||
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_FILE;
|
|
||||||
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_NUM_DIAG;
|
|
||||||
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_NUM_FILES;
|
|
||||||
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_USECOMPOUND;
|
|
||||||
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_ID;
|
|
||||||
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_VERSION;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* reads plaintext segments files
|
|
||||||
* <p>
|
|
||||||
* <b><font color="red">FOR RECREATIONAL USE ONLY</font></B>
|
|
||||||
* @lucene.experimental
|
|
||||||
*/
|
|
||||||
public class SimpleTextSegmentInfoReader extends SegmentInfoReader {
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public SegmentInfo read(Directory directory, String segmentName, IOContext context) throws IOException {
|
|
||||||
BytesRefBuilder scratch = new BytesRefBuilder();
|
|
||||||
String segFileName = IndexFileNames.segmentFileName(segmentName, "", SimpleTextSegmentInfoFormat.SI_EXTENSION);
|
|
||||||
ChecksumIndexInput input = directory.openChecksumInput(segFileName, context);
|
|
||||||
boolean success = false;
|
|
||||||
try {
|
|
||||||
SimpleTextUtil.readLine(input, scratch);
|
|
||||||
assert StringHelper.startsWith(scratch.get(), SI_VERSION);
|
|
||||||
final Version version;
|
|
||||||
try {
|
|
||||||
version = Version.parse(readString(SI_VERSION.length, scratch));
|
|
||||||
} catch (ParseException pe) {
|
|
||||||
throw new CorruptIndexException("unable to parse version string: " + pe.getMessage(), input, pe);
|
|
||||||
}
|
|
||||||
|
|
||||||
SimpleTextUtil.readLine(input, scratch);
|
|
||||||
assert StringHelper.startsWith(scratch.get(), SI_DOCCOUNT);
|
|
||||||
final int docCount = Integer.parseInt(readString(SI_DOCCOUNT.length, scratch));
|
|
||||||
|
|
||||||
SimpleTextUtil.readLine(input, scratch);
|
|
||||||
assert StringHelper.startsWith(scratch.get(), SI_USECOMPOUND);
|
|
||||||
final boolean isCompoundFile = Boolean.parseBoolean(readString(SI_USECOMPOUND.length, scratch));
|
|
||||||
|
|
||||||
SimpleTextUtil.readLine(input, scratch);
|
|
||||||
assert StringHelper.startsWith(scratch.get(), SI_NUM_DIAG);
|
|
||||||
int numDiag = Integer.parseInt(readString(SI_NUM_DIAG.length, scratch));
|
|
||||||
Map<String,String> diagnostics = new HashMap<>();
|
|
||||||
|
|
||||||
for (int i = 0; i < numDiag; i++) {
|
|
||||||
SimpleTextUtil.readLine(input, scratch);
|
|
||||||
assert StringHelper.startsWith(scratch.get(), SI_DIAG_KEY);
|
|
||||||
String key = readString(SI_DIAG_KEY.length, scratch);
|
|
||||||
|
|
||||||
SimpleTextUtil.readLine(input, scratch);
|
|
||||||
assert StringHelper.startsWith(scratch.get(), SI_DIAG_VALUE);
|
|
||||||
String value = readString(SI_DIAG_VALUE.length, scratch);
|
|
||||||
diagnostics.put(key, value);
|
|
||||||
}
|
|
||||||
|
|
||||||
SimpleTextUtil.readLine(input, scratch);
|
|
||||||
assert StringHelper.startsWith(scratch.get(), SI_NUM_FILES);
|
|
||||||
int numFiles = Integer.parseInt(readString(SI_NUM_FILES.length, scratch));
|
|
||||||
Set<String> files = new HashSet<>();
|
|
||||||
|
|
||||||
for (int i = 0; i < numFiles; i++) {
|
|
||||||
SimpleTextUtil.readLine(input, scratch);
|
|
||||||
assert StringHelper.startsWith(scratch.get(), SI_FILE);
|
|
||||||
String fileName = readString(SI_FILE.length, scratch);
|
|
||||||
files.add(fileName);
|
|
||||||
}
|
|
||||||
|
|
||||||
SimpleTextUtil.readLine(input, scratch);
|
|
||||||
assert StringHelper.startsWith(scratch.get(), SI_ID);
|
|
||||||
final byte[] id = Arrays.copyOfRange(scratch.bytes(), SI_ID.length, scratch.length());
|
|
||||||
|
|
||||||
SimpleTextUtil.checkFooter(input);
|
|
||||||
|
|
||||||
SegmentInfo info = new SegmentInfo(directory, version, segmentName, docCount,
|
|
||||||
isCompoundFile, null, diagnostics, id);
|
|
||||||
info.setFiles(files);
|
|
||||||
success = true;
|
|
||||||
return info;
|
|
||||||
} finally {
|
|
||||||
if (!success) {
|
|
||||||
IOUtils.closeWhileHandlingException(input);
|
|
||||||
} else {
|
|
||||||
input.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private String readString(int offset, BytesRefBuilder scratch) {
|
|
||||||
return new String(scratch.bytes(), offset, scratch.length()-offset, StandardCharsets.UTF_8);
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,124 +0,0 @@
|
||||||
package org.apache.lucene.codecs.simpletext;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
import org.apache.lucene.codecs.SegmentInfoWriter;
|
|
||||||
import org.apache.lucene.index.FieldInfos;
|
|
||||||
import org.apache.lucene.index.IndexFileNames;
|
|
||||||
import org.apache.lucene.index.SegmentInfo;
|
|
||||||
import org.apache.lucene.store.Directory;
|
|
||||||
import org.apache.lucene.store.IOContext;
|
|
||||||
import org.apache.lucene.store.IndexOutput;
|
|
||||||
import org.apache.lucene.util.BytesRef;
|
|
||||||
import org.apache.lucene.util.BytesRefBuilder;
|
|
||||||
import org.apache.lucene.util.IOUtils;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* writes plaintext segments files
|
|
||||||
* <p>
|
|
||||||
* <b><font color="red">FOR RECREATIONAL USE ONLY</font></B>
|
|
||||||
* @lucene.experimental
|
|
||||||
*/
|
|
||||||
public class SimpleTextSegmentInfoWriter extends SegmentInfoWriter {
|
|
||||||
|
|
||||||
final static BytesRef SI_VERSION = new BytesRef(" version ");
|
|
||||||
final static BytesRef SI_DOCCOUNT = new BytesRef(" number of documents ");
|
|
||||||
final static BytesRef SI_USECOMPOUND = new BytesRef(" uses compound file ");
|
|
||||||
final static BytesRef SI_NUM_DIAG = new BytesRef(" diagnostics ");
|
|
||||||
final static BytesRef SI_DIAG_KEY = new BytesRef(" key ");
|
|
||||||
final static BytesRef SI_DIAG_VALUE = new BytesRef(" value ");
|
|
||||||
final static BytesRef SI_NUM_FILES = new BytesRef(" files ");
|
|
||||||
final static BytesRef SI_FILE = new BytesRef(" file ");
|
|
||||||
final static BytesRef SI_ID = new BytesRef(" id ");
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void write(Directory dir, SegmentInfo si, FieldInfos fis, IOContext ioContext) throws IOException {
|
|
||||||
|
|
||||||
String segFileName = IndexFileNames.segmentFileName(si.name, "", SimpleTextSegmentInfoFormat.SI_EXTENSION);
|
|
||||||
si.addFile(segFileName);
|
|
||||||
|
|
||||||
boolean success = false;
|
|
||||||
IndexOutput output = dir.createOutput(segFileName, ioContext);
|
|
||||||
|
|
||||||
try {
|
|
||||||
BytesRefBuilder scratch = new BytesRefBuilder();
|
|
||||||
|
|
||||||
SimpleTextUtil.write(output, SI_VERSION);
|
|
||||||
SimpleTextUtil.write(output, si.getVersion().toString(), scratch);
|
|
||||||
SimpleTextUtil.writeNewline(output);
|
|
||||||
|
|
||||||
SimpleTextUtil.write(output, SI_DOCCOUNT);
|
|
||||||
SimpleTextUtil.write(output, Integer.toString(si.getDocCount()), scratch);
|
|
||||||
SimpleTextUtil.writeNewline(output);
|
|
||||||
|
|
||||||
SimpleTextUtil.write(output, SI_USECOMPOUND);
|
|
||||||
SimpleTextUtil.write(output, Boolean.toString(si.getUseCompoundFile()), scratch);
|
|
||||||
SimpleTextUtil.writeNewline(output);
|
|
||||||
|
|
||||||
Map<String,String> diagnostics = si.getDiagnostics();
|
|
||||||
int numDiagnostics = diagnostics == null ? 0 : diagnostics.size();
|
|
||||||
SimpleTextUtil.write(output, SI_NUM_DIAG);
|
|
||||||
SimpleTextUtil.write(output, Integer.toString(numDiagnostics), scratch);
|
|
||||||
SimpleTextUtil.writeNewline(output);
|
|
||||||
|
|
||||||
if (numDiagnostics > 0) {
|
|
||||||
for (Map.Entry<String,String> diagEntry : diagnostics.entrySet()) {
|
|
||||||
SimpleTextUtil.write(output, SI_DIAG_KEY);
|
|
||||||
SimpleTextUtil.write(output, diagEntry.getKey(), scratch);
|
|
||||||
SimpleTextUtil.writeNewline(output);
|
|
||||||
|
|
||||||
SimpleTextUtil.write(output, SI_DIAG_VALUE);
|
|
||||||
SimpleTextUtil.write(output, diagEntry.getValue(), scratch);
|
|
||||||
SimpleTextUtil.writeNewline(output);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Set<String> files = si.files();
|
|
||||||
int numFiles = files == null ? 0 : files.size();
|
|
||||||
SimpleTextUtil.write(output, SI_NUM_FILES);
|
|
||||||
SimpleTextUtil.write(output, Integer.toString(numFiles), scratch);
|
|
||||||
SimpleTextUtil.writeNewline(output);
|
|
||||||
|
|
||||||
if (numFiles > 0) {
|
|
||||||
for(String fileName : files) {
|
|
||||||
SimpleTextUtil.write(output, SI_FILE);
|
|
||||||
SimpleTextUtil.write(output, fileName, scratch);
|
|
||||||
SimpleTextUtil.writeNewline(output);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
SimpleTextUtil.write(output, SI_ID);
|
|
||||||
SimpleTextUtil.write(output, new BytesRef(si.getId()));
|
|
||||||
SimpleTextUtil.writeNewline(output);
|
|
||||||
|
|
||||||
SimpleTextUtil.writeChecksum(output, scratch);
|
|
||||||
success = true;
|
|
||||||
} finally {
|
|
||||||
if (!success) {
|
|
||||||
IOUtils.closeWhileHandlingException(output);
|
|
||||||
IOUtils.deleteFilesIgnoringExceptions(dir, segFileName);
|
|
||||||
} else {
|
|
||||||
output.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,4 +1,4 @@
|
||||||
package org.apache.lucene.codecs;
|
package org.apache.lucene.codecs.simpletext;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
@ -17,24 +17,17 @@ package org.apache.lucene.codecs;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
import org.apache.lucene.codecs.Codec;
|
||||||
|
import org.apache.lucene.index.BaseFieldInfoFormatTestCase;
|
||||||
import org.apache.lucene.index.FieldInfos;
|
|
||||||
import org.apache.lucene.index.SegmentInfo;
|
|
||||||
import org.apache.lucene.store.Directory;
|
|
||||||
import org.apache.lucene.store.IOContext;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Codec API for reading {@link FieldInfos}.
|
* Tests SimpleTextFieldInfoFormat
|
||||||
* @lucene.experimental
|
|
||||||
*/
|
*/
|
||||||
public abstract class FieldInfosReader {
|
public class TestSimpleTextFieldInfoFormat extends BaseFieldInfoFormatTestCase {
|
||||||
/** Sole constructor. (For invocation by subclass
|
private final Codec codec = new SimpleTextCodec();
|
||||||
* constructors, typically implicit.) */
|
|
||||||
protected FieldInfosReader() {
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Read the {@link FieldInfos} previously written with {@link
|
@Override
|
||||||
* FieldInfosWriter}. */
|
protected Codec getCodec() {
|
||||||
public abstract FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext iocontext) throws IOException;
|
return codec;
|
||||||
|
}
|
||||||
}
|
}
|
|
@ -0,0 +1,39 @@
|
||||||
|
package org.apache.lucene.codecs.simpletext;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.codecs.Codec;
|
||||||
|
import org.apache.lucene.index.BaseSegmentInfoFormatTestCase;
|
||||||
|
import org.apache.lucene.util.Version;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests SimpleTextSegmentInfoFormat
|
||||||
|
*/
|
||||||
|
public class TestSimpleTextSegmentInfoFormat extends BaseSegmentInfoFormatTestCase {
|
||||||
|
private final Codec codec = new SimpleTextCodec();
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Version[] getVersions() {
|
||||||
|
return new Version[] { Version.LATEST };
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Codec getCodec() {
|
||||||
|
return codec;
|
||||||
|
}
|
||||||
|
}
|
|
@ -20,6 +20,9 @@ package org.apache.lucene.codecs;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.lucene.index.FieldInfos; // javadocs
|
import org.apache.lucene.index.FieldInfos; // javadocs
|
||||||
|
import org.apache.lucene.index.SegmentInfo;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.store.IOContext;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Encodes/decodes {@link FieldInfos}
|
* Encodes/decodes {@link FieldInfos}
|
||||||
|
@ -30,12 +33,11 @@ public abstract class FieldInfosFormat {
|
||||||
* constructors, typically implicit.) */
|
* constructors, typically implicit.) */
|
||||||
protected FieldInfosFormat() {
|
protected FieldInfosFormat() {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Read the {@link FieldInfos} previously written with {@link #write}. */
|
||||||
|
public abstract FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext iocontext) throws IOException;
|
||||||
|
|
||||||
/** Returns a {@link FieldInfosReader} to read field infos
|
/** Writes the provided {@link FieldInfos} to the
|
||||||
* from the index */
|
* directory. */
|
||||||
public abstract FieldInfosReader getFieldInfosReader() throws IOException;
|
public abstract void write(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException;
|
||||||
|
|
||||||
/** Returns a {@link FieldInfosWriter} to write field infos
|
|
||||||
* to the index */
|
|
||||||
public abstract FieldInfosWriter getFieldInfosWriter() throws IOException;
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,7 +17,11 @@ package org.apache.lucene.codecs;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.lucene.index.SegmentInfo;
|
import org.apache.lucene.index.SegmentInfo;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.store.IOContext;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Expert: Controls the format of the
|
* Expert: Controls the format of the
|
||||||
|
@ -33,11 +37,18 @@ public abstract class SegmentInfoFormat {
|
||||||
protected SegmentInfoFormat() {
|
protected SegmentInfoFormat() {
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns the {@link SegmentInfoReader} for reading
|
/**
|
||||||
* {@link SegmentInfo} instances. */
|
* Read {@link SegmentInfo} data from a directory.
|
||||||
public abstract SegmentInfoReader getSegmentInfoReader();
|
* @param directory directory to read from
|
||||||
|
* @param segmentName name of the segment to read
|
||||||
|
* @return the {@link SegmentInfo} read for the named segment
|
||||||
|
* @throws IOException If an I/O error occurs
|
||||||
|
*/
|
||||||
|
public abstract SegmentInfo read(Directory directory, String segmentName, IOContext context) throws IOException;
|
||||||
|
|
||||||
/** Returns the {@link SegmentInfoWriter} for writing
|
/**
|
||||||
* {@link SegmentInfo} instances. */
|
* Write {@link SegmentInfo} data.
|
||||||
public abstract SegmentInfoWriter getSegmentInfoWriter();
|
* @throws IOException If an I/O error occurs
|
||||||
|
*/
|
||||||
|
public abstract void write(Directory dir, SegmentInfo info, IOContext ioContext) throws IOException;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,46 +0,0 @@
|
||||||
package org.apache.lucene.codecs;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
import org.apache.lucene.index.SegmentInfo;
|
|
||||||
import org.apache.lucene.store.Directory;
|
|
||||||
import org.apache.lucene.store.IOContext;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Specifies an API for classes that can read {@link SegmentInfo} information.
|
|
||||||
* @lucene.experimental
|
|
||||||
*/
|
|
||||||
|
|
||||||
public abstract class SegmentInfoReader {
|
|
||||||
|
|
||||||
/** Sole constructor. (For invocation by subclass
|
|
||||||
* constructors, typically implicit.) */
|
|
||||||
protected SegmentInfoReader() {
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Read {@link SegmentInfo} data from a directory.
|
|
||||||
* @param directory directory to read from
|
|
||||||
* @param segmentName name of the segment to read
|
|
||||||
* @return infos instance to be populated with data
|
|
||||||
* @throws IOException If an I/O error occurs
|
|
||||||
*/
|
|
||||||
public abstract SegmentInfo read(Directory directory, String segmentName, IOContext context) throws IOException;
|
|
||||||
}
|
|
|
@ -18,14 +18,25 @@ package org.apache.lucene.codecs.lucene50;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
import org.apache.lucene.codecs.CodecUtil;
|
import org.apache.lucene.codecs.CodecUtil;
|
||||||
import org.apache.lucene.codecs.DocValuesFormat;
|
import org.apache.lucene.codecs.DocValuesFormat;
|
||||||
import org.apache.lucene.codecs.FieldInfosFormat;
|
import org.apache.lucene.codecs.FieldInfosFormat;
|
||||||
import org.apache.lucene.codecs.FieldInfosReader;
|
import org.apache.lucene.index.CorruptIndexException;
|
||||||
import org.apache.lucene.codecs.FieldInfosWriter;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
|
import org.apache.lucene.index.FieldInfos;
|
||||||
|
import org.apache.lucene.index.IndexFileNames;
|
||||||
|
import org.apache.lucene.index.SegmentInfo;
|
||||||
import org.apache.lucene.index.FieldInfo.DocValuesType;
|
import org.apache.lucene.index.FieldInfo.DocValuesType;
|
||||||
|
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||||
|
import org.apache.lucene.store.ChecksumIndexInput;
|
||||||
import org.apache.lucene.store.DataOutput;
|
import org.apache.lucene.store.DataOutput;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.store.IOContext;
|
||||||
|
import org.apache.lucene.store.IndexInput;
|
||||||
|
import org.apache.lucene.store.IndexOutput;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Lucene 5.0 Field Infos format.
|
* Lucene 5.0 Field Infos format.
|
||||||
|
@ -91,21 +102,148 @@ import org.apache.lucene.store.DataOutput;
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
public final class Lucene50FieldInfosFormat extends FieldInfosFormat {
|
public final class Lucene50FieldInfosFormat extends FieldInfosFormat {
|
||||||
private final FieldInfosReader reader = new Lucene50FieldInfosReader();
|
|
||||||
private final FieldInfosWriter writer = new Lucene50FieldInfosWriter();
|
|
||||||
|
|
||||||
/** Sole constructor. */
|
/** Sole constructor. */
|
||||||
public Lucene50FieldInfosFormat() {
|
public Lucene50FieldInfosFormat() {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public FieldInfosReader getFieldInfosReader() throws IOException {
|
public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext context) throws IOException {
|
||||||
return reader;
|
final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene50FieldInfosFormat.EXTENSION);
|
||||||
|
try (ChecksumIndexInput input = directory.openChecksumInput(fileName, context)) {
|
||||||
|
Throwable priorE = null;
|
||||||
|
FieldInfo infos[] = null;
|
||||||
|
try {
|
||||||
|
CodecUtil.checkSegmentHeader(input, Lucene50FieldInfosFormat.CODEC_NAME,
|
||||||
|
Lucene50FieldInfosFormat.FORMAT_START,
|
||||||
|
Lucene50FieldInfosFormat.FORMAT_CURRENT,
|
||||||
|
segmentInfo.getId(), segmentSuffix);
|
||||||
|
|
||||||
|
final int size = input.readVInt(); //read in the size
|
||||||
|
infos = new FieldInfo[size];
|
||||||
|
|
||||||
|
for (int i = 0; i < size; i++) {
|
||||||
|
String name = input.readString();
|
||||||
|
final int fieldNumber = input.readVInt();
|
||||||
|
if (fieldNumber < 0) {
|
||||||
|
throw new CorruptIndexException("invalid field number for field: " + name + ", fieldNumber=" + fieldNumber, input);
|
||||||
|
}
|
||||||
|
byte bits = input.readByte();
|
||||||
|
boolean isIndexed = (bits & Lucene50FieldInfosFormat.IS_INDEXED) != 0;
|
||||||
|
boolean storeTermVector = (bits & Lucene50FieldInfosFormat.STORE_TERMVECTOR) != 0;
|
||||||
|
boolean omitNorms = (bits & Lucene50FieldInfosFormat.OMIT_NORMS) != 0;
|
||||||
|
boolean storePayloads = (bits & Lucene50FieldInfosFormat.STORE_PAYLOADS) != 0;
|
||||||
|
final IndexOptions indexOptions;
|
||||||
|
if (!isIndexed) {
|
||||||
|
indexOptions = null;
|
||||||
|
} else if ((bits & Lucene50FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS) != 0) {
|
||||||
|
indexOptions = IndexOptions.DOCS_ONLY;
|
||||||
|
} else if ((bits & Lucene50FieldInfosFormat.OMIT_POSITIONS) != 0) {
|
||||||
|
indexOptions = IndexOptions.DOCS_AND_FREQS;
|
||||||
|
} else if ((bits & Lucene50FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS) != 0) {
|
||||||
|
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
|
||||||
|
} else {
|
||||||
|
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||||
|
}
|
||||||
|
|
||||||
|
// DV Types are packed in one byte
|
||||||
|
byte val = input.readByte();
|
||||||
|
final DocValuesType docValuesType = getDocValuesType(input, (byte) (val & 0x0F));
|
||||||
|
final DocValuesType normsType = getDocValuesType(input, (byte) ((val >>> 4) & 0x0F));
|
||||||
|
final long dvGen = input.readLong();
|
||||||
|
final Map<String,String> attributes = input.readStringStringMap();
|
||||||
|
try {
|
||||||
|
infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads,
|
||||||
|
indexOptions, docValuesType, normsType, dvGen, Collections.unmodifiableMap(attributes));
|
||||||
|
infos[i].checkConsistency();
|
||||||
|
} catch (IllegalStateException e) {
|
||||||
|
throw new CorruptIndexException("invalid fieldinfo for field: " + name + ", fieldNumber=" + fieldNumber, input, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (Throwable exception) {
|
||||||
|
priorE = exception;
|
||||||
|
} finally {
|
||||||
|
CodecUtil.checkFooter(input, priorE);
|
||||||
|
}
|
||||||
|
return new FieldInfos(infos);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static DocValuesType getDocValuesType(IndexInput input, byte b) throws IOException {
|
||||||
|
if (b == 0) {
|
||||||
|
return null;
|
||||||
|
} else if (b == 1) {
|
||||||
|
return DocValuesType.NUMERIC;
|
||||||
|
} else if (b == 2) {
|
||||||
|
return DocValuesType.BINARY;
|
||||||
|
} else if (b == 3) {
|
||||||
|
return DocValuesType.SORTED;
|
||||||
|
} else if (b == 4) {
|
||||||
|
return DocValuesType.SORTED_SET;
|
||||||
|
} else if (b == 5) {
|
||||||
|
return DocValuesType.SORTED_NUMERIC;
|
||||||
|
} else {
|
||||||
|
throw new CorruptIndexException("invalid docvalues byte: " + b, input);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public FieldInfosWriter getFieldInfosWriter() throws IOException {
|
public void write(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException {
|
||||||
return writer;
|
final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene50FieldInfosFormat.EXTENSION);
|
||||||
|
try (IndexOutput output = directory.createOutput(fileName, context)) {
|
||||||
|
CodecUtil.writeSegmentHeader(output, Lucene50FieldInfosFormat.CODEC_NAME, Lucene50FieldInfosFormat.FORMAT_CURRENT, segmentInfo.getId(), segmentSuffix);
|
||||||
|
output.writeVInt(infos.size());
|
||||||
|
for (FieldInfo fi : infos) {
|
||||||
|
fi.checkConsistency();
|
||||||
|
IndexOptions indexOptions = fi.getIndexOptions();
|
||||||
|
byte bits = 0x0;
|
||||||
|
if (fi.hasVectors()) bits |= Lucene50FieldInfosFormat.STORE_TERMVECTOR;
|
||||||
|
if (fi.omitsNorms()) bits |= Lucene50FieldInfosFormat.OMIT_NORMS;
|
||||||
|
if (fi.hasPayloads()) bits |= Lucene50FieldInfosFormat.STORE_PAYLOADS;
|
||||||
|
if (fi.isIndexed()) {
|
||||||
|
bits |= Lucene50FieldInfosFormat.IS_INDEXED;
|
||||||
|
assert indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !fi.hasPayloads();
|
||||||
|
if (indexOptions == IndexOptions.DOCS_ONLY) {
|
||||||
|
bits |= Lucene50FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS;
|
||||||
|
} else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) {
|
||||||
|
bits |= Lucene50FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS;
|
||||||
|
} else if (indexOptions == IndexOptions.DOCS_AND_FREQS) {
|
||||||
|
bits |= Lucene50FieldInfosFormat.OMIT_POSITIONS;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
output.writeString(fi.name);
|
||||||
|
output.writeVInt(fi.number);
|
||||||
|
output.writeByte(bits);
|
||||||
|
|
||||||
|
// pack the DV types in one byte
|
||||||
|
final byte dv = docValuesByte(fi.getDocValuesType());
|
||||||
|
final byte nrm = docValuesByte(fi.getNormType());
|
||||||
|
assert (dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0;
|
||||||
|
byte val = (byte) (0xff & ((nrm << 4) | dv));
|
||||||
|
output.writeByte(val);
|
||||||
|
output.writeLong(fi.getDocValuesGen());
|
||||||
|
output.writeStringStringMap(fi.attributes());
|
||||||
|
}
|
||||||
|
CodecUtil.writeFooter(output);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static byte docValuesByte(DocValuesType type) {
|
||||||
|
if (type == null) {
|
||||||
|
return 0;
|
||||||
|
} else if (type == DocValuesType.NUMERIC) {
|
||||||
|
return 1;
|
||||||
|
} else if (type == DocValuesType.BINARY) {
|
||||||
|
return 2;
|
||||||
|
} else if (type == DocValuesType.SORTED) {
|
||||||
|
return 3;
|
||||||
|
} else if (type == DocValuesType.SORTED_SET) {
|
||||||
|
return 4;
|
||||||
|
} else if (type == DocValuesType.SORTED_NUMERIC) {
|
||||||
|
return 5;
|
||||||
|
} else {
|
||||||
|
throw new AssertionError();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Extension of field infos */
|
/** Extension of field infos */
|
||||||
|
|
|
@ -1,129 +0,0 @@
|
||||||
package org.apache.lucene.codecs.lucene50;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
import org.apache.lucene.codecs.CodecUtil;
|
|
||||||
import org.apache.lucene.codecs.FieldInfosReader;
|
|
||||||
import org.apache.lucene.index.CorruptIndexException;
|
|
||||||
import org.apache.lucene.index.FieldInfo;
|
|
||||||
import org.apache.lucene.index.FieldInfos;
|
|
||||||
import org.apache.lucene.index.IndexFileNames;
|
|
||||||
import org.apache.lucene.index.FieldInfo.DocValuesType;
|
|
||||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
|
||||||
import org.apache.lucene.index.SegmentInfo;
|
|
||||||
import org.apache.lucene.store.ChecksumIndexInput;
|
|
||||||
import org.apache.lucene.store.Directory;
|
|
||||||
import org.apache.lucene.store.IOContext;
|
|
||||||
import org.apache.lucene.store.IndexInput;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Lucene 5.0 FieldInfos reader.
|
|
||||||
*
|
|
||||||
* @lucene.experimental
|
|
||||||
* @see Lucene50FieldInfosFormat
|
|
||||||
*/
|
|
||||||
final class Lucene50FieldInfosReader extends FieldInfosReader {
|
|
||||||
|
|
||||||
/** Sole constructor. */
|
|
||||||
public Lucene50FieldInfosReader() {
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext context) throws IOException {
|
|
||||||
final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene50FieldInfosFormat.EXTENSION);
|
|
||||||
try (ChecksumIndexInput input = directory.openChecksumInput(fileName, context)) {
|
|
||||||
Throwable priorE = null;
|
|
||||||
FieldInfo infos[] = null;
|
|
||||||
try {
|
|
||||||
CodecUtil.checkSegmentHeader(input, Lucene50FieldInfosFormat.CODEC_NAME,
|
|
||||||
Lucene50FieldInfosFormat.FORMAT_START,
|
|
||||||
Lucene50FieldInfosFormat.FORMAT_CURRENT,
|
|
||||||
segmentInfo.getId(), segmentSuffix);
|
|
||||||
|
|
||||||
final int size = input.readVInt(); //read in the size
|
|
||||||
infos = new FieldInfo[size];
|
|
||||||
|
|
||||||
for (int i = 0; i < size; i++) {
|
|
||||||
String name = input.readString();
|
|
||||||
final int fieldNumber = input.readVInt();
|
|
||||||
if (fieldNumber < 0) {
|
|
||||||
throw new CorruptIndexException("invalid field number for field: " + name + ", fieldNumber=" + fieldNumber, input);
|
|
||||||
}
|
|
||||||
byte bits = input.readByte();
|
|
||||||
boolean isIndexed = (bits & Lucene50FieldInfosFormat.IS_INDEXED) != 0;
|
|
||||||
boolean storeTermVector = (bits & Lucene50FieldInfosFormat.STORE_TERMVECTOR) != 0;
|
|
||||||
boolean omitNorms = (bits & Lucene50FieldInfosFormat.OMIT_NORMS) != 0;
|
|
||||||
boolean storePayloads = (bits & Lucene50FieldInfosFormat.STORE_PAYLOADS) != 0;
|
|
||||||
final IndexOptions indexOptions;
|
|
||||||
if (!isIndexed) {
|
|
||||||
indexOptions = null;
|
|
||||||
} else if ((bits & Lucene50FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS) != 0) {
|
|
||||||
indexOptions = IndexOptions.DOCS_ONLY;
|
|
||||||
} else if ((bits & Lucene50FieldInfosFormat.OMIT_POSITIONS) != 0) {
|
|
||||||
indexOptions = IndexOptions.DOCS_AND_FREQS;
|
|
||||||
} else if ((bits & Lucene50FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS) != 0) {
|
|
||||||
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
|
|
||||||
} else {
|
|
||||||
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
|
||||||
}
|
|
||||||
|
|
||||||
// DV Types are packed in one byte
|
|
||||||
byte val = input.readByte();
|
|
||||||
final DocValuesType docValuesType = getDocValuesType(input, (byte) (val & 0x0F));
|
|
||||||
final DocValuesType normsType = getDocValuesType(input, (byte) ((val >>> 4) & 0x0F));
|
|
||||||
final long dvGen = input.readLong();
|
|
||||||
final Map<String,String> attributes = input.readStringStringMap();
|
|
||||||
try {
|
|
||||||
infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads,
|
|
||||||
indexOptions, docValuesType, normsType, dvGen, Collections.unmodifiableMap(attributes));
|
|
||||||
infos[i].checkConsistency();
|
|
||||||
} catch (IllegalStateException e) {
|
|
||||||
throw new CorruptIndexException("invalid fieldinfo for field: " + name + ", fieldNumber=" + fieldNumber, input, e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (Throwable exception) {
|
|
||||||
priorE = exception;
|
|
||||||
} finally {
|
|
||||||
CodecUtil.checkFooter(input, priorE);
|
|
||||||
}
|
|
||||||
return new FieldInfos(infos);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static DocValuesType getDocValuesType(IndexInput input, byte b) throws IOException {
|
|
||||||
if (b == 0) {
|
|
||||||
return null;
|
|
||||||
} else if (b == 1) {
|
|
||||||
return DocValuesType.NUMERIC;
|
|
||||||
} else if (b == 2) {
|
|
||||||
return DocValuesType.BINARY;
|
|
||||||
} else if (b == 3) {
|
|
||||||
return DocValuesType.SORTED;
|
|
||||||
} else if (b == 4) {
|
|
||||||
return DocValuesType.SORTED_SET;
|
|
||||||
} else if (b == 5) {
|
|
||||||
return DocValuesType.SORTED_NUMERIC;
|
|
||||||
} else {
|
|
||||||
throw new CorruptIndexException("invalid docvalues byte: " + b, input);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,104 +0,0 @@
|
||||||
package org.apache.lucene.codecs.lucene50;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
import org.apache.lucene.codecs.CodecUtil;
|
|
||||||
import org.apache.lucene.codecs.FieldInfosWriter;
|
|
||||||
import org.apache.lucene.index.FieldInfo.DocValuesType;
|
|
||||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
|
||||||
import org.apache.lucene.index.FieldInfo;
|
|
||||||
import org.apache.lucene.index.FieldInfos;
|
|
||||||
import org.apache.lucene.index.IndexFileNames;
|
|
||||||
import org.apache.lucene.index.SegmentInfo;
|
|
||||||
import org.apache.lucene.store.IndexOutput;
|
|
||||||
import org.apache.lucene.store.Directory;
|
|
||||||
import org.apache.lucene.store.IOContext;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Lucene 5.0 FieldInfos writer.
|
|
||||||
*
|
|
||||||
* @see Lucene50FieldInfosFormat
|
|
||||||
* @lucene.experimental
|
|
||||||
*/
|
|
||||||
final class Lucene50FieldInfosWriter extends FieldInfosWriter {
|
|
||||||
|
|
||||||
/** Sole constructor. */
|
|
||||||
public Lucene50FieldInfosWriter() {
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void write(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException {
|
|
||||||
final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene50FieldInfosFormat.EXTENSION);
|
|
||||||
try (IndexOutput output = directory.createOutput(fileName, context)) {
|
|
||||||
CodecUtil.writeSegmentHeader(output, Lucene50FieldInfosFormat.CODEC_NAME, Lucene50FieldInfosFormat.FORMAT_CURRENT, segmentInfo.getId(), segmentSuffix);
|
|
||||||
output.writeVInt(infos.size());
|
|
||||||
for (FieldInfo fi : infos) {
|
|
||||||
fi.checkConsistency();
|
|
||||||
IndexOptions indexOptions = fi.getIndexOptions();
|
|
||||||
byte bits = 0x0;
|
|
||||||
if (fi.hasVectors()) bits |= Lucene50FieldInfosFormat.STORE_TERMVECTOR;
|
|
||||||
if (fi.omitsNorms()) bits |= Lucene50FieldInfosFormat.OMIT_NORMS;
|
|
||||||
if (fi.hasPayloads()) bits |= Lucene50FieldInfosFormat.STORE_PAYLOADS;
|
|
||||||
if (fi.isIndexed()) {
|
|
||||||
bits |= Lucene50FieldInfosFormat.IS_INDEXED;
|
|
||||||
assert indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !fi.hasPayloads();
|
|
||||||
if (indexOptions == IndexOptions.DOCS_ONLY) {
|
|
||||||
bits |= Lucene50FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS;
|
|
||||||
} else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) {
|
|
||||||
bits |= Lucene50FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS;
|
|
||||||
} else if (indexOptions == IndexOptions.DOCS_AND_FREQS) {
|
|
||||||
bits |= Lucene50FieldInfosFormat.OMIT_POSITIONS;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
output.writeString(fi.name);
|
|
||||||
output.writeVInt(fi.number);
|
|
||||||
output.writeByte(bits);
|
|
||||||
|
|
||||||
// pack the DV types in one byte
|
|
||||||
final byte dv = docValuesByte(fi.getDocValuesType());
|
|
||||||
final byte nrm = docValuesByte(fi.getNormType());
|
|
||||||
assert (dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0;
|
|
||||||
byte val = (byte) (0xff & ((nrm << 4) | dv));
|
|
||||||
output.writeByte(val);
|
|
||||||
output.writeLong(fi.getDocValuesGen());
|
|
||||||
output.writeStringStringMap(fi.attributes());
|
|
||||||
}
|
|
||||||
CodecUtil.writeFooter(output);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static byte docValuesByte(DocValuesType type) {
|
|
||||||
if (type == null) {
|
|
||||||
return 0;
|
|
||||||
} else if (type == DocValuesType.NUMERIC) {
|
|
||||||
return 1;
|
|
||||||
} else if (type == DocValuesType.BINARY) {
|
|
||||||
return 2;
|
|
||||||
} else if (type == DocValuesType.SORTED) {
|
|
||||||
return 3;
|
|
||||||
} else if (type == DocValuesType.SORTED_SET) {
|
|
||||||
return 4;
|
|
||||||
} else if (type == DocValuesType.SORTED_NUMERIC) {
|
|
||||||
return 5;
|
|
||||||
} else {
|
|
||||||
throw new AssertionError();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -17,14 +17,25 @@ package org.apache.lucene.codecs.lucene50;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.codecs.CodecUtil;
|
import org.apache.lucene.codecs.CodecUtil;
|
||||||
import org.apache.lucene.codecs.SegmentInfoFormat;
|
import org.apache.lucene.codecs.SegmentInfoFormat;
|
||||||
import org.apache.lucene.codecs.SegmentInfoReader;
|
import org.apache.lucene.index.CorruptIndexException;
|
||||||
import org.apache.lucene.codecs.SegmentInfoWriter;
|
import org.apache.lucene.index.IndexFileNames;
|
||||||
import org.apache.lucene.index.IndexWriter; // javadocs
|
import org.apache.lucene.index.IndexWriter; // javadocs
|
||||||
import org.apache.lucene.index.SegmentInfo; // javadocs
|
import org.apache.lucene.index.SegmentInfo; // javadocs
|
||||||
import org.apache.lucene.index.SegmentInfos; // javadocs
|
import org.apache.lucene.index.SegmentInfos; // javadocs
|
||||||
|
import org.apache.lucene.store.ChecksumIndexInput;
|
||||||
import org.apache.lucene.store.DataOutput; // javadocs
|
import org.apache.lucene.store.DataOutput; // javadocs
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.store.IOContext;
|
||||||
|
import org.apache.lucene.store.IndexOutput;
|
||||||
|
import org.apache.lucene.util.IOUtils;
|
||||||
|
import org.apache.lucene.util.StringHelper;
|
||||||
|
import org.apache.lucene.util.Version;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Lucene 5.0 Segment info format.
|
* Lucene 5.0 Segment info format.
|
||||||
|
@ -67,21 +78,86 @@ import org.apache.lucene.store.DataOutput; // javadocs
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
public class Lucene50SegmentInfoFormat extends SegmentInfoFormat {
|
public class Lucene50SegmentInfoFormat extends SegmentInfoFormat {
|
||||||
private final SegmentInfoReader reader = new Lucene50SegmentInfoReader();
|
|
||||||
private final SegmentInfoWriter writer = new Lucene50SegmentInfoWriter();
|
|
||||||
|
|
||||||
/** Sole constructor. */
|
/** Sole constructor. */
|
||||||
public Lucene50SegmentInfoFormat() {
|
public Lucene50SegmentInfoFormat() {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public SegmentInfoReader getSegmentInfoReader() {
|
public SegmentInfo read(Directory dir, String segment, IOContext context) throws IOException {
|
||||||
return reader;
|
final String fileName = IndexFileNames.segmentFileName(segment, "", Lucene50SegmentInfoFormat.SI_EXTENSION);
|
||||||
|
try (ChecksumIndexInput input = dir.openChecksumInput(fileName, context)) {
|
||||||
|
Throwable priorE = null;
|
||||||
|
SegmentInfo si = null;
|
||||||
|
try {
|
||||||
|
CodecUtil.checkHeader(input, Lucene50SegmentInfoFormat.CODEC_NAME,
|
||||||
|
Lucene50SegmentInfoFormat.VERSION_START,
|
||||||
|
Lucene50SegmentInfoFormat.VERSION_CURRENT);
|
||||||
|
final Version version = Version.fromBits(input.readInt(), input.readInt(), input.readInt());
|
||||||
|
|
||||||
|
final int docCount = input.readInt();
|
||||||
|
if (docCount < 0) {
|
||||||
|
throw new CorruptIndexException("invalid docCount: " + docCount, input);
|
||||||
|
}
|
||||||
|
final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;
|
||||||
|
final Map<String,String> diagnostics = input.readStringStringMap();
|
||||||
|
final Set<String> files = input.readStringSet();
|
||||||
|
|
||||||
|
byte[] id = new byte[StringHelper.ID_LENGTH];
|
||||||
|
input.readBytes(id, 0, id.length);
|
||||||
|
|
||||||
|
si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics, id);
|
||||||
|
si.setFiles(files);
|
||||||
|
} catch (Throwable exception) {
|
||||||
|
priorE = exception;
|
||||||
|
} finally {
|
||||||
|
CodecUtil.checkFooter(input, priorE);
|
||||||
|
}
|
||||||
|
return si;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public SegmentInfoWriter getSegmentInfoWriter() {
|
public void write(Directory dir, SegmentInfo si, IOContext ioContext) throws IOException {
|
||||||
return writer;
|
final String fileName = IndexFileNames.segmentFileName(si.name, "", Lucene50SegmentInfoFormat.SI_EXTENSION);
|
||||||
|
si.addFile(fileName);
|
||||||
|
|
||||||
|
boolean success = false;
|
||||||
|
try (IndexOutput output = dir.createOutput(fileName, ioContext)) {
|
||||||
|
CodecUtil.writeHeader(output, Lucene50SegmentInfoFormat.CODEC_NAME, Lucene50SegmentInfoFormat.VERSION_CURRENT);
|
||||||
|
Version version = si.getVersion();
|
||||||
|
if (version.major < 5) {
|
||||||
|
throw new IllegalArgumentException("invalid major version: should be >= 5 but got: " + version.major + " segment=" + si);
|
||||||
|
}
|
||||||
|
// Write the Lucene version that created this segment, since 3.1
|
||||||
|
output.writeInt(version.major);
|
||||||
|
output.writeInt(version.minor);
|
||||||
|
output.writeInt(version.bugfix);
|
||||||
|
assert version.prerelease == 0;
|
||||||
|
output.writeInt(si.getDocCount());
|
||||||
|
|
||||||
|
output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
|
||||||
|
output.writeStringStringMap(si.getDiagnostics());
|
||||||
|
Set<String> files = si.files();
|
||||||
|
for (String file : files) {
|
||||||
|
if (!IndexFileNames.parseSegmentName(file).equals(si.name)) {
|
||||||
|
throw new IllegalArgumentException("invalid files: expected segment=" + si.name + ", got=" + files);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
output.writeStringSet(files);
|
||||||
|
byte[] id = si.getId();
|
||||||
|
if (id.length != StringHelper.ID_LENGTH) {
|
||||||
|
throw new IllegalArgumentException("invalid id, got=" + StringHelper.idToString(id));
|
||||||
|
}
|
||||||
|
output.writeBytes(id, 0, id.length);
|
||||||
|
CodecUtil.writeFooter(output);
|
||||||
|
success = true;
|
||||||
|
} finally {
|
||||||
|
if (!success) {
|
||||||
|
// TODO: are we doing this outside of the tracking wrapper? why must SIWriter cleanup like this?
|
||||||
|
IOUtils.deleteFilesIgnoringExceptions(si.dir, fileName);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** File extension used to store {@link SegmentInfo}. */
|
/** File extension used to store {@link SegmentInfo}. */
|
||||||
|
|
|
@ -1,86 +0,0 @@
|
||||||
package org.apache.lucene.codecs.lucene50;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.text.ParseException;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
import org.apache.lucene.codecs.CodecUtil;
|
|
||||||
import org.apache.lucene.codecs.SegmentInfoReader;
|
|
||||||
import org.apache.lucene.index.CorruptIndexException;
|
|
||||||
import org.apache.lucene.index.IndexFileNames;
|
|
||||||
import org.apache.lucene.index.SegmentInfo;
|
|
||||||
import org.apache.lucene.store.ChecksumIndexInput;
|
|
||||||
import org.apache.lucene.store.Directory;
|
|
||||||
import org.apache.lucene.store.IOContext;
|
|
||||||
import org.apache.lucene.util.StringHelper;
|
|
||||||
import org.apache.lucene.util.Version;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Lucene 5.0 implementation of {@link SegmentInfoReader}.
|
|
||||||
*
|
|
||||||
* @see Lucene50SegmentInfoFormat
|
|
||||||
* @lucene.experimental
|
|
||||||
*/
|
|
||||||
public class Lucene50SegmentInfoReader extends SegmentInfoReader {
|
|
||||||
|
|
||||||
/** Sole constructor. */
|
|
||||||
public Lucene50SegmentInfoReader() {
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public SegmentInfo read(Directory dir, String segment, IOContext context) throws IOException {
|
|
||||||
final String fileName = IndexFileNames.segmentFileName(segment, "", Lucene50SegmentInfoFormat.SI_EXTENSION);
|
|
||||||
try (ChecksumIndexInput input = dir.openChecksumInput(fileName, context)) {
|
|
||||||
Throwable priorE = null;
|
|
||||||
SegmentInfo si = null;
|
|
||||||
try {
|
|
||||||
CodecUtil.checkHeader(input, Lucene50SegmentInfoFormat.CODEC_NAME,
|
|
||||||
Lucene50SegmentInfoFormat.VERSION_START,
|
|
||||||
Lucene50SegmentInfoFormat.VERSION_CURRENT);
|
|
||||||
final Version version;
|
|
||||||
try {
|
|
||||||
version = Version.parse(input.readString());
|
|
||||||
} catch (ParseException pe) {
|
|
||||||
throw new CorruptIndexException("unable to parse version string: " + pe.getMessage(), input, pe);
|
|
||||||
}
|
|
||||||
|
|
||||||
final int docCount = input.readInt();
|
|
||||||
if (docCount < 0) {
|
|
||||||
throw new CorruptIndexException("invalid docCount: " + docCount, input);
|
|
||||||
}
|
|
||||||
final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;
|
|
||||||
final Map<String,String> diagnostics = input.readStringStringMap();
|
|
||||||
final Set<String> files = input.readStringSet();
|
|
||||||
|
|
||||||
byte[] id = new byte[StringHelper.ID_LENGTH];
|
|
||||||
input.readBytes(id, 0, id.length);
|
|
||||||
|
|
||||||
si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics, id);
|
|
||||||
si.setFiles(files);
|
|
||||||
} catch (Throwable exception) {
|
|
||||||
priorE = exception;
|
|
||||||
} finally {
|
|
||||||
CodecUtil.checkFooter(input, priorE);
|
|
||||||
}
|
|
||||||
return si;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,87 +0,0 @@
|
||||||
package org.apache.lucene.codecs.lucene50;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
import org.apache.lucene.codecs.CodecUtil;
|
|
||||||
import org.apache.lucene.codecs.SegmentInfoWriter;
|
|
||||||
import org.apache.lucene.index.FieldInfos;
|
|
||||||
import org.apache.lucene.index.IndexFileNames;
|
|
||||||
import org.apache.lucene.index.SegmentInfo;
|
|
||||||
import org.apache.lucene.store.Directory;
|
|
||||||
import org.apache.lucene.store.IOContext;
|
|
||||||
import org.apache.lucene.store.IndexOutput;
|
|
||||||
import org.apache.lucene.util.IOUtils;
|
|
||||||
import org.apache.lucene.util.StringHelper;
|
|
||||||
import org.apache.lucene.util.Version;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Lucene 5.0 implementation of {@link SegmentInfoWriter}.
|
|
||||||
*
|
|
||||||
* @see Lucene50SegmentInfoFormat
|
|
||||||
* @lucene.experimental
|
|
||||||
*/
|
|
||||||
public class Lucene50SegmentInfoWriter extends SegmentInfoWriter {
|
|
||||||
|
|
||||||
/** Sole constructor. */
|
|
||||||
public Lucene50SegmentInfoWriter() {
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Save a single segment's info. */
|
|
||||||
@Override
|
|
||||||
public void write(Directory dir, SegmentInfo si, FieldInfos fis, IOContext ioContext) throws IOException {
|
|
||||||
final String fileName = IndexFileNames.segmentFileName(si.name, "", Lucene50SegmentInfoFormat.SI_EXTENSION);
|
|
||||||
si.addFile(fileName);
|
|
||||||
|
|
||||||
boolean success = false;
|
|
||||||
try (IndexOutput output = dir.createOutput(fileName, ioContext)) {
|
|
||||||
CodecUtil.writeHeader(output, Lucene50SegmentInfoFormat.CODEC_NAME, Lucene50SegmentInfoFormat.VERSION_CURRENT);
|
|
||||||
Version version = si.getVersion();
|
|
||||||
if (version.major < 5) {
|
|
||||||
throw new IllegalArgumentException("invalid major version: should be >= 5 but got: " + version.major + " segment=" + si);
|
|
||||||
}
|
|
||||||
// Write the Lucene version that created this segment, since 3.1
|
|
||||||
output.writeString(version.toString());
|
|
||||||
output.writeInt(si.getDocCount());
|
|
||||||
|
|
||||||
output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
|
|
||||||
output.writeStringStringMap(si.getDiagnostics());
|
|
||||||
Set<String> files = si.files();
|
|
||||||
for (String file : files) {
|
|
||||||
if (!IndexFileNames.parseSegmentName(file).equals(si.name)) {
|
|
||||||
throw new IllegalArgumentException("invalid files: expected segment=" + si.name + ", got=" + files);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
output.writeStringSet(files);
|
|
||||||
byte[] id = si.getId();
|
|
||||||
if (id.length != StringHelper.ID_LENGTH) {
|
|
||||||
throw new IllegalArgumentException("invalid id, got=" + StringHelper.idToString(id));
|
|
||||||
}
|
|
||||||
output.writeBytes(id, 0, id.length);
|
|
||||||
CodecUtil.writeFooter(output);
|
|
||||||
success = true;
|
|
||||||
} finally {
|
|
||||||
if (!success) {
|
|
||||||
// TODO: are we doing this outside of the tracking wrapper? why must SIWriter cleanup like this?
|
|
||||||
IOUtils.deleteFilesIgnoringExceptions(si.dir, fileName);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -25,7 +25,6 @@ import java.util.Map;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.codecs.DocValuesConsumer;
|
import org.apache.lucene.codecs.DocValuesConsumer;
|
||||||
import org.apache.lucene.codecs.DocValuesFormat;
|
import org.apache.lucene.codecs.DocValuesFormat;
|
||||||
import org.apache.lucene.codecs.FieldInfosWriter;
|
|
||||||
import org.apache.lucene.codecs.NormsConsumer;
|
import org.apache.lucene.codecs.NormsConsumer;
|
||||||
import org.apache.lucene.codecs.NormsFormat;
|
import org.apache.lucene.codecs.NormsFormat;
|
||||||
import org.apache.lucene.codecs.StoredFieldsWriter;
|
import org.apache.lucene.codecs.StoredFieldsWriter;
|
||||||
|
@ -118,8 +117,7 @@ final class DefaultIndexingChain extends DocConsumer {
|
||||||
// consumer can alter the FieldInfo* if necessary. EG,
|
// consumer can alter the FieldInfo* if necessary. EG,
|
||||||
// FreqProxTermsWriter does this with
|
// FreqProxTermsWriter does this with
|
||||||
// FieldInfo.storePayload.
|
// FieldInfo.storePayload.
|
||||||
FieldInfosWriter infosWriter = docWriter.codec.fieldInfosFormat().getFieldInfosWriter();
|
docWriter.codec.fieldInfosFormat().write(state.directory, state.segmentInfo, "", state.fieldInfos, IOContext.DEFAULT);
|
||||||
infosWriter.write(state.directory, state.segmentInfo, "", state.fieldInfos, IOContext.DEFAULT);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Writes all buffered doc values (called from {@link #flush}). */
|
/** Writes all buffered doc values (called from {@link #flush}). */
|
||||||
|
@ -622,6 +620,10 @@ final class DefaultIndexingChain extends DocConsumer {
|
||||||
invertState.lastStartOffset = startOffset;
|
invertState.lastStartOffset = startOffset;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
invertState.length++;
|
||||||
|
if (invertState.length < 0) {
|
||||||
|
throw new IllegalArgumentException("too many tokens in field '" + field.name() + "'");
|
||||||
|
}
|
||||||
//System.out.println(" term=" + invertState.termAttribute);
|
//System.out.println(" term=" + invertState.termAttribute);
|
||||||
|
|
||||||
// If we hit an exception in here, we abort
|
// If we hit an exception in here, we abort
|
||||||
|
@ -633,8 +635,6 @@ final class DefaultIndexingChain extends DocConsumer {
|
||||||
aborting = true;
|
aborting = true;
|
||||||
termsHashPerField.add();
|
termsHashPerField.add();
|
||||||
aborting = false;
|
aborting = false;
|
||||||
|
|
||||||
invertState.length++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// trigger streams to perform end-of-stream operations
|
// trigger streams to perform end-of-stream operations
|
||||||
|
|
|
@ -517,7 +517,7 @@ class DocumentsWriterPerThread {
|
||||||
// creating CFS so that 1) .si isn't slurped into CFS,
|
// creating CFS so that 1) .si isn't slurped into CFS,
|
||||||
// and 2) .si reflects useCompoundFile=true change
|
// and 2) .si reflects useCompoundFile=true change
|
||||||
// above:
|
// above:
|
||||||
codec.segmentInfoFormat().getSegmentInfoWriter().write(directory, newSegment.info, flushedSegment.fieldInfos, context);
|
codec.segmentInfoFormat().write(directory, newSegment.info, context);
|
||||||
|
|
||||||
// TODO: ideally we would freeze newSegment here!!
|
// TODO: ideally we would freeze newSegment here!!
|
||||||
// because any changes after writing the .si will be
|
// because any changes after writing the .si will be
|
||||||
|
|
|
@ -25,9 +25,11 @@ import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
|
import java.util.Set;
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
|
|
||||||
import org.apache.lucene.store.AlreadyClosedException;
|
import org.apache.lucene.store.AlreadyClosedException;
|
||||||
|
@ -78,7 +80,7 @@ final class IndexFileDeleter implements Closeable {
|
||||||
/* Files that we tried to delete but failed (likely
|
/* Files that we tried to delete but failed (likely
|
||||||
* because they are open and we are running on Windows),
|
* because they are open and we are running on Windows),
|
||||||
* so we will retry them again later: */
|
* so we will retry them again later: */
|
||||||
private List<String> deletable;
|
private Set<String> deletable;
|
||||||
|
|
||||||
/* Reference count for all files in the index.
|
/* Reference count for all files in the index.
|
||||||
* Counts how many existing commits reference a file.
|
* Counts how many existing commits reference a file.
|
||||||
|
@ -359,7 +361,7 @@ final class IndexFileDeleter implements Closeable {
|
||||||
* Remove the CommitPoints in the commitsToDelete List by
|
* Remove the CommitPoints in the commitsToDelete List by
|
||||||
* DecRef'ing all files from each SegmentInfos.
|
* DecRef'ing all files from each SegmentInfos.
|
||||||
*/
|
*/
|
||||||
private void deleteCommits() throws IOException {
|
private void deleteCommits() {
|
||||||
|
|
||||||
int size = commitsToDelete.size();
|
int size = commitsToDelete.size();
|
||||||
|
|
||||||
|
@ -384,7 +386,7 @@ final class IndexFileDeleter implements Closeable {
|
||||||
commitsToDelete.clear();
|
commitsToDelete.clear();
|
||||||
|
|
||||||
// NOTE: does nothing if firstThrowable is null
|
// NOTE: does nothing if firstThrowable is null
|
||||||
IOUtils.reThrow(firstThrowable);
|
IOUtils.reThrowUnchecked(firstThrowable);
|
||||||
|
|
||||||
// Now compact commits to remove deleted ones (preserving the sort):
|
// Now compact commits to remove deleted ones (preserving the sort):
|
||||||
size = commits.size();
|
size = commits.size();
|
||||||
|
@ -462,7 +464,7 @@ final class IndexFileDeleter implements Closeable {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void close() throws IOException {
|
public void close() {
|
||||||
// DecRef old files from the last checkpoint, if any:
|
// DecRef old files from the last checkpoint, if any:
|
||||||
assert locked();
|
assert locked();
|
||||||
|
|
||||||
|
@ -498,14 +500,12 @@ final class IndexFileDeleter implements Closeable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void deletePendingFiles() throws IOException {
|
public void deletePendingFiles() {
|
||||||
assert locked();
|
assert locked();
|
||||||
if (deletable != null) {
|
if (deletable != null) {
|
||||||
List<String> oldDeletable = deletable;
|
Set<String> oldDeletable = deletable;
|
||||||
deletable = null;
|
deletable = null;
|
||||||
int size = oldDeletable.size();
|
for(String fileName : oldDeletable) {
|
||||||
for(int i=0;i<size;i++) {
|
|
||||||
String fileName = oldDeletable.get(i);
|
|
||||||
if (infoStream.isEnabled("IFD")) {
|
if (infoStream.isEnabled("IFD")) {
|
||||||
infoStream.message("IFD", "delete pending file " + fileName);
|
infoStream.message("IFD", "delete pending file " + fileName);
|
||||||
}
|
}
|
||||||
|
@ -611,7 +611,7 @@ final class IndexFileDeleter implements Closeable {
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Decrefs all provided files, even on exception; throws first exception hit, if any. */
|
/** Decrefs all provided files, even on exception; throws first exception hit, if any. */
|
||||||
void decRef(Collection<String> files) throws IOException {
|
void decRef(Collection<String> files) {
|
||||||
assert locked();
|
assert locked();
|
||||||
Throwable firstThrowable = null;
|
Throwable firstThrowable = null;
|
||||||
for(final String file : files) {
|
for(final String file : files) {
|
||||||
|
@ -626,12 +626,12 @@ final class IndexFileDeleter implements Closeable {
|
||||||
}
|
}
|
||||||
|
|
||||||
// NOTE: does nothing if firstThrowable is null
|
// NOTE: does nothing if firstThrowable is null
|
||||||
IOUtils.reThrow(firstThrowable);
|
IOUtils.reThrowUnchecked(firstThrowable);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Decrefs all provided files, ignoring any exceptions hit; call this if
|
/** Decrefs all provided files, ignoring any exceptions hit; call this if
|
||||||
* you are already handling an exception. */
|
* you are already handling an exception. */
|
||||||
void decRefWhileHandlingException(Collection<String> files) throws IOException {
|
void decRefWhileHandlingException(Collection<String> files) {
|
||||||
assert locked();
|
assert locked();
|
||||||
for(final String file : files) {
|
for(final String file : files) {
|
||||||
try {
|
try {
|
||||||
|
@ -641,7 +641,7 @@ final class IndexFileDeleter implements Closeable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void decRef(String fileName) throws IOException {
|
void decRef(String fileName) {
|
||||||
assert locked();
|
assert locked();
|
||||||
RefCount rc = getRefCount(fileName);
|
RefCount rc = getRefCount(fileName);
|
||||||
if (infoStream.isEnabled("IFD")) {
|
if (infoStream.isEnabled("IFD")) {
|
||||||
|
@ -679,6 +679,8 @@ final class IndexFileDeleter implements Closeable {
|
||||||
RefCount rc;
|
RefCount rc;
|
||||||
if (!refCounts.containsKey(fileName)) {
|
if (!refCounts.containsKey(fileName)) {
|
||||||
rc = new RefCount(fileName);
|
rc = new RefCount(fileName);
|
||||||
|
// We should never incRef a file we are already wanting to delete:
|
||||||
|
assert deletable == null || deletable.contains(fileName) == false: "file \"" + fileName + "\" cannot be incRef'd: it's already pending delete";
|
||||||
refCounts.put(fileName, rc);
|
refCounts.put(fileName, rc);
|
||||||
} else {
|
} else {
|
||||||
rc = refCounts.get(fileName);
|
rc = refCounts.get(fileName);
|
||||||
|
@ -686,7 +688,7 @@ final class IndexFileDeleter implements Closeable {
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
void deleteFiles(List<String> files) throws IOException {
|
void deleteFiles(List<String> files) {
|
||||||
assert locked();
|
assert locked();
|
||||||
for(final String file: files) {
|
for(final String file: files) {
|
||||||
deleteFile(file);
|
deleteFile(file);
|
||||||
|
@ -695,7 +697,7 @@ final class IndexFileDeleter implements Closeable {
|
||||||
|
|
||||||
/** Deletes the specified files, but only if they are new
|
/** Deletes the specified files, but only if they are new
|
||||||
* (have not yet been incref'd). */
|
* (have not yet been incref'd). */
|
||||||
void deleteNewFiles(Collection<String> files) throws IOException {
|
void deleteNewFiles(Collection<String> files) {
|
||||||
assert locked();
|
assert locked();
|
||||||
for (final String fileName: files) {
|
for (final String fileName: files) {
|
||||||
// NOTE: it's very unusual yet possible for the
|
// NOTE: it's very unusual yet possible for the
|
||||||
|
@ -713,8 +715,7 @@ final class IndexFileDeleter implements Closeable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void deleteFile(String fileName)
|
void deleteFile(String fileName) {
|
||||||
throws IOException {
|
|
||||||
assert locked();
|
assert locked();
|
||||||
ensureOpen();
|
ensureOpen();
|
||||||
try {
|
try {
|
||||||
|
@ -734,7 +735,7 @@ final class IndexFileDeleter implements Closeable {
|
||||||
infoStream.message("IFD", "unable to remove file \"" + fileName + "\": " + e.toString() + "; Will re-try later.");
|
infoStream.message("IFD", "unable to remove file \"" + fileName + "\": " + e.toString() + "; Will re-try later.");
|
||||||
}
|
}
|
||||||
if (deletable == null) {
|
if (deletable == null) {
|
||||||
deletable = new ArrayList<>();
|
deletable = new HashSet<>();
|
||||||
}
|
}
|
||||||
deletable.add(fileName); // add to deletable
|
deletable.add(fileName); // add to deletable
|
||||||
}
|
}
|
||||||
|
|
|
@ -41,7 +41,7 @@ import java.util.concurrent.atomic.AtomicLong;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.codecs.Codec;
|
import org.apache.lucene.codecs.Codec;
|
||||||
import org.apache.lucene.codecs.FieldInfosReader;
|
import org.apache.lucene.codecs.FieldInfosFormat;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.index.DocValuesUpdate.BinaryDocValuesUpdate;
|
import org.apache.lucene.index.DocValuesUpdate.BinaryDocValuesUpdate;
|
||||||
import org.apache.lucene.index.DocValuesUpdate.NumericDocValuesUpdate;
|
import org.apache.lucene.index.DocValuesUpdate.NumericDocValuesUpdate;
|
||||||
|
@ -871,7 +871,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
|
||||||
// TODO: fix tests abusing this method!
|
// TODO: fix tests abusing this method!
|
||||||
static FieldInfos readFieldInfos(SegmentCommitInfo si) throws IOException {
|
static FieldInfos readFieldInfos(SegmentCommitInfo si) throws IOException {
|
||||||
Codec codec = si.info.getCodec();
|
Codec codec = si.info.getCodec();
|
||||||
FieldInfosReader reader = codec.fieldInfosFormat().getFieldInfosReader();
|
FieldInfosFormat reader = codec.fieldInfosFormat();
|
||||||
|
|
||||||
if (si.hasFieldUpdates()) {
|
if (si.hasFieldUpdates()) {
|
||||||
// there are updates, we read latest (always outside of CFS)
|
// there are updates, we read latest (always outside of CFS)
|
||||||
|
@ -2024,6 +2024,10 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
|
||||||
|
|
||||||
deleter.close();
|
deleter.close();
|
||||||
|
|
||||||
|
// Must set closed while inside same sync block where we call deleter.refresh, else concurrent threads may try to sneak a flush in,
|
||||||
|
// after we leave this sync block and before we enter the sync block in the finally clause below that sets closed:
|
||||||
|
closed = true;
|
||||||
|
|
||||||
IOUtils.close(writeLock); // release write lock
|
IOUtils.close(writeLock); // release write lock
|
||||||
writeLock = null;
|
writeLock = null;
|
||||||
|
|
||||||
|
@ -2267,6 +2271,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
|
||||||
try {
|
try {
|
||||||
synchronized (this) {
|
synchronized (this) {
|
||||||
// Lock order IW -> BDS
|
// Lock order IW -> BDS
|
||||||
|
ensureOpen(false);
|
||||||
synchronized (bufferedUpdatesStream) {
|
synchronized (bufferedUpdatesStream) {
|
||||||
if (infoStream.isEnabled("IW")) {
|
if (infoStream.isEnabled("IW")) {
|
||||||
infoStream.message("IW", "publishFlushedSegment");
|
infoStream.message("IW", "publishFlushedSegment");
|
||||||
|
@ -2542,10 +2547,9 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
MergeState mergeState;
|
|
||||||
boolean success = false;
|
boolean success = false;
|
||||||
try {
|
try {
|
||||||
mergeState = merger.merge(); // merge 'em
|
merger.merge(); // merge 'em
|
||||||
success = true;
|
success = true;
|
||||||
} finally {
|
} finally {
|
||||||
if (!success) {
|
if (!success) {
|
||||||
|
@ -2594,7 +2598,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
|
||||||
// above:
|
// above:
|
||||||
success = false;
|
success = false;
|
||||||
try {
|
try {
|
||||||
codec.segmentInfoFormat().getSegmentInfoWriter().write(trackingDir, info, mergeState.mergeFieldInfos, context);
|
codec.segmentInfoFormat().write(trackingDir, info, context);
|
||||||
success = true;
|
success = true;
|
||||||
} finally {
|
} finally {
|
||||||
if (!success) {
|
if (!success) {
|
||||||
|
@ -3845,8 +3849,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
|
||||||
|
|
||||||
merge.checkAborted(directory);
|
merge.checkAborted(directory);
|
||||||
|
|
||||||
final String mergedName = merge.info.info.name;
|
|
||||||
|
|
||||||
List<SegmentCommitInfo> sourceSegments = merge.segments;
|
List<SegmentCommitInfo> sourceSegments = merge.segments;
|
||||||
|
|
||||||
IOContext context = new IOContext(merge.getMergeInfo());
|
IOContext context = new IOContext(merge.getMergeInfo());
|
||||||
|
@ -4060,7 +4062,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
|
||||||
// above:
|
// above:
|
||||||
boolean success2 = false;
|
boolean success2 = false;
|
||||||
try {
|
try {
|
||||||
codec.segmentInfoFormat().getSegmentInfoWriter().write(directory, merge.info.info, mergeState.mergeFieldInfos, context);
|
codec.segmentInfoFormat().write(directory, merge.info.info, context);
|
||||||
success2 = true;
|
success2 = true;
|
||||||
} finally {
|
} finally {
|
||||||
if (!success2) {
|
if (!success2) {
|
||||||
|
@ -4511,7 +4513,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
|
||||||
Set<String> siFiles = new HashSet<>();
|
Set<String> siFiles = new HashSet<>();
|
||||||
for (String cfsFile : cfsFiles) {
|
for (String cfsFile : cfsFiles) {
|
||||||
siFiles.add(cfsFile);
|
siFiles.add(cfsFile);
|
||||||
};
|
}
|
||||||
info.setFiles(siFiles);
|
info.setFiles(siFiles);
|
||||||
|
|
||||||
return files;
|
return files;
|
||||||
|
|
|
@ -117,8 +117,9 @@ public abstract class MergePolicy {
|
||||||
* @param segments List of {@link SegmentCommitInfo}s
|
* @param segments List of {@link SegmentCommitInfo}s
|
||||||
* to be merged. */
|
* to be merged. */
|
||||||
public OneMerge(List<SegmentCommitInfo> segments) {
|
public OneMerge(List<SegmentCommitInfo> segments) {
|
||||||
if (0 == segments.size())
|
if (0 == segments.size()) {
|
||||||
throw new RuntimeException("segments must include at least one segment");
|
throw new RuntimeException("segments must include at least one segment");
|
||||||
|
}
|
||||||
// clone the list, as the in list may be based off original SegmentInfos and may be modified
|
// clone the list, as the in list may be based off original SegmentInfos and may be modified
|
||||||
this.segments = new ArrayList<>(segments);
|
this.segments = new ArrayList<>(segments);
|
||||||
int count = 0;
|
int count = 0;
|
||||||
|
@ -239,14 +240,17 @@ public abstract class MergePolicy {
|
||||||
StringBuilder b = new StringBuilder();
|
StringBuilder b = new StringBuilder();
|
||||||
final int numSegments = segments.size();
|
final int numSegments = segments.size();
|
||||||
for(int i=0;i<numSegments;i++) {
|
for(int i=0;i<numSegments;i++) {
|
||||||
if (i > 0) b.append(' ');
|
if (i > 0) {
|
||||||
|
b.append(' ');
|
||||||
|
}
|
||||||
b.append(segments.get(i).toString(dir, 0));
|
b.append(segments.get(i).toString(dir, 0));
|
||||||
}
|
}
|
||||||
if (info != null) {
|
if (info != null) {
|
||||||
b.append(" into ").append(info.info.name);
|
b.append(" into ").append(info.info.name);
|
||||||
}
|
}
|
||||||
if (maxNumSegments != -1)
|
if (maxNumSegments != -1) {
|
||||||
b.append(" [maxNumSegments=" + maxNumSegments + "]");
|
b.append(" [maxNumSegments=" + maxNumSegments + "]");
|
||||||
|
}
|
||||||
if (aborted) {
|
if (aborted) {
|
||||||
b.append(" [ABORTED]");
|
b.append(" [ABORTED]");
|
||||||
}
|
}
|
||||||
|
@ -312,8 +316,9 @@ public abstract class MergePolicy {
|
||||||
StringBuilder b = new StringBuilder();
|
StringBuilder b = new StringBuilder();
|
||||||
b.append("MergeSpec:\n");
|
b.append("MergeSpec:\n");
|
||||||
final int count = merges.size();
|
final int count = merges.size();
|
||||||
for(int i=0;i<count;i++)
|
for(int i=0;i<count;i++) {
|
||||||
b.append(" ").append(1 + i).append(": ").append(merges.get(i).segString(dir));
|
b.append(" ").append(1 + i).append(": ").append(merges.get(i).segString(dir));
|
||||||
|
}
|
||||||
return b.toString();
|
return b.toString();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -477,9 +482,9 @@ public abstract class MergePolicy {
|
||||||
protected long size(SegmentCommitInfo info, IndexWriter writer) throws IOException {
|
protected long size(SegmentCommitInfo info, IndexWriter writer) throws IOException {
|
||||||
long byteSize = info.sizeInBytes();
|
long byteSize = info.sizeInBytes();
|
||||||
int delCount = writer.numDeletedDocs(info);
|
int delCount = writer.numDeletedDocs(info);
|
||||||
double delRatio = (info.info.getDocCount() <= 0 ? 0.0f : ((float)delCount / (float)info.info.getDocCount()));
|
double delRatio = info.info.getDocCount() <= 0 ? 0.0f : (float) delCount / (float) info.info.getDocCount();
|
||||||
assert delRatio <= 1.0;
|
assert delRatio <= 1.0;
|
||||||
return (info.info.getDocCount() <= 0 ? byteSize : (long)(byteSize * (1.0 - delRatio)));
|
return (info.info.getDocCount() <= 0 ? byteSize : (long) (byteSize * (1.0 - delRatio)));
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns true if this single info is already fully merged (has no
|
/** Returns true if this single info is already fully merged (has no
|
||||||
|
@ -527,7 +532,7 @@ public abstract class MergePolicy {
|
||||||
throw new IllegalArgumentException("maxCFSSegmentSizeMB must be >=0 (got " + v + ")");
|
throw new IllegalArgumentException("maxCFSSegmentSizeMB must be >=0 (got " + v + ")");
|
||||||
}
|
}
|
||||||
v *= 1024 * 1024;
|
v *= 1024 * 1024;
|
||||||
this.maxCFSSegmentSize = (v > Long.MAX_VALUE) ? Long.MAX_VALUE : (long) v;
|
this.maxCFSSegmentSize = v > Long.MAX_VALUE ? Long.MAX_VALUE : (long) v;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -451,7 +451,7 @@ class ReadersAndUpdates {
|
||||||
final IOContext infosContext = new IOContext(new FlushInfo(info.info.getDocCount(), estInfosSize));
|
final IOContext infosContext = new IOContext(new FlushInfo(info.info.getDocCount(), estInfosSize));
|
||||||
// separately also track which files were created for this gen
|
// separately also track which files were created for this gen
|
||||||
final TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir);
|
final TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir);
|
||||||
infosFormat.getFieldInfosWriter().write(trackingDir, info.info, segmentSuffix, fieldInfos, infosContext);
|
infosFormat.write(trackingDir, info.info, segmentSuffix, fieldInfos, infosContext);
|
||||||
info.advanceFieldInfosGen();
|
info.advanceFieldInfosGen();
|
||||||
return trackingDir.getCreatedFiles();
|
return trackingDir.getCreatedFiles();
|
||||||
}
|
}
|
||||||
|
|
|
@ -109,7 +109,7 @@ final class SegmentCoreReaders implements Accountable {
|
||||||
cfsDir = dir;
|
cfsDir = dir;
|
||||||
}
|
}
|
||||||
|
|
||||||
coreFieldInfos = codec.fieldInfosFormat().getFieldInfosReader().read(cfsDir, si.info, "", context);
|
coreFieldInfos = codec.fieldInfosFormat().read(cfsDir, si.info, "", context);
|
||||||
|
|
||||||
final SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.info, coreFieldInfos, context);
|
final SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.info, coreFieldInfos, context);
|
||||||
final PostingsFormat format = codec.postingsFormat();
|
final PostingsFormat format = codec.postingsFormat();
|
||||||
|
|
|
@ -307,7 +307,7 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
|
||||||
String segName = input.readString();
|
String segName = input.readString();
|
||||||
Codec codec = Codec.forName(input.readString());
|
Codec codec = Codec.forName(input.readString());
|
||||||
//System.out.println("SIS.read seg=" + seg + " codec=" + codec);
|
//System.out.println("SIS.read seg=" + seg + " codec=" + codec);
|
||||||
SegmentInfo info = codec.segmentInfoFormat().getSegmentInfoReader().read(directory, segName, IOContext.READ);
|
SegmentInfo info = codec.segmentInfoFormat().read(directory, segName, IOContext.READ);
|
||||||
info.setCodec(codec);
|
info.setCodec(codec);
|
||||||
long delGen = input.readLong();
|
long delGen = input.readLong();
|
||||||
int delCount = input.readInt();
|
int delCount = input.readInt();
|
||||||
|
|
|
@ -22,7 +22,6 @@ import java.util.List;
|
||||||
|
|
||||||
import org.apache.lucene.codecs.Codec;
|
import org.apache.lucene.codecs.Codec;
|
||||||
import org.apache.lucene.codecs.DocValuesConsumer;
|
import org.apache.lucene.codecs.DocValuesConsumer;
|
||||||
import org.apache.lucene.codecs.FieldInfosWriter;
|
|
||||||
import org.apache.lucene.codecs.FieldsConsumer;
|
import org.apache.lucene.codecs.FieldsConsumer;
|
||||||
import org.apache.lucene.codecs.NormsConsumer;
|
import org.apache.lucene.codecs.NormsConsumer;
|
||||||
import org.apache.lucene.codecs.StoredFieldsWriter;
|
import org.apache.lucene.codecs.StoredFieldsWriter;
|
||||||
|
@ -147,8 +146,7 @@ final class SegmentMerger {
|
||||||
}
|
}
|
||||||
|
|
||||||
// write the merged infos
|
// write the merged infos
|
||||||
FieldInfosWriter fieldInfosWriter = codec.fieldInfosFormat().getFieldInfosWriter();
|
codec.fieldInfosFormat().write(directory, mergeState.segmentInfo, "", mergeState.mergeFieldInfos, context);
|
||||||
fieldInfosWriter.write(directory, mergeState.segmentInfo, "", mergeState.mergeFieldInfos, context);
|
|
||||||
|
|
||||||
return mergeState;
|
return mergeState;
|
||||||
}
|
}
|
||||||
|
|
|
@ -182,7 +182,7 @@ public final class SegmentReader extends LeafReader implements Accountable {
|
||||||
// updates always outside of CFS
|
// updates always outside of CFS
|
||||||
FieldInfosFormat fisFormat = si.info.getCodec().fieldInfosFormat();
|
FieldInfosFormat fisFormat = si.info.getCodec().fieldInfosFormat();
|
||||||
final String segmentSuffix = Long.toString(si.getFieldInfosGen(), Character.MAX_RADIX);
|
final String segmentSuffix = Long.toString(si.getFieldInfosGen(), Character.MAX_RADIX);
|
||||||
return fisFormat.getFieldInfosReader().read(si.info.dir, si.info, segmentSuffix, IOContext.READONCE);
|
return fisFormat.read(si.info.dir, si.info, segmentSuffix, IOContext.READONCE);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -142,7 +142,7 @@ public class TieredMergePolicy extends MergePolicy {
|
||||||
throw new IllegalArgumentException("maxMergedSegmentMB must be >=0 (got " + v + ")");
|
throw new IllegalArgumentException("maxMergedSegmentMB must be >=0 (got " + v + ")");
|
||||||
}
|
}
|
||||||
v *= 1024 * 1024;
|
v *= 1024 * 1024;
|
||||||
maxMergedSegmentBytes = (v > Long.MAX_VALUE) ? Long.MAX_VALUE : (long) v;
|
maxMergedSegmentBytes = v > Long.MAX_VALUE ? Long.MAX_VALUE : (long) v;
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -183,7 +183,7 @@ public class TieredMergePolicy extends MergePolicy {
|
||||||
throw new IllegalArgumentException("floorSegmentMB must be >= 0.0 (got " + v + ")");
|
throw new IllegalArgumentException("floorSegmentMB must be >= 0.0 (got " + v + ")");
|
||||||
}
|
}
|
||||||
v *= 1024 * 1024;
|
v *= 1024 * 1024;
|
||||||
floorSegmentBytes = (v > Long.MAX_VALUE) ? Long.MAX_VALUE : (long) v;
|
floorSegmentBytes = v > Long.MAX_VALUE ? Long.MAX_VALUE : (long) v;
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -314,8 +314,12 @@ public class TieredMergePolicy extends MergePolicy {
|
||||||
// If we have too-large segments, grace them out
|
// If we have too-large segments, grace them out
|
||||||
// of the maxSegmentCount:
|
// of the maxSegmentCount:
|
||||||
int tooBigCount = 0;
|
int tooBigCount = 0;
|
||||||
while (tooBigCount < infosSorted.size() && size(infosSorted.get(tooBigCount), writer) >= maxMergedSegmentBytes/2.0) {
|
while (tooBigCount < infosSorted.size()) {
|
||||||
totIndexBytes -= size(infosSorted.get(tooBigCount), writer);
|
long segBytes = size(infosSorted.get(tooBigCount), writer);
|
||||||
|
if (segBytes < maxMergedSegmentBytes/2.0) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
totIndexBytes -= segBytes;
|
||||||
tooBigCount++;
|
tooBigCount++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -351,7 +355,7 @@ public class TieredMergePolicy extends MergePolicy {
|
||||||
for(int idx = tooBigCount; idx<infosSorted.size(); idx++) {
|
for(int idx = tooBigCount; idx<infosSorted.size(); idx++) {
|
||||||
final SegmentCommitInfo info = infosSorted.get(idx);
|
final SegmentCommitInfo info = infosSorted.get(idx);
|
||||||
if (merging.contains(info)) {
|
if (merging.contains(info)) {
|
||||||
mergingBytes += info.sizeInBytes();
|
mergingBytes += size(info, writer);
|
||||||
} else if (!toBeMerged.contains(info)) {
|
} else if (!toBeMerged.contains(info)) {
|
||||||
eligible.add(info);
|
eligible.add(info);
|
||||||
}
|
}
|
||||||
|
@ -400,6 +404,10 @@ public class TieredMergePolicy extends MergePolicy {
|
||||||
totAfterMergeBytes += segBytes;
|
totAfterMergeBytes += segBytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// We should never see an empty candidate: we iterated over maxMergeAtOnce
|
||||||
|
// segments, and already pre-excluded the too-large segments:
|
||||||
|
assert candidate.size() > 0;
|
||||||
|
|
||||||
final MergeScore score = score(candidate, hitTooLarge, mergingBytes, writer);
|
final MergeScore score = score(candidate, hitTooLarge, mergingBytes, writer);
|
||||||
if (verbose(writer)) {
|
if (verbose(writer)) {
|
||||||
message(" maybe=" + writer.segString(candidate) + " score=" + score.getScore() + " " + score.getExplanation() + " tooLarge=" + hitTooLarge + " size=" + String.format(Locale.ROOT, "%.3f MB", totAfterMergeBytes/1024./1024.), writer);
|
message(" maybe=" + writer.segString(candidate) + " score=" + score.getScore() + " " + score.getExplanation() + " tooLarge=" + hitTooLarge + " size=" + String.format(Locale.ROOT, "%.3f MB", totAfterMergeBytes/1024./1024.), writer);
|
||||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue