merge trunk

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5969@1631928 13f79535-47bb-0310-9956-ffa450edef68
Robert Muir 2014-10-15 01:26:26 +00:00
commit 45d882f470
211 changed files with 4878 additions and 4523 deletions

View File

@@ -2,7 +2,7 @@
   <library name="JUnit">
     <CLASSES>
       <root url="jar://$PROJECT_DIR$/lucene/test-framework/lib/junit-4.10.jar!/" />
-      <root url="jar://$PROJECT_DIR$/lucene/test-framework/lib/randomizedtesting-runner-2.1.6.jar!/" />
+      <root url="jar://$PROJECT_DIR$/lucene/test-framework/lib/randomizedtesting-runner-2.1.9.jar!/" />
     </CLASSES>
     <JAVADOC />
     <SOURCES />

View File

@@ -151,6 +151,15 @@ API Changes
   to return an instance optimized for merging instead of searching.
   (Mike McCandless, Robert Muir)
 
+* LUCENE-5992: Remove FieldInfos from SegmentInfosWriter.write API. (Robert Muir, Mike McCandless)
+
+* LUCENE-5998: Simplify Field/SegmentInfoFormat to read+write methods.
+  (Robert Muir)
+
+* LUCENE-6000: Removed StandardTokenizerInterface. Tokenizers now use
+  their jflex impl directly.
+  (Ryan Ernst)
+
 Bug Fixes
 
 * LUCENE-5650: Enforce read-only access to any path outside the temporary
@@ -169,6 +178,8 @@ Bug Fixes
   not have the regular "spinlock" of DirectoryReader.open. It now implements
   Closeable and you must close it to release the lock. (Mike McCandless, Robert Muir)
 
+* LUCENE-5980: Don't let document length overflow. (Robert Muir)
+
 Documentation
 
 * LUCENE-5392: Add/improve analysis package documentation to reflect
@@ -187,6 +198,12 @@ Tests
   index files to use Version.toString() in filename.
   (Ryan Ernst)
 
+* LUCENE-6002: Monster tests no longer fail. Most of them now have an 80 hour
+  timeout, effectively removing the timeout. The tests that operate near the 2
+  billion limit now use IndexWriter.MAX_DOCS instead of Integer.MAX_VALUE.
+  Some of the slow Monster tests now explicitly choose the default codec.
+  (Mike McCandless, Shawn Heisey)
+
 Optimizations
 
 * LUCENE-5960: Use a more efficient bitset, not a Set<Integer>, to
@@ -206,6 +223,9 @@ Optimizations
   per-segment/per-producer, and norms and doc values merging no longer cause
   RAM spikes for latent fields. (Mike McCandless, Robert Muir)
 
+* LUCENE-5983: CachingWrapperFilter now uses a new DocIdSet implementation
+  called RoaringDocIdSet instead of WAH8DocIdSet. (Adrien Grand)
+
 Build
 
 * LUCENE-5909: Smoke tester now has better command line parsing and
@@ -216,6 +236,8 @@ Build
 * LUCENE-5962: Rename diffSources.py to createPatch.py and make it work with all text file types.
   (Ryan Ernst)
 
+* LUCENE-5995: Upgrade ICU to 54.1 (Robert Muir)
+
 Other
 
 * LUCENE-5563: Removed sep layout: which has fallen behind on features and doesn't
@@ -1250,6 +1272,10 @@ New Features
   approximate value of the diameter of the earth at the given latitude.
   (Adrien Grand)
 
+* LUCENE-5979: FilteredQuery uses the cost API to decide on whether to use
+  random-access or leap-frog to intersect the filter with the query.
+  (Adrien Grand)
+
 Build
 
 * LUCENE-5217,LUCENE-5420: Maven config: get dependencies from Ant+Ivy config;
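
For context on the LUCENE-5983 entry above: a "roaring" set splits the doc id space into 2^16-wide blocks and, per block, stores whichever of a sorted array of low 16-bit values or a plain bitmap is smaller. The sketch below is a hypothetical simplification for illustration only, not Lucene's actual RoaringDocIdSet; the class and field names are invented.

```java
import java.util.Arrays;
import java.util.BitSet;

// Simplified illustration of the roaring layout: each 2^16-wide block
// is stored either as a sorted char[] of low 16 bits (sparse) or as a
// long[] bitmap (dense), whichever is smaller.
final class RoaringSketch {
  private static final int BLOCK_BITS = 16;
  // a dense bitmap costs 2^16 bits == 4096 16-bit entries, so arrays
  // longer than that are never cheaper than the bitmap
  private static final int MAX_ARRAY_LENGTH = 1 << 12;

  private final Object[] blocks; // each entry is null, char[] or long[]

  RoaringSketch(BitSet docs, int maxDoc) {
    blocks = new Object[(maxDoc >>> BLOCK_BITS) + 1];
    for (int doc = docs.nextSetBit(0); doc >= 0 && doc < maxDoc; ) {
      int block = doc >>> BLOCK_BITS;
      int limit = Math.min((block + 1) << BLOCK_BITS, maxDoc);
      // first pass: count the bits that fall into this block
      int cardinality = 0;
      for (int d = doc; d >= 0 && d < limit; d = docs.nextSetBit(d + 1)) {
        cardinality++;
      }
      if (cardinality <= MAX_ARRAY_LENGTH) {
        char[] lowBits = new char[cardinality]; // char == unsigned 16-bit
        int i = 0;
        for (int d = doc; d >= 0 && d < limit; d = docs.nextSetBit(d + 1)) {
          lowBits[i++] = (char) (d & 0xFFFF);
        }
        blocks[block] = lowBits;
      } else {
        long[] bitmap = new long[(1 << BLOCK_BITS) / 64];
        for (int d = doc; d >= 0 && d < limit; d = docs.nextSetBit(d + 1)) {
          bitmap[(d & 0xFFFF) >>> 6] |= 1L << (d & 0x3F);
        }
        blocks[block] = bitmap;
      }
      doc = docs.nextSetBit(limit); // jump to the next occupied block
    }
  }

  boolean contains(int docId) {
    Object block = blocks[docId >>> BLOCK_BITS];
    int low = docId & 0xFFFF;
    if (block == null) {
      return false;
    } else if (block instanceof char[]) {
      return Arrays.binarySearch((char[]) block, (char) low) >= 0;
    } else {
      return (((long[]) block)[low >>> 6] & (1L << (low & 0x3F))) != 0;
    }
  }
}
```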
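Likewise for LUCENE-5979: "leap-frog" intersection advances whichever of two sorted doc id streams is behind until both land on the same document, while random access drives the query scorer alone and probes the filter's bits per candidate; the iterators' cost() estimates pick the cheaper strategy. A minimal, self-contained sketch of the leap-frog half, using plain int arrays instead of Lucene's DocIdSetIterator (names are illustrative):

```java
import java.util.ArrayList;
import java.util.List;

final class LeapFrog {
  // Intersect two sorted doc id lists by always advancing the stream
  // that is behind; with real DocIdSetIterators, advance(target) can
  // skip many docs at once, which is what makes this strategy cheap
  // when the filter has no inexpensive random access.
  static List<Integer> intersect(int[] a, int[] b) {
    List<Integer> out = new ArrayList<>();
    int i = 0, j = 0;
    while (i < a.length && j < b.length) {
      if (a[i] < b[j]) {
        i++;           // a is behind: leap forward
      } else if (a[i] > b[j]) {
        j++;           // b is behind: leap forward
      } else {
        out.add(a[i]); // match on both sides
        i++;
        j++;
      }
    }
    return out;
  }
}
```

With random access instead, the query scorer iterates alone and each candidate doc is checked against the filter's bits; the cost estimates decide which of the two does less work.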

View File

@@ -49,7 +49,7 @@ import org.apache.lucene.util.AttributeFactory;
 public final class ClassicTokenizer extends Tokenizer {
   /** A private instance of the JFlex-constructed scanner */
-  private StandardTokenizerInterface scanner;
+  private ClassicTokenizerImpl scanner;
 
   public static final int ALPHANUM = 0;
   public static final int APOSTROPHE = 1;
@@ -135,7 +135,7 @@ public final class ClassicTokenizer extends Tokenizer {
     while(true) {
       int tokenType = scanner.getNextToken();
 
-      if (tokenType == StandardTokenizerInterface.YYEOF) {
+      if (tokenType == ClassicTokenizerImpl.YYEOF) {
         return false;
       }

View File

@@ -33,7 +33,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  * This class implements the classic lucene StandardTokenizer up until 3.0
  */
-class ClassicTokenizerImpl implements StandardTokenizerInterface {
+class ClassicTokenizerImpl {
 
   /** This character denotes the end of file */
   public static final int YYEOF = -1;

View File

@@ -33,7 +33,6 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 %%
 
 %class ClassicTokenizerImpl
-%implements StandardTokenizerInterface
 %unicode 3.0
 %integer
 %function getNextToken

View File

@@ -39,7 +39,7 @@ import org.apache.lucene.util.AttributeFactory;
 public final class StandardTokenizer extends Tokenizer {
   /** A private instance of the JFlex-constructed scanner */
-  private StandardTokenizerInterface scanner;
+  private StandardTokenizerImpl scanner;
 
   // TODO: how can we remove these old types?!
   public static final int ALPHANUM = 0;
@@ -150,7 +150,7 @@ public final class StandardTokenizer extends Tokenizer {
     while(true) {
       int tokenType = scanner.getNextToken();
 
-      if (tokenType == StandardTokenizerInterface.YYEOF) {
+      if (tokenType == StandardTokenizerImpl.YYEOF) {
         return false;
       }

View File

@@ -39,7 +39,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  * </ul>
  */
-public final class StandardTokenizerImpl implements StandardTokenizerInterface {
+public final class StandardTokenizerImpl {
 
   /** This character denotes the end of file */
   public static final int YYEOF = -1;
@@ -804,7 +804,7 @@ public final class StandardTokenizerImpl implements StandardTokenizerInterface {
         if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
           zzAtEOF = true;
           {
-            return StandardTokenizerInterface.YYEOF;
+            return YYEOF;
           }
         }
         else {

View File

@@ -43,7 +43,6 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 %final
 %public
 %class StandardTokenizerImpl
-%implements StandardTokenizerInterface
 %function getNextToken
 %char
 %buffer 255
@@ -118,7 +117,7 @@ ComplexContextEx = \p{LB:Complex_Context}
 // UAX#29 WB1. sot ÷
 //        WB2. ÷ eot
 //
-<<EOF>> { return StandardTokenizerInterface.YYEOF; }
+<<EOF>> { return YYEOF; }
 
 // UAX#29 WB8. Numeric × Numeric
 //        WB11. Numeric (MidNum | MidNumLet | Single_Quote) × Numeric

View File

@@ -1,74 +0,0 @@
-package org.apache.lucene.analysis.standard;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-
-import java.io.Reader;
-import java.io.IOException;
-
-/**
- * Internal interface for supporting versioned grammars.
- * @lucene.internal
- */
-public interface StandardTokenizerInterface {
-
-  /** This token type, as returned from {@link #getNextToken()}, denotes the end of file */
-  public static final int YYEOF = -1;
-
-  /**
-   * Copies the matched text into the CharTermAttribute
-   */
-  public void getText(CharTermAttribute t);
-
-  /**
-   * Returns the current position.
-   */
-  public int yychar();
-
-  /**
-   * Resets the scanner to read from a new input stream.
-   * Does not close the old reader.
-   *
-   * All internal variables are reset, the old input stream
-   * <b>cannot</b> be reused (internal buffer is discarded and lost).
-   * Lexical state is set to <tt>ZZ_INITIAL</tt>.
-   *
-   * @param reader the new input stream
-   */
-  public void yyreset(Reader reader);
-
-  /**
-   * Returns the length of the matched text region.
-   */
-  public int yylength();
-
-  /**
-   * Resumes scanning until the next regular expression is matched,
-   * the end of input is encountered or an I/O-Error occurs.
-   *
-   * @return the next token, {@link #YYEOF} on end of stream
-   * @exception IOException if any I/O-Error occurs
-   */
-  public int getNextToken() throws IOException;
-
-  /**
-   * Sets the scanner buffer size in chars
-   */
-  public void setBufferSize(int numChars);
-
-}

View File

@@ -47,7 +47,7 @@ import org.apache.lucene.util.AttributeFactory;
 public final class UAX29URLEmailTokenizer extends Tokenizer {
   /** A private instance of the JFlex-constructed scanner */
-  private final StandardTokenizerInterface scanner;
+  private final UAX29URLEmailTokenizerImpl scanner;
 
   public static final int ALPHANUM = 0;
   public static final int NUM = 1;
@@ -108,7 +108,7 @@ public final class UAX29URLEmailTokenizer extends Tokenizer {
     this.scanner = getScanner();
   }
 
-  private StandardTokenizerInterface getScanner() {
+  private UAX29URLEmailTokenizerImpl getScanner() {
     return new UAX29URLEmailTokenizerImpl(input);
   }
 
@@ -127,7 +127,7 @@ public final class UAX29URLEmailTokenizer extends Tokenizer {
     while(true) {
       int tokenType = scanner.getNextToken();
 
-      if (tokenType == StandardTokenizerInterface.YYEOF) {
+      if (tokenType == UAX29URLEmailTokenizerImpl.YYEOF) {
         return false;
       }

View File

@@ -42,7 +42,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  * </ul>
  */
-public final class UAX29URLEmailTokenizerImpl implements StandardTokenizerInterface {
+public final class UAX29URLEmailTokenizerImpl {
 
   /** This character denotes the end of file */
   public static final int YYEOF = -1;
@@ -7204,11 +7204,11 @@ public final class UAX29URLEmailTokenizerImpl implements StandardTokenizerInterf
             zzAtEOF = true;
             switch (zzLexicalState) {
             case YYINITIAL: {
-              return StandardTokenizerInterface.YYEOF;
+              return YYEOF;
             }
             case 2910: break;
             case AVOID_BAD_URL: {
-              return StandardTokenizerInterface.YYEOF;
+              return YYEOF;
             }
             case 2911: break;
             default:

View File

@@ -46,7 +46,6 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 %final
 %public
 %class UAX29URLEmailTokenizerImpl
-%implements StandardTokenizerInterface
 %function getNextToken
 %char
 %xstate AVOID_BAD_URL
@@ -208,7 +207,7 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNameStrict} | {EMAILbracketedHost})
 // UAX#29 WB1. sot ÷
 //        WB2. ÷ eot
 //
-<<EOF>> { return StandardTokenizerInterface.YYEOF; }
+<<EOF>> { return YYEOF; }
 
 {URL} { yybegin(YYINITIAL); return URL_TYPE; }

View File

@@ -35,7 +35,7 @@ import org.apache.lucene.analysis.util.CharArraySet;
  * It is very similar to the snowball portuguese algorithm but not exactly the same.
  *
  */
-public class TestBrazilianStemmer extends BaseTokenStreamTestCase {
+public class TestBrazilianAnalyzer extends BaseTokenStreamTestCase {
 
   public void testWithSnowballExamples() throws Exception {
     check("boa", "boa");

View File

@@ -30,7 +30,7 @@ import org.apache.lucene.analysis.util.CharArraySet;
  * The code states that it uses the snowball algorithm, but tests reveal some differences.
  *
  */
-public class TestDutchStemmer extends BaseTokenStreamTestCase {
+public class TestDutchAnalyzer extends BaseTokenStreamTestCase {
 
   public void testWithSnowballExamples() throws Exception {
     check("lichaamsziek", "lichaamsziek");

View File

@@ -1,4 +1,4 @@
-package org.apache.lucene.analysis.core;
+package org.apache.lucene.analysis.standard;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;

View File

@@ -1,4 +1,4 @@
-package org.apache.lucene.analysis.core;
+package org.apache.lucene.analysis.standard;
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -27,8 +27,6 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockGraphTokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.util.TestUtil;
 
 public class TestStandardAnalyzer extends BaseTokenStreamTestCase {

View File

@@ -1,4 +1,4 @@
-package org.apache.lucene.analysis.core;
+package org.apache.lucene.analysis.standard;
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more

View File

@@ -1,4 +1,4 @@
-package org.apache.lucene.analysis.core;
+package org.apache.lucene.analysis.standard;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@@ -6,11 +6,11 @@ import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer;
-import org.apache.lucene.analysis.standard.WordBreakTestUnicode_6_3_0;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.util.TestUtil;
 
 import java.io.BufferedReader;
-import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.io.Reader;
@@ -20,7 +20,6 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 import java.util.Random;
-import java.util.regex.Pattern;
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more

View File

@@ -1,4 +1,4 @@
-package org.apache.lucene.analysis.core;
+package org.apache.lucene.analysis.standard;
 
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more

View File

@@ -1,50 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-#
-# Parses Myanmar text, with syllable as token.
-#
-
-$Cons = [[:Other_Letter:]&[:Myanmar:]];
-$Virama = [\u1039];
-$Asat = [\u103A];
-
-$WordJoin = [:Line_Break=Word_Joiner:];
-
-#
-# default numerical definitions
-#
-$Extend = [\p{Word_Break = Extend}];
-$Format = [\p{Word_Break = Format}];
-$MidNumLet = [\p{Word_Break = MidNumLet}];
-$MidNum = [\p{Word_Break = MidNum}];
-$Numeric = [\p{Word_Break = Numeric}];
-$ExtendNumLet = [\p{Word_Break = ExtendNumLet}];
-$MidNumLetEx = $MidNumLet ($Extend | $Format)*;
-$MidNumEx = $MidNum ($Extend | $Format)*;
-$NumericEx = $Numeric ($Extend | $Format)*;
-$ExtendNumLetEx = $ExtendNumLet ($Extend | $Format)*;
-
-$ConsEx = $Cons ($Extend | $Format)*;
-$AsatEx = $Cons $Asat ($Virama $ConsEx)? ($Extend | $Format)*;
-$MyanmarSyllableEx = $ConsEx ($Virama $ConsEx)? ($AsatEx)*;
-$MyanmarJoinedSyllableEx = $MyanmarSyllableEx ($WordJoin $MyanmarSyllableEx)*;
-
-!!forward;
-$MyanmarJoinedSyllableEx {200};
-
-# default numeric rules
-$NumericEx $ExtendNumLetEx? (($MidNumEx | $MidNumLetEx)? $NumericEx $ExtendNumLetEx?)* {100};

View File

@@ -49,6 +49,7 @@
 2E17>002D
 2E1A>002D
 2E3A..2E3B>002D
+2E40>002D
 301C>002D
 3030>002D
 30A0>002D

View File

@@ -102,6 +102,7 @@
 1939..193B>
 1A75..1A7C>
 1A7F>
+1AB0..1ABD>
 1B34>
 1B44>
 1B6B..1B73>
@@ -111,8 +112,10 @@
 1CD0..1CE8>
 1CED>
 1CF4>
+1CF8..1CF9>
 1D2C..1D6A>
 1DC4..1DCF>
+1DF5>
 1DFD..1DFF>
 1FBD>
 1FBF..1FC1>
@@ -128,6 +131,7 @@
 A66F>
 A67C..A67D>
 A67F>
+A69C..A69D>
 A6F0..A6F1>
 A717..A721>
 A788>
@@ -138,27 +142,43 @@ A92B..A92E>
 A953>
 A9B3>
 A9C0>
-AA7B>
+A9E5>
+AA7B..AA7D>
 AABF..AAC2>
 AAF6>
+AB5B..AB5F>
 ABEC..ABED>
 FB1E>
-FE20..FE26>
+FE20..FE2D>
 FF3E>
 FF40>
 FF70>
 FF9E..FF9F>
 FFE3>
+102E0>
+10AE5..10AE6>
 110B9..110BA>
 11133..11134>
+11173>
 111C0>
+11235..11236>
+112E9..112EA>
+1133C>
+1134D>
+11366..1136C>
+11370..11374>
+114C2..114C3>
+115BF..115C0>
+1163F>
 116B6..116B7>
+16AF0..16AF4>
 16F8F..16F9F>
 1D167..1D169>
 1D16D..1D172>
 1D17B..1D182>
 1D185..1D18B>
 1D1AA..1D1AD>
+1E8D0..1E8D6>
 
 # Latin script "composed" that do not further decompose, so decompose here
 # These are from AsciiFoldingFilter

View File

@@ -151,6 +151,16 @@
 0D6D>0037 # MALAYALAM DIGIT SEVEN
 0D6E>0038 # MALAYALAM DIGIT EIGHT
 0D6F>0039 # MALAYALAM DIGIT NINE
+0DE6>0030 # SINHALA LITH DIGIT ZERO
+0DE7>0031 # SINHALA LITH DIGIT ONE
+0DE8>0032 # SINHALA LITH DIGIT TWO
+0DE9>0033 # SINHALA LITH DIGIT THREE
+0DEA>0034 # SINHALA LITH DIGIT FOUR
+0DEB>0035 # SINHALA LITH DIGIT FIVE
+0DEC>0036 # SINHALA LITH DIGIT SIX
+0DED>0037 # SINHALA LITH DIGIT SEVEN
+0DEE>0038 # SINHALA LITH DIGIT EIGHT
+0DEF>0039 # SINHALA LITH DIGIT NINE
 0E50>0030 # THAI DIGIT ZERO
 0E51>0031 # THAI DIGIT ONE
 0E52>0032 # THAI DIGIT TWO
@@ -388,6 +398,16 @@ A9D6>0036 # JAVANESE DIGIT SIX
 A9D7>0037 # JAVANESE DIGIT SEVEN
 A9D8>0038 # JAVANESE DIGIT EIGHT
 A9D9>0039 # JAVANESE DIGIT NINE
+A9F0>0030 # MYANMAR TAI LAING DIGIT ZERO
+A9F1>0031 # MYANMAR TAI LAING DIGIT ONE
+A9F2>0032 # MYANMAR TAI LAING DIGIT TWO
+A9F3>0033 # MYANMAR TAI LAING DIGIT THREE
+A9F4>0034 # MYANMAR TAI LAING DIGIT FOUR
+A9F5>0035 # MYANMAR TAI LAING DIGIT FIVE
+A9F6>0036 # MYANMAR TAI LAING DIGIT SIX
+A9F7>0037 # MYANMAR TAI LAING DIGIT SEVEN
+A9F8>0038 # MYANMAR TAI LAING DIGIT EIGHT
+A9F9>0039 # MYANMAR TAI LAING DIGIT NINE
 AA50>0030 # CHAM DIGIT ZERO
 AA51>0031 # CHAM DIGIT ONE
 AA52>0032 # CHAM DIGIT TWO
@@ -480,6 +500,36 @@ ABF9>0039 # MEETEI MAYEK DIGIT NINE
 111D7>0037 # SHARADA DIGIT SEVEN
 111D8>0038 # SHARADA DIGIT EIGHT
 111D9>0039 # SHARADA DIGIT NINE
+112F0>0030 # KHUDAWADI DIGIT ZERO
+112F1>0031 # KHUDAWADI DIGIT ONE
+112F2>0032 # KHUDAWADI DIGIT TWO
+112F3>0033 # KHUDAWADI DIGIT THREE
+112F4>0034 # KHUDAWADI DIGIT FOUR
+112F5>0035 # KHUDAWADI DIGIT FIVE
+112F6>0036 # KHUDAWADI DIGIT SIX
+112F7>0037 # KHUDAWADI DIGIT SEVEN
+112F8>0038 # KHUDAWADI DIGIT EIGHT
+112F9>0039 # KHUDAWADI DIGIT NINE
+114D0>0030 # TIRHUTA DIGIT ZERO
+114D1>0031 # TIRHUTA DIGIT ONE
+114D2>0032 # TIRHUTA DIGIT TWO
+114D3>0033 # TIRHUTA DIGIT THREE
+114D4>0034 # TIRHUTA DIGIT FOUR
+114D5>0035 # TIRHUTA DIGIT FIVE
+114D6>0036 # TIRHUTA DIGIT SIX
+114D7>0037 # TIRHUTA DIGIT SEVEN
+114D8>0038 # TIRHUTA DIGIT EIGHT
+114D9>0039 # TIRHUTA DIGIT NINE
+11650>0030 # MODI DIGIT ZERO
+11651>0031 # MODI DIGIT ONE
+11652>0032 # MODI DIGIT TWO
+11653>0033 # MODI DIGIT THREE
+11654>0034 # MODI DIGIT FOUR
+11655>0035 # MODI DIGIT FIVE
+11656>0036 # MODI DIGIT SIX
+11657>0037 # MODI DIGIT SEVEN
+11658>0038 # MODI DIGIT EIGHT
+11659>0039 # MODI DIGIT NINE
 116C0>0030 # TAKRI DIGIT ZERO
 116C1>0031 # TAKRI DIGIT ONE
 116C2>0032 # TAKRI DIGIT TWO
@@ -490,4 +540,34 @@ ABF9>0039 # MEETEI MAYEK DIGIT NINE
 116C7>0037 # TAKRI DIGIT SEVEN
 116C8>0038 # TAKRI DIGIT EIGHT
 116C9>0039 # TAKRI DIGIT NINE
+118E0>0030 # WARANG CITI DIGIT ZERO
+118E1>0031 # WARANG CITI DIGIT ONE
+118E2>0032 # WARANG CITI DIGIT TWO
+118E3>0033 # WARANG CITI DIGIT THREE
+118E4>0034 # WARANG CITI DIGIT FOUR
+118E5>0035 # WARANG CITI DIGIT FIVE
+118E6>0036 # WARANG CITI DIGIT SIX
+118E7>0037 # WARANG CITI DIGIT SEVEN
+118E8>0038 # WARANG CITI DIGIT EIGHT
+118E9>0039 # WARANG CITI DIGIT NINE
+16A60>0030 # MRO DIGIT ZERO
+16A61>0031 # MRO DIGIT ONE
+16A62>0032 # MRO DIGIT TWO
+16A63>0033 # MRO DIGIT THREE
+16A64>0034 # MRO DIGIT FOUR
+16A65>0035 # MRO DIGIT FIVE
+16A66>0036 # MRO DIGIT SIX
+16A67>0037 # MRO DIGIT SEVEN
+16A68>0038 # MRO DIGIT EIGHT
+16A69>0039 # MRO DIGIT NINE
+16B50>0030 # PAHAWH HMONG DIGIT ZERO
+16B51>0031 # PAHAWH HMONG DIGIT ONE
+16B52>0032 # PAHAWH HMONG DIGIT TWO
+16B53>0033 # PAHAWH HMONG DIGIT THREE
+16B54>0034 # PAHAWH HMONG DIGIT FOUR
+16B55>0035 # PAHAWH HMONG DIGIT FIVE
+16B56>0036 # PAHAWH HMONG DIGIT SIX
+16B57>0037 # PAHAWH HMONG DIGIT SEVEN
+16B58>0038 # PAHAWH HMONG DIGIT EIGHT
+16B59>0039 # PAHAWH HMONG DIGIT NINE
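
These digit mappings feed the UTR#30 folding data consumed by ICUFoldingFilter, which rewrites script-specific digits to ASCII at analysis time. A rough sketch of the effect on the Sinhala lith digits added above, assuming a Lucene 5.x-era analysis API (treat the exact constructors as approximate):

```java
import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.icu.ICUFoldingFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class FoldingDemo {
  public static void main(String[] args) throws Exception {
    // U+0DE6..U+0DE8 are the Sinhala lith digits zero..two mapped above
    Tokenizer tok = new WhitespaceTokenizer();
    tok.setReader(new StringReader("\u0DE6\u0DE7\u0DE8"));
    TokenStream ts = new ICUFoldingFilter(tok);
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      System.out.println(term.toString()); // expected: 012
    }
    ts.end();
    ts.close();
  }
}
```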

View File

@@ -1,4 +1,4 @@
-# Copyright (C) 1999-2013, International Business Machines
+# Copyright (C) 1999-2014, International Business Machines
 # Corporation and others. All Rights Reserved.
 #
 # file name: nfc.txt
@@ -7,7 +7,7 @@
 #
 # Complete data for Unicode NFC normalization.
 
-* Unicode 6.3.0
+* Unicode 7.0.0
 
 # Canonical_Combining_Class (ccc) values
 0300..0314:230
@@ -142,7 +142,7 @@
 08F6:220
 08F7..08F8:230
 08F9..08FA:220
-08FB..08FE:230
+08FB..08FF:230
 093C:7
 094D:9
 0951:230
@@ -199,6 +199,10 @@
 1A60:9
 1A75..1A7C:230
 1A7F:220
+1AB0..1AB4:230
+1AB5..1ABA:220
+1ABB..1ABC:230
+1ABD:220
 1B34:7
 1B44:9
 1B6B:230
@@ -217,6 +221,7 @@
 1CE2..1CE8:1
 1CED:220
 1CF4:230
+1CF8..1CF9:230
 1DC0..1DC1:230
 1DC2:220
 1DC3..1DC9:230
@@ -226,7 +231,7 @@
 1DCE:214
 1DCF:220
 1DD0:202
-1DD1..1DE6:230
+1DD1..1DF5:230
 1DFC:233
 1DFD:220
 1DFE:230
@@ -274,21 +279,44 @@ AAF6:9
 ABED:9
 FB1E:26
 FE20..FE26:230
+FE27..FE2D:220
 101FD:220
+102E0:220
+10376..1037A:230
 10A0D:220
 10A0F:230
 10A38:230
 10A39:1
 10A3A:220
 10A3F:9
+10AE5:230
+10AE6:220
 11046:9
+1107F:9
 110B9:9
 110BA:7
 11100..11102:230
 11133..11134:9
+11173:7
 111C0:9
+11235:9
+11236:7
+112E9:7
+112EA:9
+1133C:7
+1134D:9
+11366..1136C:230
+11370..11374:230
+114C2:9
+114C3:7
+115BF:9
+115C0:7
+1163F:9
 116B6:9
 116B7:7
+16AF0..16AF4:1
+16B30..16B36:230
+1BC9E:1
 1D165..1D166:216
 1D167..1D169:1
 1D16D:226
@@ -298,6 +326,7 @@ FE20..FE26:230
 1D18A..1D18B:220
 1D1AA..1D1AD:230
 1D242..1D244:230
+1E8D0..1E8D6:220
 
 # Canonical decomposition mappings
 00C0>0041 0300 # one-way: diacritic 0300
@@ -1798,6 +1827,13 @@ FB4E>05E4 05BF
 110AB>110A5 110BA # one-way: diacritic 110BA
 1112E=11131 11127
 1112F=11132 11127
+1134B=11347 1133E
+1134C=11347 11357
+114BB=114B9 114BA
+114BC=114B9 114B0
+114BE=114B9 114BD
+115BA=115B8 115AF
+115BB=115B9 115AF
 1D15E>1D157 1D165
 1D15F>1D158 1D165
 1D160>1D15F 1D16E

View File

@@ -1,4 +1,4 @@
-# Copyright (C) 1999-2013, International Business Machines
+# Copyright (C) 1999-2014, International Business Machines
 # Corporation and others. All Rights Reserved.
 #
 # file name: nfkc.txt
@@ -11,7 +11,7 @@
 # to NFKC one-way mappings.
 # Use this file as the second gennorm2 input file after nfc.txt.
 
-* Unicode 6.3.0
+* Unicode 7.0.0
 
 00A0>0020
 00A8>0020 0308
@@ -1361,9 +1361,15 @@
 33FD>0033 0030 65E5
 33FE>0033 0031 65E5
 33FF>0067 0061 006C
+A69C>044A
+A69D>044C
 A770>A76F
 A7F8>0126
 A7F9>0153
+AB5C>A727
+AB5D>AB37
+AB5E>026B
+AB5F>AB52
 FB00>0066 0066
 FB01>0066 0069
 FB02>0066 006C

View File

@@ -1,5 +1,5 @@
 # Unicode Character Database
-# Copyright (c) 1991-2013 Unicode, Inc.
+# Copyright (c) 1991-2014 Unicode, Inc.
 # For terms of use, see http://www.unicode.org/terms_of_use.html
 # For documentation, see http://www.unicode.org/reports/tr44/
 #
@@ -12,7 +12,7 @@
 # and reformatted into syntax for the gennorm2 Normalizer2 data generator tool.
 # Use this file as the third gennorm2 input file after nfc.txt and nfkc.txt.
 
-* Unicode 6.3.0
+* Unicode 7.0.0
 
 0041>0061
 0042>0062
@@ -286,6 +286,7 @@
 0376>0377
 037A>0020 03B9
 037E>003B
+037F>03F3
 0384>0020 0301
 0385>0020 0308 0301
 0386>03AC
@@ -498,6 +499,10 @@
 0522>0523
 0524>0525
 0526>0527
+0528>0529
+052A>052B
+052C>052D
+052E>052F
 0531>0561
 0532>0562
 0533>0563
@@ -2308,6 +2313,10 @@ A690>A691
 A692>A693
 A694>A695
 A696>A697
+A698>A699
+A69A>A69B
+A69C>044A
+A69D>044C
 A722>A723
 A724>A725
 A726>A727
@@ -2359,14 +2368,28 @@ A78B>A78C
 A78D>0265
 A790>A791
 A792>A793
+A796>A797
+A798>A799
+A79A>A79B
+A79C>A79D
+A79E>A79F
 A7A0>A7A1
 A7A2>A7A3
 A7A4>A7A5
 A7A6>A7A7
 A7A8>A7A9
 A7AA>0266
+A7AB>025C
+A7AC>0261
+A7AD>026C
+A7B0>029E
+A7B1>0287
 A7F8>0127
 A7F9>0153
+AB5C>A727
+AB5D>AB37
+AB5E>026B
+AB5F>AB52
 F900>8C48
 F901>66F4
 F902>8ECA
@@ -3743,6 +3766,39 @@ FFF0..FFF8>
 10425>1044D
 10426>1044E
 10427>1044F
+118A0>118C0
+118A1>118C1
+118A2>118C2
+118A3>118C3
+118A4>118C4
+118A5>118C5
+118A6>118C6
+118A7>118C7
+118A8>118C8
+118A9>118C9
+118AA>118CA
+118AB>118CB
+118AC>118CC
+118AD>118CD
+118AE>118CE
+118AF>118CF
+118B0>118D0
+118B1>118D1
+118B2>118D2
+118B3>118D3
+118B4>118D4
+118B5>118D5
+118B6>118D6
+118B7>118D7
+118B8>118D8
+118B9>118D9
+118BA>118DA
+118BB>118DB
+118BC>118DC
+118BD>118DD
+118BE>118DE
+118BF>118DF
+1BCA0..1BCA3>
 1D15E>1D157 1D165
 1D15F>1D158 1D165
 1D160>1D158 1D165 1D16E

View File

@@ -35,8 +35,8 @@ import com.ibm.icu.util.ULocale;
  * ({@link BreakIterator#getWordInstance(ULocale) BreakIterator.getWordInstance(ULocale.ROOT)}),
  * but with the following tailorings:
  * <ul>
- *   <li>Thai, Lao, and CJK text is broken into words with a dictionary.
- *   <li>Myanmar, and Khmer text is broken into syllables
+ *   <li>Thai, Lao, Myanmar, and CJK text is broken into words with a dictionary.
+ *   <li>Khmer text is broken into syllables
  *   based on custom BreakIterator rules.
  * </ul>
  * @lucene.experimental
@@ -67,8 +67,6 @@ public class DefaultICUTokenizerConfig extends ICUTokenizerConfig {
       readBreakIterator("Default.brk");
   private static final BreakIterator khmerBreakIterator =
       readBreakIterator("Khmer.brk");
-  private static final BreakIterator myanmarBreakIterator =
-      readBreakIterator("Myanmar.brk");
 
   // TODO: deprecate this boolean? you only care if you are doing super-expert stuff...
   private final boolean cjkAsWords;
@@ -94,7 +92,6 @@ public class DefaultICUTokenizerConfig extends ICUTokenizerConfig {
   public BreakIterator getBreakIterator(int script) {
     switch(script) {
       case UScript.KHMER: return (BreakIterator)khmerBreakIterator.clone();
-      case UScript.MYANMAR: return (BreakIterator)myanmarBreakIterator.clone();
       case UScript.JAPANESE: return (BreakIterator)cjkBreakIterator.clone();
       default: return (BreakIterator)defaultBreakIterator.clone();
     }
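
Dropping Myanmar.brk assumes ICU 54's root word break iterator now segments Myanmar with its bundled dictionary, as the updated javadoc and the new testMyanmar case below reflect. A small standalone ICU4J sketch of that assumption (not Lucene code):

```java
import com.ibm.icu.text.BreakIterator;
import com.ibm.icu.util.ULocale;

public class MyanmarBreakDemo {
  public static void main(String[] args) {
    // Same input as the new testMyanmar case; with ICU 54+ the root
    // word BreakIterator is expected to segment it via dictionary.
    String text = "သက်ဝင်လှုပ်ရှားစေပြီး";
    BreakIterator words = BreakIterator.getWordInstance(ULocale.ROOT);
    words.setText(text);
    int start = words.first();
    for (int end = words.next(); end != BreakIterator.DONE; start = end, end = words.next()) {
      System.out.println(text.substring(start, end));
      // expected: သက်ဝင် / လှုပ်ရှား / စေ / ပြီး
    }
  }
}
```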

View File

@@ -122,6 +122,10 @@ public class TestICUTokenizer extends BaseTokenStreamTestCase {
     assertAnalyzesTo(a, "ພາສາລາວ", new String[] { "ພາສາ", "ລາວ"}, new String[] { "<ALPHANUM>", "<ALPHANUM>" });
   }
 
+  public void testMyanmar() throws Exception {
+    assertAnalyzesTo(a, "သက်ဝင်လှုပ်ရှားစေပြီး", new String[] { "သက်ဝင်", "လှုပ်ရှား", "စေ", "ပြီး" });
+  }
+
   public void testThai() throws Exception {
     assertAnalyzesTo(a, "การที่ได้ต้องแสดงว่างานดี. แล้วเธอจะไปไหน? ๑๒๓๔",
         new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี", "แล้ว", "เธอ", "จะ", "ไป", "ไหน", "๑๒๓๔"});

View File

@@ -63,7 +63,7 @@ import java.util.regex.Pattern;
 public class GenerateUTR30DataFiles {
   private static final String ICU_SVN_TAG_URL
       = "http://source.icu-project.org/repos/icu/icu/tags";
-  private static final String ICU_RELEASE_TAG = "release-52-1";
+  private static final String ICU_RELEASE_TAG = "release-54-1";
   private static final String ICU_DATA_NORM2_PATH = "source/data/unidata/norm2";
   private static final String NFC_TXT = "nfc.txt";
   private static final String NFKC_TXT = "nfkc.txt";

View File

@@ -51,7 +51,7 @@ public class Lucene40DocValuesFormat extends DocValuesFormat {
     String filename = IndexFileNames.segmentFileName(state.segmentInfo.name,
                                                      "dv",
                                                      Lucene40CompoundFormat.COMPOUND_FILE_EXTENSION);
-    return new Lucene40DocValuesReader(state, filename, Lucene40FieldInfosReader.LEGACY_DV_TYPE_KEY);
+    return new Lucene40DocValuesReader(state, filename, Lucene40FieldInfosFormat.LEGACY_DV_TYPE_KEY);
   }
 
   // constants for VAR_INTS

View File

@@ -24,7 +24,7 @@ import java.util.concurrent.atomic.AtomicLong;
 
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.DocValuesProducer;
-import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosReader.LegacyDocValuesType;
+import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat.LegacyDocValuesType;
 import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.FieldInfo;

View File

@@ -18,10 +18,22 @@ package org.apache.lucene.codecs.lucene40;
  */
 
 import java.io.IOException;
+import java.util.Collections;
+import java.util.Map;
 
+import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.FieldInfosFormat;
-import org.apache.lucene.codecs.FieldInfosReader;
-import org.apache.lucene.codecs.FieldInfosWriter;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.FieldInfo.DocValuesType;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.IOUtils;
 
 /**
  * Lucene 4.0 Field Infos format.
@@ -29,22 +41,119 @@ import org.apache.lucene.codecs.FieldInfosWriter;
  */
 @Deprecated
 public class Lucene40FieldInfosFormat extends FieldInfosFormat {
-  private final FieldInfosReader reader = new Lucene40FieldInfosReader();
 
   /** Sole constructor. */
   public Lucene40FieldInfosFormat() {
   }
 
   @Override
-  public final FieldInfosReader getFieldInfosReader() throws IOException {
-    return reader;
-  }
-
-  @Override
-  public FieldInfosWriter getFieldInfosWriter() throws IOException {
-    throw new UnsupportedOperationException("this codec can only be used for reading");
+  public final FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext iocontext) throws IOException {
+    final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, "", Lucene40FieldInfosFormat.FIELD_INFOS_EXTENSION);
+    IndexInput input = directory.openInput(fileName, iocontext);
+    boolean success = false;
+    try {
+      CodecUtil.checkHeader(input, Lucene40FieldInfosFormat.CODEC_NAME,
+                                   Lucene40FieldInfosFormat.FORMAT_START,
+                                   Lucene40FieldInfosFormat.FORMAT_CURRENT);
+
+      final int size = input.readVInt(); //read in the size
+      FieldInfo infos[] = new FieldInfo[size];
+
+      for (int i = 0; i < size; i++) {
+        String name = input.readString();
+        final int fieldNumber = input.readVInt();
+        byte bits = input.readByte();
+        boolean isIndexed = (bits & Lucene40FieldInfosFormat.IS_INDEXED) != 0;
+        boolean storeTermVector = (bits & Lucene40FieldInfosFormat.STORE_TERMVECTOR) != 0;
+        boolean omitNorms = (bits & Lucene40FieldInfosFormat.OMIT_NORMS) != 0;
+        boolean storePayloads = (bits & Lucene40FieldInfosFormat.STORE_PAYLOADS) != 0;
+        final IndexOptions indexOptions;
+        if (!isIndexed) {
+          indexOptions = null;
+        } else if ((bits & Lucene40FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS) != 0) {
+          indexOptions = IndexOptions.DOCS_ONLY;
+        } else if ((bits & Lucene40FieldInfosFormat.OMIT_POSITIONS) != 0) {
+          indexOptions = IndexOptions.DOCS_AND_FREQS;
+        } else if ((bits & Lucene40FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS) != 0) {
+          indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
+        } else {
+          indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
+        }
+
+        // LUCENE-3027: past indices were able to write
+        // storePayloads=true when omitTFAP is also true,
+        // which is invalid. We correct that, here:
+        if (isIndexed && indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
+          storePayloads = false;
+        }
+        // DV Types are packed in one byte
+        byte val = input.readByte();
+        final LegacyDocValuesType oldValuesType = getDocValuesType((byte) (val & 0x0F));
+        final LegacyDocValuesType oldNormsType = getDocValuesType((byte) ((val >>> 4) & 0x0F));
+        final Map<String,String> attributes = input.readStringStringMap();
+        if (oldValuesType.mapping != null) {
+          attributes.put(LEGACY_DV_TYPE_KEY, oldValuesType.name());
+        }
+        if (oldNormsType.mapping != null) {
+          if (oldNormsType.mapping != DocValuesType.NUMERIC) {
+            throw new CorruptIndexException("invalid norm type: " + oldNormsType, input);
+          }
+          attributes.put(LEGACY_NORM_TYPE_KEY, oldNormsType.name());
+        }
+        infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector,
+          omitNorms, storePayloads, indexOptions, oldValuesType.mapping, oldNormsType.mapping, -1, Collections.unmodifiableMap(attributes));
+      }
+
+      CodecUtil.checkEOF(input);
+      FieldInfos fieldInfos = new FieldInfos(infos);
+      success = true;
+      return fieldInfos;
+    } finally {
+      if (success) {
+        input.close();
+      } else {
+        IOUtils.closeWhileHandlingException(input);
+      }
+    }
   }
+
+  static final String LEGACY_DV_TYPE_KEY = Lucene40FieldInfosFormat.class.getSimpleName() + ".dvtype";
+  static final String LEGACY_NORM_TYPE_KEY = Lucene40FieldInfosFormat.class.getSimpleName() + ".normtype";
+
+  // mapping of 4.0 types -> 4.2 types
+  static enum LegacyDocValuesType {
+    NONE(null),
+    VAR_INTS(DocValuesType.NUMERIC),
+    FLOAT_32(DocValuesType.NUMERIC),
+    FLOAT_64(DocValuesType.NUMERIC),
+    BYTES_FIXED_STRAIGHT(DocValuesType.BINARY),
+    BYTES_FIXED_DEREF(DocValuesType.BINARY),
+    BYTES_VAR_STRAIGHT(DocValuesType.BINARY),
+    BYTES_VAR_DEREF(DocValuesType.BINARY),
+    FIXED_INTS_16(DocValuesType.NUMERIC),
+    FIXED_INTS_32(DocValuesType.NUMERIC),
+    FIXED_INTS_64(DocValuesType.NUMERIC),
+    FIXED_INTS_8(DocValuesType.NUMERIC),
+    BYTES_FIXED_SORTED(DocValuesType.SORTED),
+    BYTES_VAR_SORTED(DocValuesType.SORTED);
+
+    final DocValuesType mapping;
+    LegacyDocValuesType(DocValuesType mapping) {
+      this.mapping = mapping;
+    }
+  }
+
+  // decodes a 4.0 type
+  private static LegacyDocValuesType getDocValuesType(byte b) {
+    return LegacyDocValuesType.values()[b];
+  }
+
+  @Override
+  public void write(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException {
+    throw new UnsupportedOperationException("this codec can only be used for reading");
+  }
 
   /** Extension of field infos */
   static final String FIELD_INFOS_EXTENSION = "fnm";
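
The nibble packing unpacked by `val & 0x0F` and `(val >>> 4) & 0x0F` above is simply two 4-bit ordinals in one byte. A hypothetical round-trip, for illustration only (the helper names are invented, not Lucene API):

```java
// Hypothetical helpers mirroring the packed doc-values byte above:
// low nibble = doc values type ordinal, high nibble = norms type ordinal.
final class DocValuesBytePacking {
  static byte pack(int dvOrdinal, int normsOrdinal) {
    return (byte) (((normsOrdinal & 0x0F) << 4) | (dvOrdinal & 0x0F));
  }
  static int dvOrdinal(byte val) {
    return val & 0x0F;
  }
  static int normsOrdinal(byte val) {
    return (val >>> 4) & 0x0F;
  }
}
```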

View File

@@ -1,151 +0,0 @@
-package org.apache.lucene.codecs.lucene40;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.util.Collections;
-import java.util.Map;
-
-import org.apache.lucene.codecs.CodecUtil;
-import org.apache.lucene.codecs.FieldInfosReader;
-import org.apache.lucene.index.CorruptIndexException;
-import org.apache.lucene.index.FieldInfo;
-import org.apache.lucene.index.FieldInfos;
-import org.apache.lucene.index.IndexFileNames;
-import org.apache.lucene.index.FieldInfo.DocValuesType;
-import org.apache.lucene.index.FieldInfo.IndexOptions;
-import org.apache.lucene.index.SegmentInfo;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IOContext;
-import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.util.IOUtils;
-
-/**
- * Lucene 4.0 FieldInfos reader.
- * @deprecated Only for reading old 4.0 and 4.1 segments
- */
-@Deprecated
-final class Lucene40FieldInfosReader extends FieldInfosReader {
-
-  /** Sole constructor. */
-  public Lucene40FieldInfosReader() {
-  }
-
-  @Override
-  public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext iocontext) throws IOException {
-    final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, "", Lucene40FieldInfosFormat.FIELD_INFOS_EXTENSION);
-    IndexInput input = directory.openInput(fileName, iocontext);
-    boolean success = false;
-    try {
-      CodecUtil.checkHeader(input, Lucene40FieldInfosFormat.CODEC_NAME,
-                                   Lucene40FieldInfosFormat.FORMAT_START,
-                                   Lucene40FieldInfosFormat.FORMAT_CURRENT);
-
-      final int size = input.readVInt(); //read in the size
-      FieldInfo infos[] = new FieldInfo[size];
-
-      for (int i = 0; i < size; i++) {
-        String name = input.readString();
-        final int fieldNumber = input.readVInt();
-        byte bits = input.readByte();
-        boolean isIndexed = (bits & Lucene40FieldInfosFormat.IS_INDEXED) != 0;
-        boolean storeTermVector = (bits & Lucene40FieldInfosFormat.STORE_TERMVECTOR) != 0;
-        boolean omitNorms = (bits & Lucene40FieldInfosFormat.OMIT_NORMS) != 0;
-        boolean storePayloads = (bits & Lucene40FieldInfosFormat.STORE_PAYLOADS) != 0;
-        final IndexOptions indexOptions;
-        if (!isIndexed) {
-          indexOptions = null;
-        } else if ((bits & Lucene40FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS) != 0) {
-          indexOptions = IndexOptions.DOCS_ONLY;
-        } else if ((bits & Lucene40FieldInfosFormat.OMIT_POSITIONS) != 0) {
-          indexOptions = IndexOptions.DOCS_AND_FREQS;
-        } else if ((bits & Lucene40FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS) != 0) {
-          indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
-        } else {
-          indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
-        }
-
-        // LUCENE-3027: past indices were able to write
-        // storePayloads=true when omitTFAP is also true,
-        // which is invalid. We correct that, here:
-        if (isIndexed && indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
-          storePayloads = false;
-        }
-        // DV Types are packed in one byte
-        byte val = input.readByte();
-        final LegacyDocValuesType oldValuesType = getDocValuesType((byte) (val & 0x0F));
-        final LegacyDocValuesType oldNormsType = getDocValuesType((byte) ((val >>> 4) & 0x0F));
-        final Map<String,String> attributes = input.readStringStringMap();
-        if (oldValuesType.mapping != null) {
-          attributes.put(LEGACY_DV_TYPE_KEY, oldValuesType.name());
-        }
-        if (oldNormsType.mapping != null) {
-          if (oldNormsType.mapping != DocValuesType.NUMERIC) {
-            throw new CorruptIndexException("invalid norm type: " + oldNormsType, input);
-          }
-          attributes.put(LEGACY_NORM_TYPE_KEY, oldNormsType.name());
-        }
-        infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector,
-          omitNorms, storePayloads, indexOptions, oldValuesType.mapping, oldNormsType.mapping, -1, Collections.unmodifiableMap(attributes));
-      }
-
-      CodecUtil.checkEOF(input);
-      FieldInfos fieldInfos = new FieldInfos(infos);
-      success = true;
-      return fieldInfos;
-    } finally {
-      if (success) {
-        input.close();
-      } else {
-        IOUtils.closeWhileHandlingException(input);
-      }
-    }
-  }
-
-  static final String LEGACY_DV_TYPE_KEY = Lucene40FieldInfosReader.class.getSimpleName() + ".dvtype";
-  static final String LEGACY_NORM_TYPE_KEY = Lucene40FieldInfosReader.class.getSimpleName() + ".normtype";
-
-  // mapping of 4.0 types -> 4.2 types
-  static enum LegacyDocValuesType {
-    NONE(null),
-    VAR_INTS(DocValuesType.NUMERIC),
-    FLOAT_32(DocValuesType.NUMERIC),
-    FLOAT_64(DocValuesType.NUMERIC),
-    BYTES_FIXED_STRAIGHT(DocValuesType.BINARY),
-    BYTES_FIXED_DEREF(DocValuesType.BINARY),
-    BYTES_VAR_STRAIGHT(DocValuesType.BINARY),
-    BYTES_VAR_DEREF(DocValuesType.BINARY),
-    FIXED_INTS_16(DocValuesType.NUMERIC),
-    FIXED_INTS_32(DocValuesType.NUMERIC),
-    FIXED_INTS_64(DocValuesType.NUMERIC),
-    FIXED_INTS_8(DocValuesType.NUMERIC),
-    BYTES_FIXED_SORTED(DocValuesType.SORTED),
-    BYTES_VAR_SORTED(DocValuesType.SORTED);
-
-    final DocValuesType mapping;
-    LegacyDocValuesType(DocValuesType mapping) {
-      this.mapping = mapping;
-    }
-  }
-
-  // decodes a 4.0 type
-  private static LegacyDocValuesType getDocValuesType(byte b) {
-    return LegacyDocValuesType.values()[b];
-  }
-}

View File

@@ -40,7 +40,7 @@ final class Lucene40NormsReader extends NormsProducer {
   }
 
   Lucene40NormsReader(SegmentReadState state, String filename) throws IOException {
-    impl = new Lucene40DocValuesReader(state, filename, Lucene40FieldInfosReader.LEGACY_NORM_TYPE_KEY);
+    impl = new Lucene40DocValuesReader(state, filename, Lucene40FieldInfosFormat.LEGACY_NORM_TYPE_KEY);
   }
 
   @Override

View File

@@ -17,10 +17,21 @@ package org.apache.lucene.codecs.lucene40;
  * limitations under the License.
  */
 
+import java.io.IOException;
+import java.text.ParseException;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.SegmentInfoFormat;
-import org.apache.lucene.codecs.SegmentInfoReader;
-import org.apache.lucene.codecs.SegmentInfoWriter;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.Version;
 
 /**
  * Lucene 4.0 Segment info format.
@@ -28,19 +39,55 @@ import org.apache.lucene.index.SegmentInfo;
  */
 @Deprecated
 public class Lucene40SegmentInfoFormat extends SegmentInfoFormat {
-  private final SegmentInfoReader reader = new Lucene40SegmentInfoReader();
 
   /** Sole constructor. */
   public Lucene40SegmentInfoFormat() {
   }
 
   @Override
-  public final SegmentInfoReader getSegmentInfoReader() {
-    return reader;
+  public final SegmentInfo read(Directory dir, String segment, IOContext context) throws IOException {
+    final String fileName = IndexFileNames.segmentFileName(segment, "", Lucene40SegmentInfoFormat.SI_EXTENSION);
+    final IndexInput input = dir.openInput(fileName, context);
+    boolean success = false;
+    try {
+      CodecUtil.checkHeader(input, Lucene40SegmentInfoFormat.CODEC_NAME,
+                                   Lucene40SegmentInfoFormat.VERSION_START,
+                                   Lucene40SegmentInfoFormat.VERSION_CURRENT);
+      final Version version;
+      try {
+        version = Version.parse(input.readString());
+      } catch (ParseException pe) {
+        throw new CorruptIndexException("unable to parse version string: " + pe.getMessage(), input, pe);
+      }
+
+      final int docCount = input.readInt();
+      if (docCount < 0) {
+        throw new CorruptIndexException("invalid docCount: " + docCount, input);
+      }
+      final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;
+      final Map<String,String> diagnostics = input.readStringStringMap();
+      input.readStringStringMap(); // read deprecated attributes
+      final Set<String> files = input.readStringSet();
+
+      CodecUtil.checkEOF(input);
+
+      final SegmentInfo si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics, null);
+      si.setFiles(files);
+
+      success = true;
+
+      return si;
+    } finally {
+      if (!success) {
+        IOUtils.closeWhileHandlingException(input);
+      } else {
+        input.close();
+      }
+    }
   }
 
   @Override
-  public SegmentInfoWriter getSegmentInfoWriter() {
+  public void write(Directory dir, SegmentInfo info, IOContext ioContext) throws IOException {
     throw new UnsupportedOperationException("this codec can only be used for reading");
   }

View File

@@ -1,88 +0,0 @@
-package org.apache.lucene.codecs.lucene40;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.text.ParseException;
-import java.util.Map;
-import java.util.Set;
-
-import org.apache.lucene.codecs.CodecUtil;
-import org.apache.lucene.codecs.SegmentInfoReader;
-import org.apache.lucene.index.CorruptIndexException;
-import org.apache.lucene.index.IndexFileNames;
-import org.apache.lucene.index.SegmentInfo;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IOContext;
-import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.Version;
-
-/**
- * Lucene 4.0 SI reader
- * @deprecated Only for reading old 4.0-4.5 segments
- */
-@Deprecated
-final class Lucene40SegmentInfoReader extends SegmentInfoReader {
-
-  /** Sole constructor. */
-  public Lucene40SegmentInfoReader() {
-  }
-
-  @Override
-  public SegmentInfo read(Directory dir, String segment, IOContext context) throws IOException {
-    final String fileName = IndexFileNames.segmentFileName(segment, "", Lucene40SegmentInfoFormat.SI_EXTENSION);
-    final IndexInput input = dir.openInput(fileName, context);
-    boolean success = false;
-    try {
-      CodecUtil.checkHeader(input, Lucene40SegmentInfoFormat.CODEC_NAME,
-                                   Lucene40SegmentInfoFormat.VERSION_START,
-                                   Lucene40SegmentInfoFormat.VERSION_CURRENT);
-      final Version version;
-      try {
-        version = Version.parse(input.readString());
-      } catch (ParseException pe) {
-        throw new CorruptIndexException("unable to parse version string: " + pe.getMessage(), input, pe);
-      }
-
-      final int docCount = input.readInt();
-      if (docCount < 0) {
-        throw new CorruptIndexException("invalid docCount: " + docCount, input);
-      }
-      final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;
-      final Map<String,String> diagnostics = input.readStringStringMap();
-      input.readStringStringMap(); // read deprecated attributes
-      final Set<String> files = input.readStringSet();
-
-      CodecUtil.checkEOF(input);
-
-      final SegmentInfo si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics, null);
-      si.setFiles(files);
-
-      success = true;
-
-      return si;
-    } finally {
-      if (!success) {
-        IOUtils.closeWhileHandlingException(input);
-      } else {
-        input.close();
-      }
-    }
-  }
-}

View File

@@ -18,10 +18,22 @@ package org.apache.lucene.codecs.lucene42;
  */
 
 import java.io.IOException;
+import java.util.Collections;
+import java.util.Map;
+
+import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.FieldInfosFormat;
-import org.apache.lucene.codecs.FieldInfosReader;
-import org.apache.lucene.codecs.FieldInfosWriter;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.FieldInfo.DocValuesType;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.IOUtils;
 
 /**
  * Lucene 4.2 Field Infos format.
@@ -29,22 +41,89 @@ import org.apache.lucene.codecs.FieldInfosWriter;
  */
 @Deprecated
 public class Lucene42FieldInfosFormat extends FieldInfosFormat {
-  private final FieldInfosReader reader = new Lucene42FieldInfosReader();
-
   /** Sole constructor. */
   public Lucene42FieldInfosFormat() {
   }
 
   @Override
-  public final FieldInfosReader getFieldInfosReader() throws IOException {
-    return reader;
-  }
-
-  @Override
-  public FieldInfosWriter getFieldInfosWriter() throws IOException {
-    throw new UnsupportedOperationException("this codec can only be used for reading");
+  public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext iocontext) throws IOException {
+    final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, "", Lucene42FieldInfosFormat.EXTENSION);
+    IndexInput input = directory.openInput(fileName, iocontext);
+    boolean success = false;
+    try {
+      CodecUtil.checkHeader(input, Lucene42FieldInfosFormat.CODEC_NAME,
+                                   Lucene42FieldInfosFormat.FORMAT_START,
+                                   Lucene42FieldInfosFormat.FORMAT_CURRENT);
+      final int size = input.readVInt(); //read in the size
+      FieldInfo infos[] = new FieldInfo[size];
+
+      for (int i = 0; i < size; i++) {
+        String name = input.readString();
+        final int fieldNumber = input.readVInt();
+        byte bits = input.readByte();
+        boolean isIndexed = (bits & Lucene42FieldInfosFormat.IS_INDEXED) != 0;
+        boolean storeTermVector = (bits & Lucene42FieldInfosFormat.STORE_TERMVECTOR) != 0;
+        boolean omitNorms = (bits & Lucene42FieldInfosFormat.OMIT_NORMS) != 0;
+        boolean storePayloads = (bits & Lucene42FieldInfosFormat.STORE_PAYLOADS) != 0;
+        final IndexOptions indexOptions;
+        if (!isIndexed) {
+          indexOptions = null;
+        } else if ((bits & Lucene42FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS) != 0) {
+          indexOptions = IndexOptions.DOCS_ONLY;
+        } else if ((bits & Lucene42FieldInfosFormat.OMIT_POSITIONS) != 0) {
+          indexOptions = IndexOptions.DOCS_AND_FREQS;
+        } else if ((bits & Lucene42FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS) != 0) {
+          indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
+        } else {
+          indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
+        }
+
+        // DV Types are packed in one byte
+        byte val = input.readByte();
+        final DocValuesType docValuesType = getDocValuesType(input, (byte) (val & 0x0F));
+        final DocValuesType normsType = getDocValuesType(input, (byte) ((val >>> 4) & 0x0F));
+        final Map<String,String> attributes = input.readStringStringMap();
+        infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector,
+          omitNorms, storePayloads, indexOptions, docValuesType, normsType, -1, Collections.unmodifiableMap(attributes));
+      }
+
+      CodecUtil.checkEOF(input);
+      FieldInfos fieldInfos = new FieldInfos(infos);
+      success = true;
+      return fieldInfos;
+    } finally {
+      if (success) {
+        input.close();
+      } else {
+        IOUtils.closeWhileHandlingException(input);
+      }
+    }
+  }
+
+  private static DocValuesType getDocValuesType(IndexInput input, byte b) throws IOException {
+    if (b == 0) {
+      return null;
+    } else if (b == 1) {
+      return DocValuesType.NUMERIC;
+    } else if (b == 2) {
+      return DocValuesType.BINARY;
+    } else if (b == 3) {
+      return DocValuesType.SORTED;
+    } else if (b == 4) {
+      return DocValuesType.SORTED_SET;
+    } else {
+      throw new CorruptIndexException("invalid docvalues byte: " + b, input);
+    }
+  }
+
+  @Override
+  public void write(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException {
+    throw new UnsupportedOperationException("this codec can only be used for reading");
   }
 
   /** Extension of field infos */
   static final String EXTENSION = "fnm";
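The DV byte above packs two four-bit type codes into one byte: the doc values type in the low nibble and the norms type in the high nibble. A standalone sketch of the arithmetic (plain Java; the numeric codes are the ones from getDocValuesType above):

    byte dv = 2;                                  // BINARY
    byte nrm = 1;                                 // NUMERIC
    byte val = (byte) (0xff & ((nrm << 4) | dv)); // == 0x12
    // unpacking recovers both codes
    assert (val & 0x0F) == 2;                     // low nibble  -> BINARY
    assert ((val >>> 4) & 0x0F) == 1;             // high nibble -> NUMERIC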

View File

@@ -1,122 +0,0 @@
package org.apache.lucene.codecs.lucene42;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Collections;
import java.util.Map;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldInfosReader;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.FieldInfo.DocValuesType;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.IOUtils;
/**
* Lucene 4.2 FieldInfos reader.
*
* @deprecated Only for reading old 4.2-4.5 segments
*/
@Deprecated
final class Lucene42FieldInfosReader extends FieldInfosReader {
/** Sole constructor. */
public Lucene42FieldInfosReader() {
}
@Override
public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext iocontext) throws IOException {
final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, "", Lucene42FieldInfosFormat.EXTENSION);
IndexInput input = directory.openInput(fileName, iocontext);
boolean success = false;
try {
CodecUtil.checkHeader(input, Lucene42FieldInfosFormat.CODEC_NAME,
Lucene42FieldInfosFormat.FORMAT_START,
Lucene42FieldInfosFormat.FORMAT_CURRENT);
final int size = input.readVInt(); //read in the size
FieldInfo infos[] = new FieldInfo[size];
for (int i = 0; i < size; i++) {
String name = input.readString();
final int fieldNumber = input.readVInt();
byte bits = input.readByte();
boolean isIndexed = (bits & Lucene42FieldInfosFormat.IS_INDEXED) != 0;
boolean storeTermVector = (bits & Lucene42FieldInfosFormat.STORE_TERMVECTOR) != 0;
boolean omitNorms = (bits & Lucene42FieldInfosFormat.OMIT_NORMS) != 0;
boolean storePayloads = (bits & Lucene42FieldInfosFormat.STORE_PAYLOADS) != 0;
final IndexOptions indexOptions;
if (!isIndexed) {
indexOptions = null;
} else if ((bits & Lucene42FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS) != 0) {
indexOptions = IndexOptions.DOCS_ONLY;
} else if ((bits & Lucene42FieldInfosFormat.OMIT_POSITIONS) != 0) {
indexOptions = IndexOptions.DOCS_AND_FREQS;
} else if ((bits & Lucene42FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS) != 0) {
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
} else {
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
}
// DV Types are packed in one byte
byte val = input.readByte();
final DocValuesType docValuesType = getDocValuesType(input, (byte) (val & 0x0F));
final DocValuesType normsType = getDocValuesType(input, (byte) ((val >>> 4) & 0x0F));
final Map<String,String> attributes = input.readStringStringMap();
infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector,
omitNorms, storePayloads, indexOptions, docValuesType, normsType, -1, Collections.unmodifiableMap(attributes));
}
CodecUtil.checkEOF(input);
FieldInfos fieldInfos = new FieldInfos(infos);
success = true;
return fieldInfos;
} finally {
if (success) {
input.close();
} else {
IOUtils.closeWhileHandlingException(input);
}
}
}
private static DocValuesType getDocValuesType(IndexInput input, byte b) throws IOException {
if (b == 0) {
return null;
} else if (b == 1) {
return DocValuesType.NUMERIC;
} else if (b == 2) {
return DocValuesType.BINARY;
} else if (b == 3) {
return DocValuesType.SORTED;
} else if (b == 4) {
return DocValuesType.SORTED_SET;
} else {
throw new CorruptIndexException("invalid docvalues byte: " + b, input);
}
}
}

View File

@@ -18,10 +18,23 @@ package org.apache.lucene.codecs.lucene46;
  */
 
 import java.io.IOException;
+import java.util.Collections;
+import java.util.Map;
+
+import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.FieldInfosFormat;
-import org.apache.lucene.codecs.FieldInfosReader;
-import org.apache.lucene.codecs.FieldInfosWriter;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.FieldInfo.DocValuesType;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.store.ChecksumIndexInput;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
 
 /**
  * Lucene 4.6 Field Infos format.
@@ -29,21 +42,139 @@ import org.apache.lucene.codecs.FieldInfosWriter;
  */
 @Deprecated
 public final class Lucene46FieldInfosFormat extends FieldInfosFormat {
-  private final FieldInfosReader reader = new Lucene46FieldInfosReader();
-  private final FieldInfosWriter writer = new Lucene46FieldInfosWriter();
-
   /** Sole constructor. */
   public Lucene46FieldInfosFormat() {
   }
 
   @Override
-  public final FieldInfosReader getFieldInfosReader() throws IOException {
-    return reader;
+  public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext context) throws IOException {
+    final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene46FieldInfosFormat.EXTENSION);
+    try (ChecksumIndexInput input = directory.openChecksumInput(fileName, context)) {
+      int codecVersion = CodecUtil.checkHeader(input, Lucene46FieldInfosFormat.CODEC_NAME,
+                                                      Lucene46FieldInfosFormat.FORMAT_START,
+                                                      Lucene46FieldInfosFormat.FORMAT_CURRENT);
+      final int size = input.readVInt(); //read in the size
+      FieldInfo infos[] = new FieldInfo[size];
+
+      for (int i = 0; i < size; i++) {
+        String name = input.readString();
+        final int fieldNumber = input.readVInt();
+        if (fieldNumber < 0) {
+          throw new CorruptIndexException("invalid field number for field: " + name + ", fieldNumber=" + fieldNumber, input);
+        }
+        byte bits = input.readByte();
+        boolean isIndexed = (bits & Lucene46FieldInfosFormat.IS_INDEXED) != 0;
+        boolean storeTermVector = (bits & Lucene46FieldInfosFormat.STORE_TERMVECTOR) != 0;
+        boolean omitNorms = (bits & Lucene46FieldInfosFormat.OMIT_NORMS) != 0;
+        boolean storePayloads = (bits & Lucene46FieldInfosFormat.STORE_PAYLOADS) != 0;
+        final IndexOptions indexOptions;
+        if (!isIndexed) {
+          indexOptions = null;
+        } else if ((bits & Lucene46FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS) != 0) {
+          indexOptions = IndexOptions.DOCS_ONLY;
+        } else if ((bits & Lucene46FieldInfosFormat.OMIT_POSITIONS) != 0) {
+          indexOptions = IndexOptions.DOCS_AND_FREQS;
+        } else if ((bits & Lucene46FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS) != 0) {
+          indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
+        } else {
+          indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
+        }
+
+        // DV Types are packed in one byte
+        byte val = input.readByte();
+        final DocValuesType docValuesType = getDocValuesType(input, (byte) (val & 0x0F));
+        final DocValuesType normsType = getDocValuesType(input, (byte) ((val >>> 4) & 0x0F));
+        final long dvGen = input.readLong();
+        final Map<String,String> attributes = input.readStringStringMap();
+        infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector,
+          omitNorms, storePayloads, indexOptions, docValuesType, normsType, dvGen, Collections.unmodifiableMap(attributes));
+      }
+
+      if (codecVersion >= Lucene46FieldInfosFormat.FORMAT_CHECKSUM) {
+        CodecUtil.checkFooter(input);
+      } else {
+        CodecUtil.checkEOF(input);
+      }
+      return new FieldInfos(infos);
+    }
+  }
+
+  private static DocValuesType getDocValuesType(IndexInput input, byte b) throws IOException {
+    if (b == 0) {
+      return null;
+    } else if (b == 1) {
+      return DocValuesType.NUMERIC;
+    } else if (b == 2) {
+      return DocValuesType.BINARY;
+    } else if (b == 3) {
+      return DocValuesType.SORTED;
+    } else if (b == 4) {
+      return DocValuesType.SORTED_SET;
+    } else if (b == 5) {
+      return DocValuesType.SORTED_NUMERIC;
+    } else {
+      throw new CorruptIndexException("invalid docvalues byte: " + b, input);
+    }
   }
 
   @Override
-  public FieldInfosWriter getFieldInfosWriter() throws IOException {
-    return writer;
+  public void write(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException {
+    final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene46FieldInfosFormat.EXTENSION);
+    try (IndexOutput output = directory.createOutput(fileName, context)) {
+      CodecUtil.writeHeader(output, Lucene46FieldInfosFormat.CODEC_NAME, Lucene46FieldInfosFormat.FORMAT_CURRENT);
+      output.writeVInt(infos.size());
+      for (FieldInfo fi : infos) {
+        IndexOptions indexOptions = fi.getIndexOptions();
+        byte bits = 0x0;
+        if (fi.hasVectors()) bits |= Lucene46FieldInfosFormat.STORE_TERMVECTOR;
+        if (fi.omitsNorms()) bits |= Lucene46FieldInfosFormat.OMIT_NORMS;
+        if (fi.hasPayloads()) bits |= Lucene46FieldInfosFormat.STORE_PAYLOADS;
+        if (fi.isIndexed()) {
+          bits |= Lucene46FieldInfosFormat.IS_INDEXED;
+          assert indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !fi.hasPayloads();
+          if (indexOptions == IndexOptions.DOCS_ONLY) {
+            bits |= Lucene46FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS;
+          } else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) {
+            bits |= Lucene46FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS;
+          } else if (indexOptions == IndexOptions.DOCS_AND_FREQS) {
+            bits |= Lucene46FieldInfosFormat.OMIT_POSITIONS;
+          }
+        }
+        output.writeString(fi.name);
+        output.writeVInt(fi.number);
+        output.writeByte(bits);
+
+        // pack the DV types in one byte
+        final byte dv = docValuesByte(fi.getDocValuesType());
+        final byte nrm = docValuesByte(fi.getNormType());
+        assert (dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0;
+        byte val = (byte) (0xff & ((nrm << 4) | dv));
+        output.writeByte(val);
+        output.writeLong(fi.getDocValuesGen());
+        output.writeStringStringMap(fi.attributes());
+      }
+      CodecUtil.writeFooter(output);
+    }
+  }
+
+  private static byte docValuesByte(DocValuesType type) {
+    if (type == null) {
+      return 0;
+    } else if (type == DocValuesType.NUMERIC) {
+      return 1;
+    } else if (type == DocValuesType.BINARY) {
+      return 2;
+    } else if (type == DocValuesType.SORTED) {
+      return 3;
+    } else if (type == DocValuesType.SORTED_SET) {
+      return 4;
+    } else if (type == DocValuesType.SORTED_NUMERIC) {
+      return 5;
+    } else {
+      throw new AssertionError();
+    }
   }
 
   /** Extension of field infos */
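The FORMAT_CHECKSUM branch above follows the Lucene 4.8+ convention that codec files end in a verifiable checksum footer. A minimal self-contained sketch of the same write/verify pattern (the file name "demo.bin" and the payload are invented for illustration):

    Directory dir = new RAMDirectory();
    IndexOutput out = dir.createOutput("demo.bin", IOContext.DEFAULT);
    CodecUtil.writeHeader(out, "DemoCodec", 0);    // codec name + version
    out.writeVInt(42);                             // payload
    CodecUtil.writeFooter(out);                    // footer magic + checksum
    out.close();

    ChecksumIndexInput in = dir.openChecksumInput("demo.bin", IOContext.READ);
    CodecUtil.checkHeader(in, "DemoCodec", 0, 0);  // expected name + version range
    int payload = in.readVInt();                   // == 42
    CodecUtil.checkFooter(in);                     // throws CorruptIndexException on corruption
    in.close();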

View File

@@ -1,121 +0,0 @@
package org.apache.lucene.codecs.lucene46;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Collections;
import java.util.Map;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldInfosReader;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.FieldInfo.DocValuesType;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
/**
* Lucene 4.6 FieldInfos reader.
*
* @deprecated only for old 4.x segments
*/
@Deprecated
final class Lucene46FieldInfosReader extends FieldInfosReader {
/** Sole constructor. */
public Lucene46FieldInfosReader() {
}
@Override
public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext context) throws IOException {
final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene46FieldInfosFormat.EXTENSION);
try (ChecksumIndexInput input = directory.openChecksumInput(fileName, context)) {
int codecVersion = CodecUtil.checkHeader(input, Lucene46FieldInfosFormat.CODEC_NAME,
Lucene46FieldInfosFormat.FORMAT_START,
Lucene46FieldInfosFormat.FORMAT_CURRENT);
final int size = input.readVInt(); //read in the size
FieldInfo infos[] = new FieldInfo[size];
for (int i = 0; i < size; i++) {
String name = input.readString();
final int fieldNumber = input.readVInt();
if (fieldNumber < 0) {
throw new CorruptIndexException("invalid field number for field: " + name + ", fieldNumber=" + fieldNumber, input);
}
byte bits = input.readByte();
boolean isIndexed = (bits & Lucene46FieldInfosFormat.IS_INDEXED) != 0;
boolean storeTermVector = (bits & Lucene46FieldInfosFormat.STORE_TERMVECTOR) != 0;
boolean omitNorms = (bits & Lucene46FieldInfosFormat.OMIT_NORMS) != 0;
boolean storePayloads = (bits & Lucene46FieldInfosFormat.STORE_PAYLOADS) != 0;
final IndexOptions indexOptions;
if (!isIndexed) {
indexOptions = null;
} else if ((bits & Lucene46FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS) != 0) {
indexOptions = IndexOptions.DOCS_ONLY;
} else if ((bits & Lucene46FieldInfosFormat.OMIT_POSITIONS) != 0) {
indexOptions = IndexOptions.DOCS_AND_FREQS;
} else if ((bits & Lucene46FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS) != 0) {
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
} else {
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
}
// DV Types are packed in one byte
byte val = input.readByte();
final DocValuesType docValuesType = getDocValuesType(input, (byte) (val & 0x0F));
final DocValuesType normsType = getDocValuesType(input, (byte) ((val >>> 4) & 0x0F));
final long dvGen = input.readLong();
final Map<String,String> attributes = input.readStringStringMap();
infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector,
omitNorms, storePayloads, indexOptions, docValuesType, normsType, dvGen, Collections.unmodifiableMap(attributes));
}
if (codecVersion >= Lucene46FieldInfosFormat.FORMAT_CHECKSUM) {
CodecUtil.checkFooter(input);
} else {
CodecUtil.checkEOF(input);
}
return new FieldInfos(infos);
}
}
private static DocValuesType getDocValuesType(IndexInput input, byte b) throws IOException {
if (b == 0) {
return null;
} else if (b == 1) {
return DocValuesType.NUMERIC;
} else if (b == 2) {
return DocValuesType.BINARY;
} else if (b == 3) {
return DocValuesType.SORTED;
} else if (b == 4) {
return DocValuesType.SORTED_SET;
} else if (b == 5) {
return DocValuesType.SORTED_NUMERIC;
} else {
throw new CorruptIndexException("invalid docvalues byte: " + b, input);
}
}
}

View File

@@ -1,103 +0,0 @@
package org.apache.lucene.codecs.lucene46;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldInfosWriter;
import org.apache.lucene.index.FieldInfo.DocValuesType;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
/**
* Lucene 4.6 FieldInfos writer.
*
* @deprecated only for old 4.x segments
*/
@Deprecated
final class Lucene46FieldInfosWriter extends FieldInfosWriter {
/** Sole constructor. */
public Lucene46FieldInfosWriter() {
}
@Override
public void write(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException {
final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene46FieldInfosFormat.EXTENSION);
try (IndexOutput output = directory.createOutput(fileName, context)) {
CodecUtil.writeHeader(output, Lucene46FieldInfosFormat.CODEC_NAME, Lucene46FieldInfosFormat.FORMAT_CURRENT);
output.writeVInt(infos.size());
for (FieldInfo fi : infos) {
IndexOptions indexOptions = fi.getIndexOptions();
byte bits = 0x0;
if (fi.hasVectors()) bits |= Lucene46FieldInfosFormat.STORE_TERMVECTOR;
if (fi.omitsNorms()) bits |= Lucene46FieldInfosFormat.OMIT_NORMS;
if (fi.hasPayloads()) bits |= Lucene46FieldInfosFormat.STORE_PAYLOADS;
if (fi.isIndexed()) {
bits |= Lucene46FieldInfosFormat.IS_INDEXED;
assert indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !fi.hasPayloads();
if (indexOptions == IndexOptions.DOCS_ONLY) {
bits |= Lucene46FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS;
} else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) {
bits |= Lucene46FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS;
} else if (indexOptions == IndexOptions.DOCS_AND_FREQS) {
bits |= Lucene46FieldInfosFormat.OMIT_POSITIONS;
}
}
output.writeString(fi.name);
output.writeVInt(fi.number);
output.writeByte(bits);
// pack the DV types in one byte
final byte dv = docValuesByte(fi.getDocValuesType());
final byte nrm = docValuesByte(fi.getNormType());
assert (dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0;
byte val = (byte) (0xff & ((nrm << 4) | dv));
output.writeByte(val);
output.writeLong(fi.getDocValuesGen());
output.writeStringStringMap(fi.attributes());
}
CodecUtil.writeFooter(output);
}
}
private static byte docValuesByte(DocValuesType type) {
if (type == null) {
return 0;
} else if (type == DocValuesType.NUMERIC) {
return 1;
} else if (type == DocValuesType.BINARY) {
return 2;
} else if (type == DocValuesType.SORTED) {
return 3;
} else if (type == DocValuesType.SORTED_SET) {
return 4;
} else if (type == DocValuesType.SORTED_NUMERIC) {
return 5;
} else {
throw new AssertionError();
}
}
}

View File

@@ -17,10 +17,20 @@ package org.apache.lucene.codecs.lucene46;
  * limitations under the License.
  */
 
+import java.io.IOException;
+import java.text.ParseException;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.SegmentInfoFormat;
-import org.apache.lucene.codecs.SegmentInfoReader;
-import org.apache.lucene.codecs.SegmentInfoWriter;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.store.ChecksumIndexInput;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.util.Version;
 
 /**
  * Lucene 4.6 Segment info format.
@@ -28,19 +38,48 @@ import org.apache.lucene.index.SegmentInfo;
  */
 @Deprecated
 public class Lucene46SegmentInfoFormat extends SegmentInfoFormat {
-  private final SegmentInfoReader reader = new Lucene46SegmentInfoReader();
-
   /** Sole constructor. */
   public Lucene46SegmentInfoFormat() {
   }
 
   @Override
-  public final SegmentInfoReader getSegmentInfoReader() {
-    return reader;
+  public SegmentInfo read(Directory dir, String segment, IOContext context) throws IOException {
+    final String fileName = IndexFileNames.segmentFileName(segment, "", Lucene46SegmentInfoFormat.SI_EXTENSION);
+    try (ChecksumIndexInput input = dir.openChecksumInput(fileName, context)) {
+      int codecVersion = CodecUtil.checkHeader(input, Lucene46SegmentInfoFormat.CODEC_NAME,
+                                                      Lucene46SegmentInfoFormat.VERSION_START,
+                                                      Lucene46SegmentInfoFormat.VERSION_CURRENT);
+      final Version version;
+      try {
+        version = Version.parse(input.readString());
+      } catch (ParseException pe) {
+        throw new CorruptIndexException("unable to parse version string: " + pe.getMessage(), input, pe);
+      }
+
+      final int docCount = input.readInt();
+      if (docCount < 0) {
+        throw new CorruptIndexException("invalid docCount: " + docCount, input);
+      }
+      final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;
+      final Map<String,String> diagnostics = input.readStringStringMap();
+      final Set<String> files = input.readStringSet();
+
+      if (codecVersion >= Lucene46SegmentInfoFormat.VERSION_CHECKSUM) {
+        CodecUtil.checkFooter(input);
+      } else {
+        CodecUtil.checkEOF(input);
+      }
+
+      final SegmentInfo si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics, null);
+      si.setFiles(files);
+      return si;
+    }
   }
 
   @Override
-  public SegmentInfoWriter getSegmentInfoWriter() {
+  public void write(Directory dir, SegmentInfo info, IOContext ioContext) throws IOException {
     throw new UnsupportedOperationException("this codec can only be used for reading");
   }
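The version string stored in the .si file round-trips through Version.parse, which is why a malformed string surfaces as a CorruptIndexException above. For example (ParseException handling elided in this sketch):

    Version v = Version.parse("4.6.1");          // throws ParseException if malformed
    assert v.onOrAfter(Version.LUCENE_4_6_0);    // callers can gate behavior on segment version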

View File

@@ -1,80 +0,0 @@
package org.apache.lucene.codecs.lucene46;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.text.ParseException;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.SegmentInfoReader;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.Version;
/**
* Lucene 4.6 segment infos reader
* @deprecated only for old 4.x segments
*/
@Deprecated
final class Lucene46SegmentInfoReader extends SegmentInfoReader {
/** Sole constructor. */
public Lucene46SegmentInfoReader() {
}
@Override
public SegmentInfo read(Directory dir, String segment, IOContext context) throws IOException {
final String fileName = IndexFileNames.segmentFileName(segment, "", Lucene46SegmentInfoFormat.SI_EXTENSION);
try (ChecksumIndexInput input = dir.openChecksumInput(fileName, context)) {
int codecVersion = CodecUtil.checkHeader(input, Lucene46SegmentInfoFormat.CODEC_NAME,
Lucene46SegmentInfoFormat.VERSION_START,
Lucene46SegmentInfoFormat.VERSION_CURRENT);
final Version version;
try {
version = Version.parse(input.readString());
} catch (ParseException pe) {
throw new CorruptIndexException("unable to parse version string: " + pe.getMessage(), input, pe);
}
final int docCount = input.readInt();
if (docCount < 0) {
throw new CorruptIndexException("invalid docCount: " + docCount, input);
}
final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;
final Map<String,String> diagnostics = input.readStringStringMap();
final Set<String> files = input.readStringSet();
if (codecVersion >= Lucene46SegmentInfoFormat.VERSION_CHECKSUM) {
CodecUtil.checkFooter(input);
} else {
CodecUtil.checkEOF(input);
}
final SegmentInfo si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics, null);
si.setFiles(files);
return si;
}
}
}

View File

@@ -25,7 +25,7 @@ import java.util.TreeSet;
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.DocValuesConsumer;
 import org.apache.lucene.codecs.MissingOrdRemapper;
-import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosReader.LegacyDocValuesType;
+import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat.LegacyDocValuesType;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.SegmentWriteState;

View File

@@ -4,7 +4,6 @@ import java.io.IOException;
 import org.apache.lucene.codecs.DocValuesFormat;
 import org.apache.lucene.codecs.FieldInfosFormat;
-import org.apache.lucene.codecs.FieldInfosWriter;
 import org.apache.lucene.codecs.NormsFormat;
 import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.codecs.SegmentInfoFormat;
@@ -35,13 +34,7 @@ import org.apache.lucene.codecs.TermVectorsFormat;
 @Deprecated
 public final class Lucene40RWCodec extends Lucene40Codec {
 
-  private final FieldInfosFormat fieldInfos = new Lucene40FieldInfosFormat() {
-    @Override
-    public FieldInfosWriter getFieldInfosWriter() throws IOException {
-      return new Lucene40FieldInfosWriter();
-    }
-  };
+  private final FieldInfosFormat fieldInfos = new Lucene40RWFieldInfosFormat();
   private final DocValuesFormat docValues = new Lucene40RWDocValuesFormat();
   private final NormsFormat norms = new Lucene40RWNormsFormat();
   private final StoredFieldsFormat stored = new Lucene40RWStoredFieldsFormat();

View File

@@ -35,6 +35,6 @@ public final class Lucene40RWDocValuesFormat extends Lucene40DocValuesFormat {
     String filename = IndexFileNames.segmentFileName(state.segmentInfo.name,
                                                      "dv",
                                                      Lucene40CompoundFormat.COMPOUND_FILE_EXTENSION);
-    return new Lucene40DocValuesWriter(state, filename, Lucene40FieldInfosReader.LEGACY_DV_TYPE_KEY);
+    return new Lucene40DocValuesWriter(state, filename, Lucene40FieldInfosFormat.LEGACY_DV_TYPE_KEY);
   }
 }

View File

@@ -19,8 +19,6 @@ package org.apache.lucene.codecs.lucene40;
 import java.io.IOException;
 
 import org.apache.lucene.codecs.CodecUtil;
-import org.apache.lucene.codecs.FieldInfosWriter;
-import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosReader.LegacyDocValuesType;
 import org.apache.lucene.index.FieldInfo.DocValuesType;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.FieldInfo;
@@ -37,10 +35,10 @@ import org.apache.lucene.util.IOUtils;
  * @deprecated for test purposes only
  */
 @Deprecated
-public final class Lucene40FieldInfosWriter extends FieldInfosWriter {
+public final class Lucene40RWFieldInfosFormat extends Lucene40FieldInfosFormat {
 
   /** Sole constructor. */
-  public Lucene40FieldInfosWriter() {
+  public Lucene40RWFieldInfosFormat() {
   }
 
   @Override
@@ -76,8 +74,8 @@ public final class Lucene40FieldInfosWriter extends FieldInfosWriter {
       output.writeByte(bits);
 
       // pack the DV types in one byte
-      final byte dv = docValuesByte(fi.getDocValuesType(), fi.getAttribute(Lucene40FieldInfosReader.LEGACY_DV_TYPE_KEY));
-      final byte nrm = docValuesByte(fi.getNormType(), fi.getAttribute(Lucene40FieldInfosReader.LEGACY_NORM_TYPE_KEY));
+      final byte dv = docValuesByte(fi.getDocValuesType(), fi.getAttribute(LEGACY_DV_TYPE_KEY));
+      final byte nrm = docValuesByte(fi.getNormType(), fi.getAttribute(LEGACY_NORM_TYPE_KEY));
       assert (dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0;
       byte val = (byte) (0xff & ((nrm << 4) | dv));
       output.writeByte(val);

View File

@@ -36,7 +36,7 @@ public final class Lucene40RWNormsFormat extends Lucene40NormsFormat {
     String filename = IndexFileNames.segmentFileName(state.segmentInfo.name,
                                                      "nrm",
                                                      Lucene40CompoundFormat.COMPOUND_FILE_EXTENSION);
-    final Lucene40DocValuesWriter impl = new Lucene40DocValuesWriter(state, filename, Lucene40FieldInfosReader.LEGACY_NORM_TYPE_KEY);
+    final Lucene40DocValuesWriter impl = new Lucene40DocValuesWriter(state, filename, Lucene40FieldInfosFormat.LEGACY_NORM_TYPE_KEY);
     return new NormsConsumer() {
       @Override
       public void addNormsField(FieldInfo field, Iterable<Number> values) throws IOException {

View File

@@ -17,7 +17,16 @@ package org.apache.lucene.codecs.lucene40;
  * limitations under the License.
  */
 
-import org.apache.lucene.codecs.SegmentInfoWriter;
+import java.io.IOException;
+import java.util.Collections;
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.IOUtils;
 
 /**
  * Read-write version of 4.0 segmentinfo format for testing
@@ -27,7 +36,33 @@ import org.apache.lucene.codecs.SegmentInfoWriter;
 public final class Lucene40RWSegmentInfoFormat extends Lucene40SegmentInfoFormat {
 
   @Override
-  public SegmentInfoWriter getSegmentInfoWriter() {
-    return new Lucene40SegmentInfoWriter();
+  public void write(Directory dir, SegmentInfo si, IOContext ioContext) throws IOException {
+    final String fileName = IndexFileNames.segmentFileName(si.name, "", Lucene40SegmentInfoFormat.SI_EXTENSION);
+    si.addFile(fileName);
+
+    final IndexOutput output = dir.createOutput(fileName, ioContext);
+    boolean success = false;
+    try {
+      CodecUtil.writeHeader(output, Lucene40SegmentInfoFormat.CODEC_NAME, Lucene40SegmentInfoFormat.VERSION_CURRENT);
+      // Write the Lucene version that created this segment, since 3.1
+      output.writeString(si.getVersion().toString());
+      output.writeInt(si.getDocCount());
+
+      output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
+      output.writeStringStringMap(si.getDiagnostics());
+      output.writeStringStringMap(Collections.<String,String>emptyMap());
+      output.writeStringSet(si.files());
+      success = true;
+    } finally {
+      if (!success) {
+        IOUtils.closeWhileHandlingException(output);
+        // TODO: why must we do this? do we not get tracking dir wrapper?
+        IOUtils.deleteFilesIgnoringExceptions(si.dir, fileName);
+      } else {
+        output.close();
+      }
+    }
   }
 }
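For orientation, a hedged round-trip sketch pairing this test-only writer with the read side it inherits from Lucene40SegmentInfoFormat; the segment name "_0", the doc count, and the empty diagnostics map are invented for the example:

    Directory dir = new RAMDirectory();
    SegmentInfo si = new SegmentInfo(dir, Version.LUCENE_4_0_0, "_0", 1, false,
                                     null, Collections.<String,String>emptyMap(), null);
    si.setFiles(new HashSet<String>());  // write() registers the .si file itself via addFile
    Lucene40RWSegmentInfoFormat format = new Lucene40RWSegmentInfoFormat();
    format.write(dir, si, IOContext.DEFAULT);
    SegmentInfo back = format.read(dir, "_0", IOContext.DEFAULT);
    assert back.getDocCount() == 1;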

View File

@@ -1,75 +0,0 @@
package org.apache.lucene.codecs.lucene40;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Collections;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.SegmentInfoWriter;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.IOUtils;
/**
* writer for 4.0 segmentinfos for testing
* @deprecated for test purposes only
*/
@Deprecated
public final class Lucene40SegmentInfoWriter extends SegmentInfoWriter {
/** Sole constructor. */
public Lucene40SegmentInfoWriter() {
}
/** Save a single segment's info. */
@Override
public void write(Directory dir, SegmentInfo si, FieldInfos fis, IOContext ioContext) throws IOException {
final String fileName = IndexFileNames.segmentFileName(si.name, "", Lucene40SegmentInfoFormat.SI_EXTENSION);
si.addFile(fileName);
final IndexOutput output = dir.createOutput(fileName, ioContext);
boolean success = false;
try {
CodecUtil.writeHeader(output, Lucene40SegmentInfoFormat.CODEC_NAME, Lucene40SegmentInfoFormat.VERSION_CURRENT);
// Write the Lucene version that created this segment, since 3.1
output.writeString(si.getVersion().toString());
output.writeInt(si.getDocCount());
output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
output.writeStringStringMap(si.getDiagnostics());
output.writeStringStringMap(Collections.<String,String>emptyMap());
output.writeStringSet(si.files());
success = true;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(output);
// TODO: why must we do this? do we not get tracking dir wrapper?
IOUtils.deleteFilesIgnoringExceptions(si.dir, fileName);
} else {
output.close();
}
}
}
}

View File

@@ -0,0 +1,71 @@
package org.apache.lucene.codecs.lucene40;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat.LegacyDocValuesType;
import org.apache.lucene.index.BaseFieldInfoFormatTestCase;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.DocValuesType;
/** Test Lucene 4.0 FieldInfos Format */
public class TestLucene40FieldInfoFormat extends BaseFieldInfoFormatTestCase {
private final Codec codec = new Lucene40RWCodec();
@Override
protected Codec getCodec() {
return codec;
}
// we only support these three dv types
@Override
@Deprecated
protected DocValuesType[] getDocValuesTypes() {
return new DocValuesType[] {
DocValuesType.BINARY,
DocValuesType.NUMERIC,
DocValuesType.SORTED
};
}
// but we have more internal typing information, previously recorded in fieldinfos.
// this is exposed via attributes (so our writer expects them to be set by the dv impl)
@Override
protected void addAttributes(FieldInfo fi) {
DocValuesType dvType = fi.getDocValuesType();
if (dvType != null) {
switch (dvType) {
case BINARY:
fi.putAttribute(Lucene40FieldInfosFormat.LEGACY_DV_TYPE_KEY, LegacyDocValuesType.BYTES_FIXED_STRAIGHT.name());
break;
case NUMERIC:
fi.putAttribute(Lucene40FieldInfosFormat.LEGACY_DV_TYPE_KEY, LegacyDocValuesType.FIXED_INTS_32.name());
break;
case SORTED:
fi.putAttribute(Lucene40FieldInfosFormat.LEGACY_DV_TYPE_KEY, LegacyDocValuesType.BYTES_FIXED_SORTED.name());
break;
default:
throw new AssertionError();
}
}
if (fi.getNormType() != null) {
fi.putAttribute(Lucene40FieldInfosFormat.LEGACY_NORM_TYPE_KEY, LegacyDocValuesType.FIXED_INTS_8.name());
}
}
}
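A short sketch of how such an attribute is recovered later (the FieldInfo `fi` is assumed to come from a read of this format; the key and enum are the ones used above):

    String legacy = fi.getAttribute(Lucene40FieldInfosFormat.LEGACY_DV_TYPE_KEY);
    if (legacy != null) {
      // the fine-grained 4.0 type hiding behind the coarse DocValuesType
      LegacyDocValuesType dvType = LegacyDocValuesType.valueOf(legacy);
    }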

View File

@@ -0,0 +1,58 @@
package org.apache.lucene.codecs.lucene40;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.BaseSegmentInfoFormatTestCase;
import org.apache.lucene.util.Version;
/**
* Tests Lucene40SegmentInfoFormat
*/
public class TestLucene40SegmentInfoFormat extends BaseSegmentInfoFormatTestCase {
@Override
protected Version[] getVersions() {
// NOTE: some of these bugfix releases we never actually "wrote",
// but staying on the safe side...
return new Version[] {
Version.LUCENE_4_0_0_ALPHA,
Version.LUCENE_4_0_0_BETA,
Version.LUCENE_4_0_0,
Version.LUCENE_4_1_0,
Version.LUCENE_4_2_0,
Version.LUCENE_4_2_1,
Version.LUCENE_4_3_0,
Version.LUCENE_4_3_1,
Version.LUCENE_4_4_0,
Version.LUCENE_4_5_0,
Version.LUCENE_4_5_1,
};
}
@Override
@Deprecated
protected void assertIDEquals(byte[] expected, byte[] actual) {
assertNull(actual); // we don't support IDs
}
@Override
protected Codec getCodec() {
return new Lucene40RWCodec();
}
}

View File

@@ -1,17 +1,13 @@
 package org.apache.lucene.codecs.lucene41;
 
-import java.io.IOException;
-
 import org.apache.lucene.codecs.DocValuesFormat;
 import org.apache.lucene.codecs.FieldInfosFormat;
-import org.apache.lucene.codecs.FieldInfosWriter;
 import org.apache.lucene.codecs.NormsFormat;
 import org.apache.lucene.codecs.SegmentInfoFormat;
 import org.apache.lucene.codecs.StoredFieldsFormat;
 import org.apache.lucene.codecs.TermVectorsFormat;
-import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat;
-import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosWriter;
 import org.apache.lucene.codecs.lucene40.Lucene40RWDocValuesFormat;
+import org.apache.lucene.codecs.lucene40.Lucene40RWFieldInfosFormat;
 import org.apache.lucene.codecs.lucene40.Lucene40RWNormsFormat;
 import org.apache.lucene.codecs.lucene40.Lucene40RWSegmentInfoFormat;
 import org.apache.lucene.codecs.lucene40.Lucene40RWTermVectorsFormat;
@@ -40,13 +36,7 @@ import org.apache.lucene.codecs.lucene40.Lucene40RWTermVectorsFormat;
 @Deprecated
 public final class Lucene41RWCodec extends Lucene41Codec {
 
   private final StoredFieldsFormat fieldsFormat = new Lucene41RWStoredFieldsFormat();
-  private final FieldInfosFormat fieldInfos = new Lucene40FieldInfosFormat() {
-    @Override
-    public FieldInfosWriter getFieldInfosWriter() throws IOException {
-      return new Lucene40FieldInfosWriter();
-    }
-  };
+  private final FieldInfosFormat fieldInfos = new Lucene40RWFieldInfosFormat();
   private final DocValuesFormat docValues = new Lucene40RWDocValuesFormat();
   private final NormsFormat norms = new Lucene40RWNormsFormat();
   private final TermVectorsFormat vectors = new Lucene40RWTermVectorsFormat();

View File

@@ -17,11 +17,8 @@ package org.apache.lucene.codecs.lucene42;
  * limitations under the License.
  */
 
-import java.io.IOException;
-
 import org.apache.lucene.codecs.DocValuesFormat;
 import org.apache.lucene.codecs.FieldInfosFormat;
-import org.apache.lucene.codecs.FieldInfosWriter;
 import org.apache.lucene.codecs.NormsFormat;
 import org.apache.lucene.codecs.SegmentInfoFormat;
 import org.apache.lucene.codecs.StoredFieldsFormat;
@@ -39,13 +36,7 @@ public final class Lucene42RWCodec extends Lucene42Codec {
   private static final DocValuesFormat dv = new Lucene42RWDocValuesFormat();
   private static final NormsFormat norms = new Lucene42RWNormsFormat();
   private static final StoredFieldsFormat storedFields = new Lucene41RWStoredFieldsFormat();
-
-  private final FieldInfosFormat fieldInfosFormat = new Lucene42FieldInfosFormat() {
-    @Override
-    public FieldInfosWriter getFieldInfosWriter() throws IOException {
-      return new Lucene42FieldInfosWriter();
-    }
-  };
+  private static final FieldInfosFormat fieldInfosFormat = new Lucene42RWFieldInfosFormat();
 
   @Override
   public DocValuesFormat getDocValuesFormatForField(String field) {

View File

@@ -20,7 +20,6 @@ package org.apache.lucene.codecs.lucene42;
 import java.io.IOException;
 
 import org.apache.lucene.codecs.CodecUtil;
-import org.apache.lucene.codecs.FieldInfosWriter;
 import org.apache.lucene.index.FieldInfo.DocValuesType;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.FieldInfo;
@@ -37,10 +36,10 @@ import org.apache.lucene.util.IOUtils;
  * @deprecated for test purposes only
  */
 @Deprecated
-public final class Lucene42FieldInfosWriter extends FieldInfosWriter {
+public final class Lucene42RWFieldInfosFormat extends Lucene42FieldInfosFormat {
 
   /** Sole constructor. */
-  public Lucene42FieldInfosWriter() {
+  public Lucene42RWFieldInfosFormat() {
   }
 
   @Override

View File

@@ -0,0 +1,44 @@
package org.apache.lucene.codecs.lucene42;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.BaseFieldInfoFormatTestCase;
import org.apache.lucene.index.FieldInfo.DocValuesType;
/** Test Lucene 4.2 FieldInfos Format */
public class TestLucene42FieldInfoFormat extends BaseFieldInfoFormatTestCase {
private final Codec codec = new Lucene42RWCodec();
@Override
protected Codec getCodec() {
return codec;
}
// we only support these four dv types
@Override
@Deprecated
protected DocValuesType[] getDocValuesTypes() {
return new DocValuesType[] {
DocValuesType.BINARY,
DocValuesType.NUMERIC,
DocValuesType.SORTED,
DocValuesType.SORTED_SET
};
}
}

View File

@@ -17,19 +17,15 @@ package org.apache.lucene.codecs.lucene45;
  * limitations under the License.
  */
 
-import java.io.IOException;
-
 import org.apache.lucene.codecs.DocValuesFormat;
 import org.apache.lucene.codecs.FieldInfosFormat;
-import org.apache.lucene.codecs.FieldInfosWriter;
 import org.apache.lucene.codecs.NormsFormat;
 import org.apache.lucene.codecs.SegmentInfoFormat;
 import org.apache.lucene.codecs.StoredFieldsFormat;
 import org.apache.lucene.codecs.TermVectorsFormat;
 import org.apache.lucene.codecs.lucene40.Lucene40RWSegmentInfoFormat;
 import org.apache.lucene.codecs.lucene41.Lucene41RWStoredFieldsFormat;
-import org.apache.lucene.codecs.lucene42.Lucene42FieldInfosFormat;
-import org.apache.lucene.codecs.lucene42.Lucene42FieldInfosWriter;
+import org.apache.lucene.codecs.lucene42.Lucene42RWFieldInfosFormat;
 import org.apache.lucene.codecs.lucene42.Lucene42RWNormsFormat;
 import org.apache.lucene.codecs.lucene42.Lucene42RWTermVectorsFormat;
@@ -39,12 +35,7 @@ import org.apache.lucene.codecs.lucene42.Lucene42RWTermVectorsFormat;
 @SuppressWarnings("deprecation")
 public final class Lucene45RWCodec extends Lucene45Codec {
 
-  private final FieldInfosFormat fieldInfosFormat = new Lucene42FieldInfosFormat() {
-    @Override
-    public FieldInfosWriter getFieldInfosWriter() throws IOException {
-      return new Lucene42FieldInfosWriter();
-    }
-  };
+  private static final FieldInfosFormat fieldInfosFormat = new Lucene42RWFieldInfosFormat();
 
   @Override
   public FieldInfosFormat fieldInfosFormat() {

View File

@@ -17,7 +17,16 @@ package org.apache.lucene.codecs.lucene46;
  * limitations under the License.
  */
 
-import org.apache.lucene.codecs.SegmentInfoWriter;
+import java.io.IOException;
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.Version;
 
 /**
  * Read-Write version of 4.6 segmentinfo format for testing
@@ -26,7 +35,36 @@ import org.apache.lucene.codecs.SegmentInfoWriter;
 @Deprecated
 public final class Lucene46RWSegmentInfoFormat extends Lucene46SegmentInfoFormat {
 
   @Override
-  public SegmentInfoWriter getSegmentInfoWriter() {
-    return new Lucene46SegmentInfoWriter();
+  public void write(Directory dir, SegmentInfo si, IOContext ioContext) throws IOException {
+    final String fileName = IndexFileNames.segmentFileName(si.name, "", Lucene46SegmentInfoFormat.SI_EXTENSION);
+    si.addFile(fileName);
+
+    final IndexOutput output = dir.createOutput(fileName, ioContext);
+    boolean success = false;
+    try {
+      CodecUtil.writeHeader(output, Lucene46SegmentInfoFormat.CODEC_NAME, Lucene46SegmentInfoFormat.VERSION_CURRENT);
+      Version version = si.getVersion();
+      if (version.major < 4) {
+        throw new IllegalArgumentException("invalid major version: should be >= 4 but got: " + version.major + " segment=" + si);
+      }
+      // Write the Lucene version that created this segment, since 3.1
+      output.writeString(version.toString());
+      output.writeInt(si.getDocCount());
+
+      output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
+      output.writeStringStringMap(si.getDiagnostics());
+      output.writeStringSet(si.files());
+      CodecUtil.writeFooter(output);
+      success = true;
+    } finally {
+      if (!success) {
+        IOUtils.closeWhileHandlingException(output);
+        // TODO: are we doing this outside of the tracking wrapper? why must SIWriter cleanup like this?
+        IOUtils.deleteFilesIgnoringExceptions(si.dir, fileName);
+      } else {
+        output.close();
+      }
+    }
   }
 }
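Note the compound-file flag is serialized as a full byte using the SegmentInfo constants (YES == 1, NO == -1), so the read side in Lucene46SegmentInfoFormat tests for equality with SegmentInfo.YES rather than for a nonzero value:

    output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO)); // write side, as above
    boolean isCompoundFile = input.readByte() == SegmentInfo.YES;                          // read side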

View File

@@ -1,78 +0,0 @@
package org.apache.lucene.codecs.lucene46;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.SegmentInfoWriter;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;
/**
* Writer for 4.6 segmentinfo format for testing
* @deprecated for test purposes only
*/
@Deprecated
final class Lucene46SegmentInfoWriter extends SegmentInfoWriter {
/** Sole constructor. */
public Lucene46SegmentInfoWriter() {
}
/** Save a single segment's info. */
@Override
public void write(Directory dir, SegmentInfo si, FieldInfos fis, IOContext ioContext) throws IOException {
final String fileName = IndexFileNames.segmentFileName(si.name, "", Lucene46SegmentInfoFormat.SI_EXTENSION);
si.addFile(fileName);
final IndexOutput output = dir.createOutput(fileName, ioContext);
boolean success = false;
try {
CodecUtil.writeHeader(output, Lucene46SegmentInfoFormat.CODEC_NAME, Lucene46SegmentInfoFormat.VERSION_CURRENT);
Version version = si.getVersion();
if (version.major < 4) {
throw new IllegalArgumentException("invalid major version: should be >= 4 but got: " + version.major + " segment=" + si);
}
// Write the Lucene version that created this segment, since 3.1
output.writeString(version.toString());
output.writeInt(si.getDocCount());
output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
output.writeStringStringMap(si.getDiagnostics());
output.writeStringSet(si.files());
CodecUtil.writeFooter(output);
success = true;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(output);
// TODO: are we doing this outside of the tracking wrapper? why must SIWriter cleanup like this?
IOUtils.deleteFilesIgnoringExceptions(si.dir, fileName);
} else {
output.close();
}
}
}
}

View File

@@ -1,9 +1,4 @@
-package org.apache.lucene.util;
+package org.apache.lucene.codecs.lucene46;
-import java.io.IOException;
-import java.util.BitSet;
-import org.junit.Ignore;
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -22,17 +17,17 @@ import org.junit.Ignore;
  * limitations under the License.
  */
-public class TestDocIdBitSet extends BaseDocIdSetTestCase<DocIdBitSet> {
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.index.BaseFieldInfoFormatTestCase;
+/** Test Lucene 4.6 FieldInfos Format */
+public class TestLucene46FieldInfoFormat extends BaseFieldInfoFormatTestCase {
+  private final Codec codec = new Lucene46RWCodec();
   @Override
-  public DocIdBitSet copyOf(BitSet bs, int length) throws IOException {
-    return new DocIdBitSet((BitSet) bs.clone());
+  protected Codec getCodec() {
+    return codec;
   }
-  @Override
-  @Ignore("no access to the internals of this impl")
-  public void testRamBytesUsed() throws IOException {
-    super.testRamBytesUsed();
-  }
+  // TODO: we actually didn't support SORTED_NUMERIC initially, it was done in a minor rev.
 }

View File

@@ -0,0 +1,57 @@
package org.apache.lucene.codecs.lucene46;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.BaseSegmentInfoFormatTestCase;
import org.apache.lucene.util.Version;
/**
 * Tests Lucene46SegmentInfoFormat
*/
public class TestLucene46SegmentInfoFormat extends BaseSegmentInfoFormatTestCase {
@Override
protected Version[] getVersions() {
// NOTE: some of these bugfix releases we never actually "wrote",
// but staying on the safe side...
return new Version[] {
Version.LUCENE_4_6_0,
Version.LUCENE_4_6_1,
Version.LUCENE_4_7_0,
Version.LUCENE_4_7_1,
Version.LUCENE_4_7_2,
Version.LUCENE_4_8_0,
Version.LUCENE_4_8_1,
Version.LUCENE_4_9_0,
Version.LUCENE_4_10_0,
Version.LUCENE_4_10_1
};
}
@Override
@Deprecated
protected void assertIDEquals(byte[] expected, byte[] actual) {
assertNull(actual); // we don't support IDs
}
@Override
protected Codec getCodec() {
return new Lucene46RWCodec();
}
}

View File

@@ -18,10 +18,26 @@ package org.apache.lucene.codecs.simpletext;
 */
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.codecs.FieldInfosFormat;
-import org.apache.lucene.codecs.FieldInfosReader;
-import org.apache.lucene.codecs.FieldInfosWriter;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.FieldInfo.DocValuesType;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.StringHelper;
/**
 * plaintext field infos format
@@ -30,16 +46,220 @@ import org.apache.lucene.codecs.FieldInfosWriter;
 * @lucene.experimental
 */
public class SimpleTextFieldInfosFormat extends FieldInfosFormat {
-  private final FieldInfosReader reader = new SimpleTextFieldInfosReader();
-  private final FieldInfosWriter writer = new SimpleTextFieldInfosWriter();
+  /** Extension of field infos */
static final String FIELD_INFOS_EXTENSION = "inf";
static final BytesRef NUMFIELDS = new BytesRef("number of fields ");
static final BytesRef NAME = new BytesRef(" name ");
static final BytesRef NUMBER = new BytesRef(" number ");
static final BytesRef ISINDEXED = new BytesRef(" indexed ");
static final BytesRef STORETV = new BytesRef(" term vectors ");
static final BytesRef STORETVPOS = new BytesRef(" term vector positions ");
static final BytesRef STORETVOFF = new BytesRef(" term vector offsets ");
static final BytesRef PAYLOADS = new BytesRef(" payloads ");
static final BytesRef NORMS = new BytesRef(" norms ");
static final BytesRef NORMS_TYPE = new BytesRef(" norms type ");
static final BytesRef DOCVALUES = new BytesRef(" doc values ");
static final BytesRef DOCVALUES_GEN = new BytesRef(" doc values gen ");
static final BytesRef INDEXOPTIONS = new BytesRef(" index options ");
static final BytesRef NUM_ATTS = new BytesRef(" attributes ");
static final BytesRef ATT_KEY = new BytesRef(" key ");
static final BytesRef ATT_VALUE = new BytesRef(" value ");
  @Override
-  public FieldInfosReader getFieldInfosReader() throws IOException {
-    return reader;
+  public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext iocontext) throws IOException {
+    final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, FIELD_INFOS_EXTENSION);
ChecksumIndexInput input = directory.openChecksumInput(fileName, iocontext);
BytesRefBuilder scratch = new BytesRefBuilder();
boolean success = false;
try {
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), NUMFIELDS);
final int size = Integer.parseInt(readString(NUMFIELDS.length, scratch));
FieldInfo infos[] = new FieldInfo[size];
for (int i = 0; i < size; i++) {
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), NAME);
String name = readString(NAME.length, scratch);
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), NUMBER);
int fieldNumber = Integer.parseInt(readString(NUMBER.length, scratch));
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), ISINDEXED);
boolean isIndexed = Boolean.parseBoolean(readString(ISINDEXED.length, scratch));
final IndexOptions indexOptions;
if (isIndexed) {
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), INDEXOPTIONS);
indexOptions = IndexOptions.valueOf(readString(INDEXOPTIONS.length, scratch));
} else {
indexOptions = null;
}
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), STORETV);
boolean storeTermVector = Boolean.parseBoolean(readString(STORETV.length, scratch));
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), PAYLOADS);
boolean storePayloads = Boolean.parseBoolean(readString(PAYLOADS.length, scratch));
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), NORMS);
boolean omitNorms = !Boolean.parseBoolean(readString(NORMS.length, scratch));
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), NORMS_TYPE);
String nrmType = readString(NORMS_TYPE.length, scratch);
final DocValuesType normsType = docValuesType(nrmType);
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), DOCVALUES);
String dvType = readString(DOCVALUES.length, scratch);
final DocValuesType docValuesType = docValuesType(dvType);
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), DOCVALUES_GEN);
final long dvGen = Long.parseLong(readString(DOCVALUES_GEN.length, scratch));
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), NUM_ATTS);
int numAtts = Integer.parseInt(readString(NUM_ATTS.length, scratch));
Map<String,String> atts = new HashMap<>();
for (int j = 0; j < numAtts; j++) {
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), ATT_KEY);
String key = readString(ATT_KEY.length, scratch);
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), ATT_VALUE);
String value = readString(ATT_VALUE.length, scratch);
atts.put(key, value);
}
infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector,
omitNorms, storePayloads, indexOptions, docValuesType, normsType, dvGen, Collections.unmodifiableMap(atts));
}
SimpleTextUtil.checkFooter(input);
FieldInfos fieldInfos = new FieldInfos(infos);
success = true;
return fieldInfos;
} finally {
if (success) {
input.close();
} else {
IOUtils.closeWhileHandlingException(input);
}
}
}
public DocValuesType docValuesType(String dvType) {
if ("false".equals(dvType)) {
return null;
} else {
return DocValuesType.valueOf(dvType);
}
}
private String readString(int offset, BytesRefBuilder scratch) {
return new String(scratch.bytes(), offset, scratch.length()-offset, StandardCharsets.UTF_8);
  }
  @Override
-  public FieldInfosWriter getFieldInfosWriter() throws IOException {
-    return writer;
+  public void write(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException {
+    final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, FIELD_INFOS_EXTENSION);
IndexOutput out = directory.createOutput(fileName, context);
BytesRefBuilder scratch = new BytesRefBuilder();
boolean success = false;
try {
SimpleTextUtil.write(out, NUMFIELDS);
SimpleTextUtil.write(out, Integer.toString(infos.size()), scratch);
SimpleTextUtil.writeNewline(out);
for (FieldInfo fi : infos) {
SimpleTextUtil.write(out, NAME);
SimpleTextUtil.write(out, fi.name, scratch);
SimpleTextUtil.writeNewline(out);
SimpleTextUtil.write(out, NUMBER);
SimpleTextUtil.write(out, Integer.toString(fi.number), scratch);
SimpleTextUtil.writeNewline(out);
SimpleTextUtil.write(out, ISINDEXED);
SimpleTextUtil.write(out, Boolean.toString(fi.isIndexed()), scratch);
SimpleTextUtil.writeNewline(out);
if (fi.isIndexed()) {
assert fi.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !fi.hasPayloads();
SimpleTextUtil.write(out, INDEXOPTIONS);
SimpleTextUtil.write(out, fi.getIndexOptions().toString(), scratch);
SimpleTextUtil.writeNewline(out);
}
SimpleTextUtil.write(out, STORETV);
SimpleTextUtil.write(out, Boolean.toString(fi.hasVectors()), scratch);
SimpleTextUtil.writeNewline(out);
SimpleTextUtil.write(out, PAYLOADS);
SimpleTextUtil.write(out, Boolean.toString(fi.hasPayloads()), scratch);
SimpleTextUtil.writeNewline(out);
SimpleTextUtil.write(out, NORMS);
SimpleTextUtil.write(out, Boolean.toString(!fi.omitsNorms()), scratch);
SimpleTextUtil.writeNewline(out);
SimpleTextUtil.write(out, NORMS_TYPE);
SimpleTextUtil.write(out, getDocValuesType(fi.getNormType()), scratch);
SimpleTextUtil.writeNewline(out);
SimpleTextUtil.write(out, DOCVALUES);
SimpleTextUtil.write(out, getDocValuesType(fi.getDocValuesType()), scratch);
SimpleTextUtil.writeNewline(out);
SimpleTextUtil.write(out, DOCVALUES_GEN);
SimpleTextUtil.write(out, Long.toString(fi.getDocValuesGen()), scratch);
SimpleTextUtil.writeNewline(out);
Map<String,String> atts = fi.attributes();
int numAtts = atts == null ? 0 : atts.size();
SimpleTextUtil.write(out, NUM_ATTS);
SimpleTextUtil.write(out, Integer.toString(numAtts), scratch);
SimpleTextUtil.writeNewline(out);
if (numAtts > 0) {
for (Map.Entry<String,String> entry : atts.entrySet()) {
SimpleTextUtil.write(out, ATT_KEY);
SimpleTextUtil.write(out, entry.getKey(), scratch);
SimpleTextUtil.writeNewline(out);
SimpleTextUtil.write(out, ATT_VALUE);
SimpleTextUtil.write(out, entry.getValue(), scratch);
SimpleTextUtil.writeNewline(out);
}
}
}
SimpleTextUtil.writeChecksum(out, scratch);
success = true;
} finally {
if (success) {
out.close();
} else {
IOUtils.closeWhileHandlingException(out);
}
}
}
private static String getDocValuesType(DocValuesType type) {
return type == null ? "false" : type.toString();
  }
}
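For orientation, the write() above emits one labeled line per property. An illustrative .inf file for a single indexed field with norms and no doc values might look roughly like this (field name and values are invented for the example; exact label whitespace and the trailing checksum line are omitted):

number of fields 1
  name title
  number 0
  indexed true
  index options DOCS_AND_FREQS_AND_POSITIONS
  term vectors false
  payloads false
  norms true
  norms type NUMERIC
  doc values false
  doc values gen -1
  attributes 0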

View File

@@ -1,157 +0,0 @@
package org.apache.lucene.codecs.simpletext;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.codecs.FieldInfosReader;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.DocValuesType;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.StringHelper;
import static org.apache.lucene.codecs.simpletext.SimpleTextFieldInfosWriter.*;
/**
* reads plaintext field infos files
* <p>
* <b><font color="red">FOR RECREATIONAL USE ONLY</font></B>
* @lucene.experimental
*/
public class SimpleTextFieldInfosReader extends FieldInfosReader {
@Override
public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext iocontext) throws IOException {
final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, FIELD_INFOS_EXTENSION);
ChecksumIndexInput input = directory.openChecksumInput(fileName, iocontext);
BytesRefBuilder scratch = new BytesRefBuilder();
boolean success = false;
try {
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), NUMFIELDS);
final int size = Integer.parseInt(readString(NUMFIELDS.length, scratch));
FieldInfo infos[] = new FieldInfo[size];
for (int i = 0; i < size; i++) {
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), NAME);
String name = readString(NAME.length, scratch);
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), NUMBER);
int fieldNumber = Integer.parseInt(readString(NUMBER.length, scratch));
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), ISINDEXED);
boolean isIndexed = Boolean.parseBoolean(readString(ISINDEXED.length, scratch));
final IndexOptions indexOptions;
if (isIndexed) {
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), INDEXOPTIONS);
indexOptions = IndexOptions.valueOf(readString(INDEXOPTIONS.length, scratch));
} else {
indexOptions = null;
}
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), STORETV);
boolean storeTermVector = Boolean.parseBoolean(readString(STORETV.length, scratch));
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), PAYLOADS);
boolean storePayloads = Boolean.parseBoolean(readString(PAYLOADS.length, scratch));
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), NORMS);
boolean omitNorms = !Boolean.parseBoolean(readString(NORMS.length, scratch));
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), NORMS_TYPE);
String nrmType = readString(NORMS_TYPE.length, scratch);
final DocValuesType normsType = docValuesType(nrmType);
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), DOCVALUES);
String dvType = readString(DOCVALUES.length, scratch);
final DocValuesType docValuesType = docValuesType(dvType);
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), DOCVALUES_GEN);
final long dvGen = Long.parseLong(readString(DOCVALUES_GEN.length, scratch));
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), NUM_ATTS);
int numAtts = Integer.parseInt(readString(NUM_ATTS.length, scratch));
Map<String,String> atts = new HashMap<>();
for (int j = 0; j < numAtts; j++) {
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), ATT_KEY);
String key = readString(ATT_KEY.length, scratch);
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), ATT_VALUE);
String value = readString(ATT_VALUE.length, scratch);
atts.put(key, value);
}
infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector,
omitNorms, storePayloads, indexOptions, docValuesType, normsType, dvGen, Collections.unmodifiableMap(atts));
}
SimpleTextUtil.checkFooter(input);
FieldInfos fieldInfos = new FieldInfos(infos);
success = true;
return fieldInfos;
} finally {
if (success) {
input.close();
} else {
IOUtils.closeWhileHandlingException(input);
}
}
}
public DocValuesType docValuesType(String dvType) {
if ("false".equals(dvType)) {
return null;
} else {
return DocValuesType.valueOf(dvType);
}
}
private String readString(int offset, BytesRefBuilder scratch) {
return new String(scratch.bytes(), offset, scratch.length()-offset, StandardCharsets.UTF_8);
}
}

View File

@@ -1,151 +0,0 @@
package org.apache.lucene.codecs.simpletext;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Map;
import org.apache.lucene.codecs.FieldInfosWriter;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.DocValuesType;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.IOUtils;
/**
* writes plaintext field infos files
* <p>
* <b><font color="red">FOR RECREATIONAL USE ONLY</font></B>
* @lucene.experimental
*/
public class SimpleTextFieldInfosWriter extends FieldInfosWriter {
/** Extension of field infos */
static final String FIELD_INFOS_EXTENSION = "inf";
static final BytesRef NUMFIELDS = new BytesRef("number of fields ");
static final BytesRef NAME = new BytesRef(" name ");
static final BytesRef NUMBER = new BytesRef(" number ");
static final BytesRef ISINDEXED = new BytesRef(" indexed ");
static final BytesRef STORETV = new BytesRef(" term vectors ");
static final BytesRef STORETVPOS = new BytesRef(" term vector positions ");
static final BytesRef STORETVOFF = new BytesRef(" term vector offsets ");
static final BytesRef PAYLOADS = new BytesRef(" payloads ");
static final BytesRef NORMS = new BytesRef(" norms ");
static final BytesRef NORMS_TYPE = new BytesRef(" norms type ");
static final BytesRef DOCVALUES = new BytesRef(" doc values ");
static final BytesRef DOCVALUES_GEN = new BytesRef(" doc values gen ");
static final BytesRef INDEXOPTIONS = new BytesRef(" index options ");
static final BytesRef NUM_ATTS = new BytesRef(" attributes ");
final static BytesRef ATT_KEY = new BytesRef(" key ");
final static BytesRef ATT_VALUE = new BytesRef(" value ");
@Override
public void write(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException {
final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, FIELD_INFOS_EXTENSION);
IndexOutput out = directory.createOutput(fileName, context);
BytesRefBuilder scratch = new BytesRefBuilder();
boolean success = false;
try {
SimpleTextUtil.write(out, NUMFIELDS);
SimpleTextUtil.write(out, Integer.toString(infos.size()), scratch);
SimpleTextUtil.writeNewline(out);
for (FieldInfo fi : infos) {
SimpleTextUtil.write(out, NAME);
SimpleTextUtil.write(out, fi.name, scratch);
SimpleTextUtil.writeNewline(out);
SimpleTextUtil.write(out, NUMBER);
SimpleTextUtil.write(out, Integer.toString(fi.number), scratch);
SimpleTextUtil.writeNewline(out);
SimpleTextUtil.write(out, ISINDEXED);
SimpleTextUtil.write(out, Boolean.toString(fi.isIndexed()), scratch);
SimpleTextUtil.writeNewline(out);
if (fi.isIndexed()) {
assert fi.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !fi.hasPayloads();
SimpleTextUtil.write(out, INDEXOPTIONS);
SimpleTextUtil.write(out, fi.getIndexOptions().toString(), scratch);
SimpleTextUtil.writeNewline(out);
}
SimpleTextUtil.write(out, STORETV);
SimpleTextUtil.write(out, Boolean.toString(fi.hasVectors()), scratch);
SimpleTextUtil.writeNewline(out);
SimpleTextUtil.write(out, PAYLOADS);
SimpleTextUtil.write(out, Boolean.toString(fi.hasPayloads()), scratch);
SimpleTextUtil.writeNewline(out);
SimpleTextUtil.write(out, NORMS);
SimpleTextUtil.write(out, Boolean.toString(!fi.omitsNorms()), scratch);
SimpleTextUtil.writeNewline(out);
SimpleTextUtil.write(out, NORMS_TYPE);
SimpleTextUtil.write(out, getDocValuesType(fi.getNormType()), scratch);
SimpleTextUtil.writeNewline(out);
SimpleTextUtil.write(out, DOCVALUES);
SimpleTextUtil.write(out, getDocValuesType(fi.getDocValuesType()), scratch);
SimpleTextUtil.writeNewline(out);
SimpleTextUtil.write(out, DOCVALUES_GEN);
SimpleTextUtil.write(out, Long.toString(fi.getDocValuesGen()), scratch);
SimpleTextUtil.writeNewline(out);
Map<String,String> atts = fi.attributes();
int numAtts = atts == null ? 0 : atts.size();
SimpleTextUtil.write(out, NUM_ATTS);
SimpleTextUtil.write(out, Integer.toString(numAtts), scratch);
SimpleTextUtil.writeNewline(out);
if (numAtts > 0) {
for (Map.Entry<String,String> entry : atts.entrySet()) {
SimpleTextUtil.write(out, ATT_KEY);
SimpleTextUtil.write(out, entry.getKey(), scratch);
SimpleTextUtil.writeNewline(out);
SimpleTextUtil.write(out, ATT_VALUE);
SimpleTextUtil.write(out, entry.getValue(), scratch);
SimpleTextUtil.writeNewline(out);
}
}
}
SimpleTextUtil.writeChecksum(out, scratch);
success = true;
} finally {
if (success) {
out.close();
} else {
IOUtils.closeWhileHandlingException(out);
}
}
}
private static String getDocValuesType(DocValuesType type) {
return type == null ? "false" : type.toString();
}
}

View File

@@ -17,9 +17,28 @@ package org.apache.lucene.codecs.simpletext;
 * limitations under the License.
 */
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.codecs.SegmentInfoFormat;
-import org.apache.lucene.codecs.SegmentInfoReader;
-import org.apache.lucene.codecs.SegmentInfoWriter;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.Version;
/**
 * plain text segments file format.
@@ -28,18 +47,163 @@ import org.apache.lucene.codecs.SegmentInfoWriter;
 * @lucene.experimental
 */
public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
-  private final SegmentInfoReader reader = new SimpleTextSegmentInfoReader();
-  private final SegmentInfoWriter writer = new SimpleTextSegmentInfoWriter();
+  final static BytesRef SI_VERSION = new BytesRef(" version ");
+  final static BytesRef SI_DOCCOUNT = new BytesRef(" number of documents ");
final static BytesRef SI_USECOMPOUND = new BytesRef(" uses compound file ");
final static BytesRef SI_NUM_DIAG = new BytesRef(" diagnostics ");
final static BytesRef SI_DIAG_KEY = new BytesRef(" key ");
final static BytesRef SI_DIAG_VALUE = new BytesRef(" value ");
final static BytesRef SI_NUM_FILES = new BytesRef(" files ");
final static BytesRef SI_FILE = new BytesRef(" file ");
final static BytesRef SI_ID = new BytesRef(" id ");
  public static final String SI_EXTENSION = "si";
  @Override
-  public SegmentInfoReader getSegmentInfoReader() {
-    return reader;
+  public SegmentInfo read(Directory directory, String segmentName, IOContext context) throws IOException {
+    BytesRefBuilder scratch = new BytesRefBuilder();
String segFileName = IndexFileNames.segmentFileName(segmentName, "", SimpleTextSegmentInfoFormat.SI_EXTENSION);
ChecksumIndexInput input = directory.openChecksumInput(segFileName, context);
boolean success = false;
try {
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), SI_VERSION);
final Version version;
try {
version = Version.parse(readString(SI_VERSION.length, scratch));
} catch (ParseException pe) {
throw new CorruptIndexException("unable to parse version string: " + pe.getMessage(), input, pe);
}
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), SI_DOCCOUNT);
final int docCount = Integer.parseInt(readString(SI_DOCCOUNT.length, scratch));
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), SI_USECOMPOUND);
final boolean isCompoundFile = Boolean.parseBoolean(readString(SI_USECOMPOUND.length, scratch));
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), SI_NUM_DIAG);
int numDiag = Integer.parseInt(readString(SI_NUM_DIAG.length, scratch));
Map<String,String> diagnostics = new HashMap<>();
for (int i = 0; i < numDiag; i++) {
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), SI_DIAG_KEY);
String key = readString(SI_DIAG_KEY.length, scratch);
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), SI_DIAG_VALUE);
String value = readString(SI_DIAG_VALUE.length, scratch);
diagnostics.put(key, value);
}
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), SI_NUM_FILES);
int numFiles = Integer.parseInt(readString(SI_NUM_FILES.length, scratch));
Set<String> files = new HashSet<>();
for (int i = 0; i < numFiles; i++) {
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), SI_FILE);
String fileName = readString(SI_FILE.length, scratch);
files.add(fileName);
}
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), SI_ID);
final byte[] id = Arrays.copyOfRange(scratch.bytes(), SI_ID.length, scratch.length());
SimpleTextUtil.checkFooter(input);
SegmentInfo info = new SegmentInfo(directory, version, segmentName, docCount,
isCompoundFile, null, diagnostics, id);
info.setFiles(files);
success = true;
return info;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(input);
} else {
input.close();
}
}
  }
private String readString(int offset, BytesRefBuilder scratch) {
return new String(scratch.bytes(), offset, scratch.length()-offset, StandardCharsets.UTF_8);
}
  @Override
-  public SegmentInfoWriter getSegmentInfoWriter() {
-    return writer;
+  public void write(Directory dir, SegmentInfo si, IOContext ioContext) throws IOException {
String segFileName = IndexFileNames.segmentFileName(si.name, "", SimpleTextSegmentInfoFormat.SI_EXTENSION);
si.addFile(segFileName);
boolean success = false;
IndexOutput output = dir.createOutput(segFileName, ioContext);
try {
BytesRefBuilder scratch = new BytesRefBuilder();
SimpleTextUtil.write(output, SI_VERSION);
SimpleTextUtil.write(output, si.getVersion().toString(), scratch);
SimpleTextUtil.writeNewline(output);
SimpleTextUtil.write(output, SI_DOCCOUNT);
SimpleTextUtil.write(output, Integer.toString(si.getDocCount()), scratch);
SimpleTextUtil.writeNewline(output);
SimpleTextUtil.write(output, SI_USECOMPOUND);
SimpleTextUtil.write(output, Boolean.toString(si.getUseCompoundFile()), scratch);
SimpleTextUtil.writeNewline(output);
Map<String,String> diagnostics = si.getDiagnostics();
int numDiagnostics = diagnostics == null ? 0 : diagnostics.size();
SimpleTextUtil.write(output, SI_NUM_DIAG);
SimpleTextUtil.write(output, Integer.toString(numDiagnostics), scratch);
SimpleTextUtil.writeNewline(output);
if (numDiagnostics > 0) {
for (Map.Entry<String,String> diagEntry : diagnostics.entrySet()) {
SimpleTextUtil.write(output, SI_DIAG_KEY);
SimpleTextUtil.write(output, diagEntry.getKey(), scratch);
SimpleTextUtil.writeNewline(output);
SimpleTextUtil.write(output, SI_DIAG_VALUE);
SimpleTextUtil.write(output, diagEntry.getValue(), scratch);
SimpleTextUtil.writeNewline(output);
}
}
Set<String> files = si.files();
int numFiles = files == null ? 0 : files.size();
SimpleTextUtil.write(output, SI_NUM_FILES);
SimpleTextUtil.write(output, Integer.toString(numFiles), scratch);
SimpleTextUtil.writeNewline(output);
if (numFiles > 0) {
for(String fileName : files) {
SimpleTextUtil.write(output, SI_FILE);
SimpleTextUtil.write(output, fileName, scratch);
SimpleTextUtil.writeNewline(output);
}
}
SimpleTextUtil.write(output, SI_ID);
SimpleTextUtil.write(output, new BytesRef(si.getId()));
SimpleTextUtil.writeNewline(output);
SimpleTextUtil.writeChecksum(output, scratch);
success = true;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(output);
IOUtils.deleteFilesIgnoringExceptions(dir, segFileName);
} else {
output.close();
}
}
  }
}
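Likewise, an illustrative .si file as emitted by the write() above (all values are invented for the example; the id line carries the raw bytes of si.getId() and a checksum line follows, both omitted here):

 version 5.0.0
 number of documents 42
 uses compound file true
 diagnostics 1
  key source
  value flush
 files 2
  file _0.cfs
  file _0.cfe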

View File

@@ -1,134 +0,0 @@
package org.apache.lucene.codecs.simpletext;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.codecs.SegmentInfoReader;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.Version;
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DIAG_KEY;
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DIAG_VALUE;
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_DOCCOUNT;
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_FILE;
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_NUM_DIAG;
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_NUM_FILES;
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_USECOMPOUND;
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_ID;
import static org.apache.lucene.codecs.simpletext.SimpleTextSegmentInfoWriter.SI_VERSION;
/**
* reads plaintext segments files
* <p>
* <b><font color="red">FOR RECREATIONAL USE ONLY</font></B>
* @lucene.experimental
*/
public class SimpleTextSegmentInfoReader extends SegmentInfoReader {
@Override
public SegmentInfo read(Directory directory, String segmentName, IOContext context) throws IOException {
BytesRefBuilder scratch = new BytesRefBuilder();
String segFileName = IndexFileNames.segmentFileName(segmentName, "", SimpleTextSegmentInfoFormat.SI_EXTENSION);
ChecksumIndexInput input = directory.openChecksumInput(segFileName, context);
boolean success = false;
try {
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), SI_VERSION);
final Version version;
try {
version = Version.parse(readString(SI_VERSION.length, scratch));
} catch (ParseException pe) {
throw new CorruptIndexException("unable to parse version string: " + pe.getMessage(), input, pe);
}
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), SI_DOCCOUNT);
final int docCount = Integer.parseInt(readString(SI_DOCCOUNT.length, scratch));
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), SI_USECOMPOUND);
final boolean isCompoundFile = Boolean.parseBoolean(readString(SI_USECOMPOUND.length, scratch));
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), SI_NUM_DIAG);
int numDiag = Integer.parseInt(readString(SI_NUM_DIAG.length, scratch));
Map<String,String> diagnostics = new HashMap<>();
for (int i = 0; i < numDiag; i++) {
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), SI_DIAG_KEY);
String key = readString(SI_DIAG_KEY.length, scratch);
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), SI_DIAG_VALUE);
String value = readString(SI_DIAG_VALUE.length, scratch);
diagnostics.put(key, value);
}
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), SI_NUM_FILES);
int numFiles = Integer.parseInt(readString(SI_NUM_FILES.length, scratch));
Set<String> files = new HashSet<>();
for (int i = 0; i < numFiles; i++) {
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), SI_FILE);
String fileName = readString(SI_FILE.length, scratch);
files.add(fileName);
}
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), SI_ID);
final byte[] id = Arrays.copyOfRange(scratch.bytes(), SI_ID.length, scratch.length());
SimpleTextUtil.checkFooter(input);
SegmentInfo info = new SegmentInfo(directory, version, segmentName, docCount,
isCompoundFile, null, diagnostics, id);
info.setFiles(files);
success = true;
return info;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(input);
} else {
input.close();
}
}
}
private String readString(int offset, BytesRefBuilder scratch) {
return new String(scratch.bytes(), offset, scratch.length()-offset, StandardCharsets.UTF_8);
}
}

View File

@@ -1,124 +0,0 @@
package org.apache.lucene.codecs.simpletext;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.codecs.SegmentInfoWriter;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.IOUtils;
/**
* writes plaintext segments files
* <p>
* <b><font color="red">FOR RECREATIONAL USE ONLY</font></B>
* @lucene.experimental
*/
public class SimpleTextSegmentInfoWriter extends SegmentInfoWriter {
final static BytesRef SI_VERSION = new BytesRef(" version ");
final static BytesRef SI_DOCCOUNT = new BytesRef(" number of documents ");
final static BytesRef SI_USECOMPOUND = new BytesRef(" uses compound file ");
final static BytesRef SI_NUM_DIAG = new BytesRef(" diagnostics ");
final static BytesRef SI_DIAG_KEY = new BytesRef(" key ");
final static BytesRef SI_DIAG_VALUE = new BytesRef(" value ");
final static BytesRef SI_NUM_FILES = new BytesRef(" files ");
final static BytesRef SI_FILE = new BytesRef(" file ");
final static BytesRef SI_ID = new BytesRef(" id ");
@Override
public void write(Directory dir, SegmentInfo si, FieldInfos fis, IOContext ioContext) throws IOException {
String segFileName = IndexFileNames.segmentFileName(si.name, "", SimpleTextSegmentInfoFormat.SI_EXTENSION);
si.addFile(segFileName);
boolean success = false;
IndexOutput output = dir.createOutput(segFileName, ioContext);
try {
BytesRefBuilder scratch = new BytesRefBuilder();
SimpleTextUtil.write(output, SI_VERSION);
SimpleTextUtil.write(output, si.getVersion().toString(), scratch);
SimpleTextUtil.writeNewline(output);
SimpleTextUtil.write(output, SI_DOCCOUNT);
SimpleTextUtil.write(output, Integer.toString(si.getDocCount()), scratch);
SimpleTextUtil.writeNewline(output);
SimpleTextUtil.write(output, SI_USECOMPOUND);
SimpleTextUtil.write(output, Boolean.toString(si.getUseCompoundFile()), scratch);
SimpleTextUtil.writeNewline(output);
Map<String,String> diagnostics = si.getDiagnostics();
int numDiagnostics = diagnostics == null ? 0 : diagnostics.size();
SimpleTextUtil.write(output, SI_NUM_DIAG);
SimpleTextUtil.write(output, Integer.toString(numDiagnostics), scratch);
SimpleTextUtil.writeNewline(output);
if (numDiagnostics > 0) {
for (Map.Entry<String,String> diagEntry : diagnostics.entrySet()) {
SimpleTextUtil.write(output, SI_DIAG_KEY);
SimpleTextUtil.write(output, diagEntry.getKey(), scratch);
SimpleTextUtil.writeNewline(output);
SimpleTextUtil.write(output, SI_DIAG_VALUE);
SimpleTextUtil.write(output, diagEntry.getValue(), scratch);
SimpleTextUtil.writeNewline(output);
}
}
Set<String> files = si.files();
int numFiles = files == null ? 0 : files.size();
SimpleTextUtil.write(output, SI_NUM_FILES);
SimpleTextUtil.write(output, Integer.toString(numFiles), scratch);
SimpleTextUtil.writeNewline(output);
if (numFiles > 0) {
for(String fileName : files) {
SimpleTextUtil.write(output, SI_FILE);
SimpleTextUtil.write(output, fileName, scratch);
SimpleTextUtil.writeNewline(output);
}
}
SimpleTextUtil.write(output, SI_ID);
SimpleTextUtil.write(output, new BytesRef(si.getId()));
SimpleTextUtil.writeNewline(output);
SimpleTextUtil.writeChecksum(output, scratch);
success = true;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(output);
IOUtils.deleteFilesIgnoringExceptions(dir, segFileName);
} else {
output.close();
}
}
}
}

View File

@@ -1,4 +1,4 @@
-package org.apache.lucene.codecs;
+package org.apache.lucene.codecs.simpletext;
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -17,24 +17,17 @@ package org.apache.lucene.codecs;
  * limitations under the License.
  */
-import java.io.IOException;
-import org.apache.lucene.index.FieldInfos;
-import org.apache.lucene.index.SegmentInfo;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IOContext;
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.index.BaseFieldInfoFormatTestCase;
 /**
- * Codec API for reading {@link FieldInfos}.
- * @lucene.experimental
+ * Tests SimpleTextFieldInfoFormat
  */
-public abstract class FieldInfosReader {
-  /** Sole constructor. (For invocation by subclass
-   * constructors, typically implicit.) */
-  protected FieldInfosReader() {
-  }
-  /** Read the {@link FieldInfos} previously written with {@link
-   * FieldInfosWriter}. */
-  public abstract FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext iocontext) throws IOException;
+public class TestSimpleTextFieldInfoFormat extends BaseFieldInfoFormatTestCase {
+  private final Codec codec = new SimpleTextCodec();
+  @Override
+  protected Codec getCodec() {
+    return codec;
+  }
 }

View File

@@ -0,0 +1,39 @@
package org.apache.lucene.codecs.simpletext;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.BaseSegmentInfoFormatTestCase;
import org.apache.lucene.util.Version;
/**
* Tests SimpleTextSegmentInfoFormat
*/
public class TestSimpleTextSegmentInfoFormat extends BaseSegmentInfoFormatTestCase {
private final Codec codec = new SimpleTextCodec();
@Override
protected Version[] getVersions() {
return new Version[] { Version.LATEST };
}
@Override
protected Codec getCodec() {
return codec;
}
}

View File

@@ -20,6 +20,9 @@ package org.apache.lucene.codecs;
import java.io.IOException;
import org.apache.lucene.index.FieldInfos; // javadocs
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
/**
 * Encodes/decodes {@link FieldInfos}
@@ -30,12 +33,11 @@ public abstract class FieldInfosFormat {
   * constructors, typically implicit.) */
  protected FieldInfosFormat() {
  }
/** Read the {@link FieldInfos} previously written with {@link #write}. */
public abstract FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext iocontext) throws IOException;
-  /** Returns a {@link FieldInfosReader} to read field infos
-   * from the index */
-  public abstract FieldInfosReader getFieldInfosReader() throws IOException;
-  /** Returns a {@link FieldInfosWriter} to write field infos
-   * to the index */
-  public abstract FieldInfosWriter getFieldInfosWriter() throws IOException;
+  /** Writes the provided {@link FieldInfos} to the
+   * directory. */
+  public abstract void write(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException;
 }
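The two abstract methods above are now the entire field-infos contract. A skeletal custom implementation, shown for shape only (MyFieldInfosFormat is a hypothetical name and the bodies are placeholders, not working codec logic):

import java.io.IOException;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;

// Skeleton only: the encode/decode logic is format-specific and left unimplemented.
public class MyFieldInfosFormat extends FieldInfosFormat {
  @Override
  public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext iocontext) throws IOException {
    throw new UnsupportedOperationException("decode the on-disk field infos here");
  }
  @Override
  public void write(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException {
    throw new UnsupportedOperationException("encode 'infos' into the directory here");
  }
}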

View File

@@ -17,7 +17,11 @@ package org.apache.lucene.codecs;
 * limitations under the License.
 */
import java.io.IOException;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
/**
 * Expert: Controls the format of the
@@ -33,11 +37,18 @@ public abstract class SegmentInfoFormat {
  protected SegmentInfoFormat() {
  }
-  /** Returns the {@link SegmentInfoReader} for reading
-   * {@link SegmentInfo} instances. */
-  public abstract SegmentInfoReader getSegmentInfoReader();
+  /**
+   * Read {@link SegmentInfo} data from a directory.
+   * @param directory directory to read from
* @param segmentName name of the segment to read
* @return infos instance to be populated with data
* @throws IOException If an I/O error occurs
*/
public abstract SegmentInfo read(Directory directory, String segmentName, IOContext context) throws IOException;
-  /** Returns the {@link SegmentInfoWriter} for writing
-   * {@link SegmentInfo} instances. */
-  public abstract SegmentInfoWriter getSegmentInfoWriter();
+  /**
+   * Write {@link SegmentInfo} data.
+   * @throws IOException If an I/O error occurs
+   */
+  public abstract void write(Directory dir, SegmentInfo info, IOContext ioContext) throws IOException;
 }
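Read and write being symmetric on the format makes a round-trip simple to express. A hedged sketch (helper class and method names are hypothetical; SegmentInfo.name, IOContext.DEFAULT and IOContext.READ are existing API):

import java.io.IOException;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;

// Hypothetical round-trip through the two abstract methods declared above.
class SegmentInfoRoundTrip {
  static SegmentInfo reload(Codec codec, Directory dir, SegmentInfo si) throws IOException {
    codec.segmentInfoFormat().write(dir, si, IOContext.DEFAULT);
    return codec.segmentInfoFormat().read(dir, si.name, IOContext.READ);
  }
}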

View File

@@ -1,46 +0,0 @@
package org.apache.lucene.codecs;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
/**
* Specifies an API for classes that can read {@link SegmentInfo} information.
* @lucene.experimental
*/
public abstract class SegmentInfoReader {
/** Sole constructor. (For invocation by subclass
* constructors, typically implicit.) */
protected SegmentInfoReader() {
}
/**
* Read {@link SegmentInfo} data from a directory.
* @param directory directory to read from
* @param segmentName name of the segment to read
* @return infos instance to be populated with data
* @throws IOException If an I/O error occurs
*/
public abstract SegmentInfo read(Directory directory, String segmentName, IOContext context) throws IOException;
}

View File

@@ -18,14 +18,25 @@ package org.apache.lucene.codecs.lucene50;
 */
import java.io.IOException;
import java.util.Collections;
import java.util.Map;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FieldInfosFormat;
-import org.apache.lucene.codecs.FieldInfosReader;
-import org.apache.lucene.codecs.FieldInfosWriter;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.FieldInfo.DocValuesType;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
/**
 * Lucene 5.0 Field Infos format.
@@ -91,21 +102,148 @@ import org.apache.lucene.store.DataOutput;
 * @lucene.experimental
 */
public final class Lucene50FieldInfosFormat extends FieldInfosFormat {
-  private final FieldInfosReader reader = new Lucene50FieldInfosReader();
-  private final FieldInfosWriter writer = new Lucene50FieldInfosWriter();
  /** Sole constructor. */
  public Lucene50FieldInfosFormat() {
  }
  @Override
-  public FieldInfosReader getFieldInfosReader() throws IOException {
-    return reader;
+  public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext context) throws IOException {
+    final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene50FieldInfosFormat.EXTENSION);
try (ChecksumIndexInput input = directory.openChecksumInput(fileName, context)) {
Throwable priorE = null;
FieldInfo infos[] = null;
try {
CodecUtil.checkSegmentHeader(input, Lucene50FieldInfosFormat.CODEC_NAME,
Lucene50FieldInfosFormat.FORMAT_START,
Lucene50FieldInfosFormat.FORMAT_CURRENT,
segmentInfo.getId(), segmentSuffix);
final int size = input.readVInt(); //read in the size
infos = new FieldInfo[size];
for (int i = 0; i < size; i++) {
String name = input.readString();
final int fieldNumber = input.readVInt();
if (fieldNumber < 0) {
throw new CorruptIndexException("invalid field number for field: " + name + ", fieldNumber=" + fieldNumber, input);
}
byte bits = input.readByte();
boolean isIndexed = (bits & Lucene50FieldInfosFormat.IS_INDEXED) != 0;
boolean storeTermVector = (bits & Lucene50FieldInfosFormat.STORE_TERMVECTOR) != 0;
boolean omitNorms = (bits & Lucene50FieldInfosFormat.OMIT_NORMS) != 0;
boolean storePayloads = (bits & Lucene50FieldInfosFormat.STORE_PAYLOADS) != 0;
final IndexOptions indexOptions;
if (!isIndexed) {
indexOptions = null;
} else if ((bits & Lucene50FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS) != 0) {
indexOptions = IndexOptions.DOCS_ONLY;
} else if ((bits & Lucene50FieldInfosFormat.OMIT_POSITIONS) != 0) {
indexOptions = IndexOptions.DOCS_AND_FREQS;
} else if ((bits & Lucene50FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS) != 0) {
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
} else {
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
}
// DV Types are packed in one byte
byte val = input.readByte();
final DocValuesType docValuesType = getDocValuesType(input, (byte) (val & 0x0F));
final DocValuesType normsType = getDocValuesType(input, (byte) ((val >>> 4) & 0x0F));
final long dvGen = input.readLong();
final Map<String,String> attributes = input.readStringStringMap();
try {
infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads,
indexOptions, docValuesType, normsType, dvGen, Collections.unmodifiableMap(attributes));
infos[i].checkConsistency();
} catch (IllegalStateException e) {
throw new CorruptIndexException("invalid fieldinfo for field: " + name + ", fieldNumber=" + fieldNumber, input, e);
}
}
} catch (Throwable exception) {
priorE = exception;
} finally {
CodecUtil.checkFooter(input, priorE);
}
return new FieldInfos(infos);
}
}
private static DocValuesType getDocValuesType(IndexInput input, byte b) throws IOException {
if (b == 0) {
return null;
} else if (b == 1) {
return DocValuesType.NUMERIC;
} else if (b == 2) {
return DocValuesType.BINARY;
} else if (b == 3) {
return DocValuesType.SORTED;
} else if (b == 4) {
return DocValuesType.SORTED_SET;
} else if (b == 5) {
return DocValuesType.SORTED_NUMERIC;
} else {
throw new CorruptIndexException("invalid docvalues byte: " + b, input);
}
  }
  @Override
-  public FieldInfosWriter getFieldInfosWriter() throws IOException {
-    return writer;
+  public void write(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException {
+    final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene50FieldInfosFormat.EXTENSION);
try (IndexOutput output = directory.createOutput(fileName, context)) {
CodecUtil.writeSegmentHeader(output, Lucene50FieldInfosFormat.CODEC_NAME, Lucene50FieldInfosFormat.FORMAT_CURRENT, segmentInfo.getId(), segmentSuffix);
output.writeVInt(infos.size());
for (FieldInfo fi : infos) {
fi.checkConsistency();
IndexOptions indexOptions = fi.getIndexOptions();
byte bits = 0x0;
if (fi.hasVectors()) bits |= Lucene50FieldInfosFormat.STORE_TERMVECTOR;
if (fi.omitsNorms()) bits |= Lucene50FieldInfosFormat.OMIT_NORMS;
if (fi.hasPayloads()) bits |= Lucene50FieldInfosFormat.STORE_PAYLOADS;
if (fi.isIndexed()) {
bits |= Lucene50FieldInfosFormat.IS_INDEXED;
assert indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !fi.hasPayloads();
if (indexOptions == IndexOptions.DOCS_ONLY) {
bits |= Lucene50FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS;
} else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) {
bits |= Lucene50FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS;
} else if (indexOptions == IndexOptions.DOCS_AND_FREQS) {
bits |= Lucene50FieldInfosFormat.OMIT_POSITIONS;
}
}
output.writeString(fi.name);
output.writeVInt(fi.number);
output.writeByte(bits);
// pack the DV types in one byte
final byte dv = docValuesByte(fi.getDocValuesType());
final byte nrm = docValuesByte(fi.getNormType());
assert (dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0;
byte val = (byte) (0xff & ((nrm << 4) | dv));
output.writeByte(val);
output.writeLong(fi.getDocValuesGen());
output.writeStringStringMap(fi.attributes());
}
CodecUtil.writeFooter(output);
}
}
private static byte docValuesByte(DocValuesType type) {
if (type == null) {
return 0;
} else if (type == DocValuesType.NUMERIC) {
return 1;
} else if (type == DocValuesType.BINARY) {
return 2;
} else if (type == DocValuesType.SORTED) {
return 3;
} else if (type == DocValuesType.SORTED_SET) {
return 4;
} else if (type == DocValuesType.SORTED_NUMERIC) {
return 5;
} else {
throw new AssertionError();
}
  }
  /** Extension of field infos */
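The read/write pair above packs the doc-values type and norms type into a single byte: low nibble for doc values, high nibble for norms, with 0 meaning none and NUMERIC=1, BINARY=2, SORTED=3, SORTED_SET=4, SORTED_NUMERIC=5. A standalone arithmetic check of that round-trip (demo class name is hypothetical):

// Round-trips the nibble packing used by Lucene50FieldInfosFormat above.
public class DvNibblePackingDemo {
  public static void main(String[] args) {
    byte dv = 3;   // SORTED doc values, per docValuesByte above
    byte nrm = 1;  // NUMERIC norms
    byte val = (byte) (0xff & ((nrm << 4) | dv)); // writer-side packing
    System.out.println(val);                 // 19, i.e. 0x13
    System.out.println(val & 0x0F);          // 3 -> SORTED (reader's low nibble)
    System.out.println((val >>> 4) & 0x0F);  // 1 -> NUMERIC (reader's high nibble)
  }
}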

View File

@@ -1,129 +0,0 @@
package org.apache.lucene.codecs.lucene50;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Collections;
import java.util.Map;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldInfosReader;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.FieldInfo.DocValuesType;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
/**
* Lucene 5.0 FieldInfos reader.
*
* @lucene.experimental
* @see Lucene50FieldInfosFormat
*/
final class Lucene50FieldInfosReader extends FieldInfosReader {
/** Sole constructor. */
public Lucene50FieldInfosReader() {
}
@Override
public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext context) throws IOException {
final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene50FieldInfosFormat.EXTENSION);
try (ChecksumIndexInput input = directory.openChecksumInput(fileName, context)) {
Throwable priorE = null;
FieldInfo infos[] = null;
try {
CodecUtil.checkSegmentHeader(input, Lucene50FieldInfosFormat.CODEC_NAME,
Lucene50FieldInfosFormat.FORMAT_START,
Lucene50FieldInfosFormat.FORMAT_CURRENT,
segmentInfo.getId(), segmentSuffix);
final int size = input.readVInt(); //read in the size
infos = new FieldInfo[size];
for (int i = 0; i < size; i++) {
String name = input.readString();
final int fieldNumber = input.readVInt();
if (fieldNumber < 0) {
throw new CorruptIndexException("invalid field number for field: " + name + ", fieldNumber=" + fieldNumber, input);
}
byte bits = input.readByte();
boolean isIndexed = (bits & Lucene50FieldInfosFormat.IS_INDEXED) != 0;
boolean storeTermVector = (bits & Lucene50FieldInfosFormat.STORE_TERMVECTOR) != 0;
boolean omitNorms = (bits & Lucene50FieldInfosFormat.OMIT_NORMS) != 0;
boolean storePayloads = (bits & Lucene50FieldInfosFormat.STORE_PAYLOADS) != 0;
final IndexOptions indexOptions;
if (!isIndexed) {
indexOptions = null;
} else if ((bits & Lucene50FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS) != 0) {
indexOptions = IndexOptions.DOCS_ONLY;
} else if ((bits & Lucene50FieldInfosFormat.OMIT_POSITIONS) != 0) {
indexOptions = IndexOptions.DOCS_AND_FREQS;
} else if ((bits & Lucene50FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS) != 0) {
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
} else {
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
}
// DV Types are packed in one byte
byte val = input.readByte();
final DocValuesType docValuesType = getDocValuesType(input, (byte) (val & 0x0F));
final DocValuesType normsType = getDocValuesType(input, (byte) ((val >>> 4) & 0x0F));
final long dvGen = input.readLong();
final Map<String,String> attributes = input.readStringStringMap();
try {
infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads,
indexOptions, docValuesType, normsType, dvGen, Collections.unmodifiableMap(attributes));
infos[i].checkConsistency();
} catch (IllegalStateException e) {
throw new CorruptIndexException("invalid fieldinfo for field: " + name + ", fieldNumber=" + fieldNumber, input, e);
}
}
} catch (Throwable exception) {
priorE = exception;
} finally {
CodecUtil.checkFooter(input, priorE);
}
return new FieldInfos(infos);
}
}
private static DocValuesType getDocValuesType(IndexInput input, byte b) throws IOException {
if (b == 0) {
return null;
} else if (b == 1) {
return DocValuesType.NUMERIC;
} else if (b == 2) {
return DocValuesType.BINARY;
} else if (b == 3) {
return DocValuesType.SORTED;
} else if (b == 4) {
return DocValuesType.SORTED_SET;
} else if (b == 5) {
return DocValuesType.SORTED_NUMERIC;
} else {
throw new CorruptIndexException("invalid docvalues byte: " + b, input);
}
}
}
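// Editorial note: the readers in this patch share a checksum idiom: decode
// the file body in a try whose catch only records the exception, then call
// CodecUtil.checkFooter(input, priorE) in finally, so corruption is reported
// even when decoding itself threw. The bare pattern, with a hypothetical
// decodeBody() step standing in for the real reads:
//   try (ChecksumIndexInput input = dir.openChecksumInput(fileName, context)) {
//     Throwable priorE = null;
//     try {
//       decodeBody(input);          // hypothetical: read the file's contents
//     } catch (Throwable exception) {
//       priorE = exception;
//     } finally {
//       CodecUtil.checkFooter(input, priorE); // verify footer, rethrow priorE
//     }
//   }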


@ -1,104 +0,0 @@
package org.apache.lucene.codecs.lucene50;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldInfosWriter;
import org.apache.lucene.index.FieldInfo.DocValuesType;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
/**
* Lucene 5.0 FieldInfos writer.
*
* @see Lucene50FieldInfosFormat
* @lucene.experimental
*/
final class Lucene50FieldInfosWriter extends FieldInfosWriter {
/** Sole constructor. */
public Lucene50FieldInfosWriter() {
}
@Override
public void write(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException {
final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene50FieldInfosFormat.EXTENSION);
try (IndexOutput output = directory.createOutput(fileName, context)) {
CodecUtil.writeSegmentHeader(output, Lucene50FieldInfosFormat.CODEC_NAME, Lucene50FieldInfosFormat.FORMAT_CURRENT, segmentInfo.getId(), segmentSuffix);
output.writeVInt(infos.size());
for (FieldInfo fi : infos) {
fi.checkConsistency();
IndexOptions indexOptions = fi.getIndexOptions();
byte bits = 0x0;
if (fi.hasVectors()) bits |= Lucene50FieldInfosFormat.STORE_TERMVECTOR;
if (fi.omitsNorms()) bits |= Lucene50FieldInfosFormat.OMIT_NORMS;
if (fi.hasPayloads()) bits |= Lucene50FieldInfosFormat.STORE_PAYLOADS;
if (fi.isIndexed()) {
bits |= Lucene50FieldInfosFormat.IS_INDEXED;
assert indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !fi.hasPayloads();
if (indexOptions == IndexOptions.DOCS_ONLY) {
bits |= Lucene50FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS;
} else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) {
bits |= Lucene50FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS;
} else if (indexOptions == IndexOptions.DOCS_AND_FREQS) {
bits |= Lucene50FieldInfosFormat.OMIT_POSITIONS;
}
}
output.writeString(fi.name);
output.writeVInt(fi.number);
output.writeByte(bits);
// pack the DV types in one byte
final byte dv = docValuesByte(fi.getDocValuesType());
final byte nrm = docValuesByte(fi.getNormType());
assert (dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0;
byte val = (byte) (0xff & ((nrm << 4) | dv));
output.writeByte(val);
output.writeLong(fi.getDocValuesGen());
output.writeStringStringMap(fi.attributes());
}
CodecUtil.writeFooter(output);
}
}
private static byte docValuesByte(DocValuesType type) {
if (type == null) {
return 0;
} else if (type == DocValuesType.NUMERIC) {
return 1;
} else if (type == DocValuesType.BINARY) {
return 2;
} else if (type == DocValuesType.SORTED) {
return 3;
} else if (type == DocValuesType.SORTED_SET) {
return 4;
} else if (type == DocValuesType.SORTED_NUMERIC) {
return 5;
} else {
throw new AssertionError();
}
}
}


@ -17,14 +17,25 @@ package org.apache.lucene.codecs.lucene50;
* limitations under the License.
*/
import java.io.IOException;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.SegmentInfoFormat;
- import org.apache.lucene.codecs.SegmentInfoReader;
- import org.apache.lucene.codecs.SegmentInfoWriter;
+ import org.apache.lucene.index.CorruptIndexException;
+ import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexWriter; // javadocs
import org.apache.lucene.index.SegmentInfo; // javadocs
import org.apache.lucene.index.SegmentInfos; // javadocs
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.DataOutput; // javadocs
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.Version;
/**
* Lucene 5.0 Segment info format.
@ -67,21 +78,86 @@ import org.apache.lucene.store.DataOutput; // javadocs
* @lucene.experimental
*/
public class Lucene50SegmentInfoFormat extends SegmentInfoFormat {
- private final SegmentInfoReader reader = new Lucene50SegmentInfoReader();
- private final SegmentInfoWriter writer = new Lucene50SegmentInfoWriter();
/** Sole constructor. */
public Lucene50SegmentInfoFormat() {
}
@Override
- public SegmentInfoReader getSegmentInfoReader() {
- return reader;
+ public SegmentInfo read(Directory dir, String segment, IOContext context) throws IOException {
+ final String fileName = IndexFileNames.segmentFileName(segment, "", Lucene50SegmentInfoFormat.SI_EXTENSION);
try (ChecksumIndexInput input = dir.openChecksumInput(fileName, context)) {
Throwable priorE = null;
SegmentInfo si = null;
try {
CodecUtil.checkHeader(input, Lucene50SegmentInfoFormat.CODEC_NAME,
Lucene50SegmentInfoFormat.VERSION_START,
Lucene50SegmentInfoFormat.VERSION_CURRENT);
final Version version = Version.fromBits(input.readInt(), input.readInt(), input.readInt());
final int docCount = input.readInt();
if (docCount < 0) {
throw new CorruptIndexException("invalid docCount: " + docCount, input);
}
final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;
final Map<String,String> diagnostics = input.readStringStringMap();
final Set<String> files = input.readStringSet();
byte[] id = new byte[StringHelper.ID_LENGTH];
input.readBytes(id, 0, id.length);
si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics, id);
si.setFiles(files);
} catch (Throwable exception) {
priorE = exception;
} finally {
CodecUtil.checkFooter(input, priorE);
}
return si;
}
}
@Override
- public SegmentInfoWriter getSegmentInfoWriter() {
- return writer;
+ public void write(Directory dir, SegmentInfo si, IOContext ioContext) throws IOException {
+ final String fileName = IndexFileNames.segmentFileName(si.name, "", Lucene50SegmentInfoFormat.SI_EXTENSION);
si.addFile(fileName);
boolean success = false;
try (IndexOutput output = dir.createOutput(fileName, ioContext)) {
CodecUtil.writeHeader(output, Lucene50SegmentInfoFormat.CODEC_NAME, Lucene50SegmentInfoFormat.VERSION_CURRENT);
Version version = si.getVersion();
if (version.major < 5) {
throw new IllegalArgumentException("invalid major version: should be >= 5 but got: " + version.major + " segment=" + si);
}
// Write the Lucene version that created this segment, since 3.1
output.writeInt(version.major);
output.writeInt(version.minor);
output.writeInt(version.bugfix);
assert version.prerelease == 0;
output.writeInt(si.getDocCount());
output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
output.writeStringStringMap(si.getDiagnostics());
Set<String> files = si.files();
for (String file : files) {
if (!IndexFileNames.parseSegmentName(file).equals(si.name)) {
throw new IllegalArgumentException("invalid files: expected segment=" + si.name + ", got=" + files);
}
}
output.writeStringSet(files);
byte[] id = si.getId();
if (id.length != StringHelper.ID_LENGTH) {
throw new IllegalArgumentException("invalid id, got=" + StringHelper.idToString(id));
}
output.writeBytes(id, 0, id.length);
CodecUtil.writeFooter(output);
success = true;
} finally {
if (!success) {
// TODO: are we doing this outside of the tracking wrapper? why must SIWriter cleanup like this?
IOUtils.deleteFilesIgnoringExceptions(si.dir, fileName);
}
}
}
/** File extension used to store {@link SegmentInfo}. */
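// Editorial note: this rewrite also changes how the creating Lucene version
// is encoded. The old writer stored version.toString() and the old reader had
// to Version.parse() it, with a possible ParseException (see the deleted
// Lucene50SegmentInfoReader below); the new code round-trips three ints with
// no string parsing:
//   output.writeInt(version.major);
//   output.writeInt(version.minor);
//   output.writeInt(version.bugfix);
//   ...
//   Version version = Version.fromBits(input.readInt(), input.readInt(), input.readInt());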


@ -1,86 +0,0 @@
package org.apache.lucene.codecs.lucene50;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.text.ParseException;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.SegmentInfoReader;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.Version;
/**
* Lucene 5.0 implementation of {@link SegmentInfoReader}.
*
* @see Lucene50SegmentInfoFormat
* @lucene.experimental
*/
public class Lucene50SegmentInfoReader extends SegmentInfoReader {
/** Sole constructor. */
public Lucene50SegmentInfoReader() {
}
@Override
public SegmentInfo read(Directory dir, String segment, IOContext context) throws IOException {
final String fileName = IndexFileNames.segmentFileName(segment, "", Lucene50SegmentInfoFormat.SI_EXTENSION);
try (ChecksumIndexInput input = dir.openChecksumInput(fileName, context)) {
Throwable priorE = null;
SegmentInfo si = null;
try {
CodecUtil.checkHeader(input, Lucene50SegmentInfoFormat.CODEC_NAME,
Lucene50SegmentInfoFormat.VERSION_START,
Lucene50SegmentInfoFormat.VERSION_CURRENT);
final Version version;
try {
version = Version.parse(input.readString());
} catch (ParseException pe) {
throw new CorruptIndexException("unable to parse version string: " + pe.getMessage(), input, pe);
}
final int docCount = input.readInt();
if (docCount < 0) {
throw new CorruptIndexException("invalid docCount: " + docCount, input);
}
final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;
final Map<String,String> diagnostics = input.readStringStringMap();
final Set<String> files = input.readStringSet();
byte[] id = new byte[StringHelper.ID_LENGTH];
input.readBytes(id, 0, id.length);
si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics, id);
si.setFiles(files);
} catch (Throwable exception) {
priorE = exception;
} finally {
CodecUtil.checkFooter(input, priorE);
}
return si;
}
}
}


@ -1,87 +0,0 @@
package org.apache.lucene.codecs.lucene50;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Set;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.SegmentInfoWriter;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.Version;
/**
* Lucene 5.0 implementation of {@link SegmentInfoWriter}.
*
* @see Lucene50SegmentInfoFormat
* @lucene.experimental
*/
public class Lucene50SegmentInfoWriter extends SegmentInfoWriter {
/** Sole constructor. */
public Lucene50SegmentInfoWriter() {
}
/** Save a single segment's info. */
@Override
public void write(Directory dir, SegmentInfo si, FieldInfos fis, IOContext ioContext) throws IOException {
final String fileName = IndexFileNames.segmentFileName(si.name, "", Lucene50SegmentInfoFormat.SI_EXTENSION);
si.addFile(fileName);
boolean success = false;
try (IndexOutput output = dir.createOutput(fileName, ioContext)) {
CodecUtil.writeHeader(output, Lucene50SegmentInfoFormat.CODEC_NAME, Lucene50SegmentInfoFormat.VERSION_CURRENT);
Version version = si.getVersion();
if (version.major < 5) {
throw new IllegalArgumentException("invalid major version: should be >= 5 but got: " + version.major + " segment=" + si);
}
// Write the Lucene version that created this segment, since 3.1
output.writeString(version.toString());
output.writeInt(si.getDocCount());
output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
output.writeStringStringMap(si.getDiagnostics());
Set<String> files = si.files();
for (String file : files) {
if (!IndexFileNames.parseSegmentName(file).equals(si.name)) {
throw new IllegalArgumentException("invalid files: expected segment=" + si.name + ", got=" + files);
}
}
output.writeStringSet(files);
byte[] id = si.getId();
if (id.length != StringHelper.ID_LENGTH) {
throw new IllegalArgumentException("invalid id, got=" + StringHelper.idToString(id));
}
output.writeBytes(id, 0, id.length);
CodecUtil.writeFooter(output);
success = true;
} finally {
if (!success) {
// TODO: are we doing this outside of the tracking wrapper? why must SIWriter cleanup like this?
IOUtils.deleteFilesIgnoringExceptions(si.dir, fileName);
}
}
}
}


@ -25,7 +25,6 @@ import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesFormat;
- import org.apache.lucene.codecs.FieldInfosWriter;
import org.apache.lucene.codecs.NormsConsumer;
import org.apache.lucene.codecs.NormsFormat;
import org.apache.lucene.codecs.StoredFieldsWriter;
@ -118,8 +117,7 @@ final class DefaultIndexingChain extends DocConsumer {
// consumer can alter the FieldInfo* if necessary. EG,
// FreqProxTermsWriter does this with
// FieldInfo.storePayload.
- FieldInfosWriter infosWriter = docWriter.codec.fieldInfosFormat().getFieldInfosWriter();
- infosWriter.write(state.directory, state.segmentInfo, "", state.fieldInfos, IOContext.DEFAULT);
+ docWriter.codec.fieldInfosFormat().write(state.directory, state.segmentInfo, "", state.fieldInfos, IOContext.DEFAULT);
}
/** Writes all buffered doc values (called from {@link #flush}). */
@ -622,6 +620,10 @@ final class DefaultIndexingChain extends DocConsumer {
invertState.lastStartOffset = startOffset;
}
invertState.length++;
if (invertState.length < 0) {
throw new IllegalArgumentException("too many tokens in field '" + field.name() + "'");
}
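// Editorial note: this guard relies on Java int overflow. Incrementing past
// Integer.MAX_VALUE wraps to a negative value, so a single sign check turns
// silent document-length corruption into a clear exception. In isolation
// (the field name here is made up):
//   int length = Integer.MAX_VALUE;
//   length++;                 // wraps to Integer.MIN_VALUE, i.e. negative
//   if (length < 0) {
//     throw new IllegalArgumentException("too many tokens in field 'body'");
//   }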
//System.out.println("  term=" + invertState.termAttribute);
// If we hit an exception in here, we abort
@ -633,8 +635,6 @@ final class DefaultIndexingChain extends DocConsumer {
aborting = true;
termsHashPerField.add();
aborting = false;
- invertState.length++;
}
// trigger streams to perform end-of-stream operations


@ -517,7 +517,7 @@ class DocumentsWriterPerThread {
// creating CFS so that 1) .si isn't slurped into CFS,
// and 2) .si reflects useCompoundFile=true change
// above:
- codec.segmentInfoFormat().getSegmentInfoWriter().write(directory, newSegment.info, flushedSegment.fieldInfos, context);
+ codec.segmentInfoFormat().write(directory, newSegment.info, context);
// TODO: ideally we would freeze newSegment here!!
// because any changes after writing the .si will be


@ -25,9 +25,11 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.regex.Matcher;
import org.apache.lucene.store.AlreadyClosedException;
@ -78,7 +80,7 @@ final class IndexFileDeleter implements Closeable {
/* Files that we tried to delete but failed (likely
* because they are open and we are running on Windows),
* so we will retry them again later: */
- private List<String> deletable;
+ private Set<String> deletable;
/* Reference count for all files in the index.
* Counts how many existing commits reference a file.
@ -359,7 +361,7 @@ final class IndexFileDeleter implements Closeable {
* Remove the CommitPoints in the commitsToDelete List by
* DecRef'ing all files from each SegmentInfos.
*/
- private void deleteCommits() throws IOException {
+ private void deleteCommits() {
int size = commitsToDelete.size();
@ -384,7 +386,7 @@ final class IndexFileDeleter implements Closeable {
commitsToDelete.clear();
// NOTE: does nothing if firstThrowable is null
- IOUtils.reThrow(firstThrowable);
+ IOUtils.reThrowUnchecked(firstThrowable);
// Now compact commits to remove deleted ones (preserving the sort):
size = commits.size();
@ -462,7 +464,7 @@ final class IndexFileDeleter implements Closeable {
}
@Override
- public void close() throws IOException {
+ public void close() {
// DecRef old files from the last checkpoint, if any:
assert locked();
@ -498,14 +500,12 @@ final class IndexFileDeleter implements Closeable {
}
}
- public void deletePendingFiles() throws IOException {
+ public void deletePendingFiles() {
assert locked();
if (deletable != null) {
- List<String> oldDeletable = deletable;
+ Set<String> oldDeletable = deletable;
deletable = null;
- int size = oldDeletable.size();
- for(int i=0;i<size;i++) {
- String fileName = oldDeletable.get(i);
+ for(String fileName : oldDeletable) {
if (infoStream.isEnabled("IFD")) {
infoStream.message("IFD", "delete pending file " + fileName);
}
@ -611,7 +611,7 @@ final class IndexFileDeleter implements Closeable {
}
/** Decrefs all provided files, even on exception; throws first exception hit, if any. */
- void decRef(Collection<String> files) throws IOException {
+ void decRef(Collection<String> files) {
assert locked();
Throwable firstThrowable = null;
for(final String file : files) {
@ -626,12 +626,12 @@ final class IndexFileDeleter implements Closeable {
}
// NOTE: does nothing if firstThrowable is null
- IOUtils.reThrow(firstThrowable);
+ IOUtils.reThrowUnchecked(firstThrowable);
}
/** Decrefs all provided files, ignoring any exceptions hit; call this if
* you are already handling an exception. */
- void decRefWhileHandlingException(Collection<String> files) throws IOException {
+ void decRefWhileHandlingException(Collection<String> files) {
assert locked();
for(final String file : files) {
try {
@ -641,7 +641,7 @@ final class IndexFileDeleter implements Closeable {
}
}
- void decRef(String fileName) throws IOException {
+ void decRef(String fileName) {
assert locked();
RefCount rc = getRefCount(fileName);
if (infoStream.isEnabled("IFD")) {
@ -679,6 +679,8 @@ final class IndexFileDeleter implements Closeable {
RefCount rc;
if (!refCounts.containsKey(fileName)) {
rc = new RefCount(fileName);
// We should never incRef a file we are already wanting to delete:
assert deletable == null || deletable.contains(fileName) == false: "file \"" + fileName + "\" cannot be incRef'd: it's already pending delete";
refCounts.put(fileName, rc);
} else {
rc = refCounts.get(fileName);
@ -686,7 +688,7 @@ final class IndexFileDeleter implements Closeable {
return rc;
}
- void deleteFiles(List<String> files) throws IOException {
+ void deleteFiles(List<String> files) {
assert locked();
for(final String file: files) {
deleteFile(file);
@ -695,7 +697,7 @@ final class IndexFileDeleter implements Closeable {
/** Deletes the specified files, but only if they are new
* (have not yet been incref'd). */
- void deleteNewFiles(Collection<String> files) throws IOException {
+ void deleteNewFiles(Collection<String> files) {
assert locked();
for (final String fileName: files) {
// NOTE: it's very unusual yet possible for the
@ -713,8 +715,7 @@ final class IndexFileDeleter implements Closeable {
}
}
- void deleteFile(String fileName)
- throws IOException {
+ void deleteFile(String fileName) {
assert locked();
ensureOpen();
try {
@ -734,7 +735,7 @@ final class IndexFileDeleter implements Closeable {
infoStream.message("IFD", "unable to remove file \"" + fileName + "\": " + e.toString() + "; Will re-try later.");
}
if (deletable == null) {
- deletable = new ArrayList<>();
+ deletable = new HashSet<>();
}
deletable.add(fileName); // add to deletable
}
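// Editorial note: deletable moves from List to Set because deleteFile() may
// retry the same name many times (open files on Windows stay undeletable),
// and a List would accumulate duplicates; HashSet.add is idempotent and gives
// the new assert in getRefCount an O(1) contains() check. Toy illustration,
// not from the patch:
//   Set<String> deletable = new HashSet<>();
//   deletable.add("_0.cfs");
//   deletable.add("_0.cfs");             // retry: still a single entry
//   assert deletable.contains("_0.cfs"); // fast membership test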


@ -41,7 +41,7 @@ import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.codecs.Codec;
- import org.apache.lucene.codecs.FieldInfosReader;
+ import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DocValuesUpdate.BinaryDocValuesUpdate;
import org.apache.lucene.index.DocValuesUpdate.NumericDocValuesUpdate;
@ -871,7 +871,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
// TODO: fix tests abusing this method!
static FieldInfos readFieldInfos(SegmentCommitInfo si) throws IOException {
Codec codec = si.info.getCodec();
- FieldInfosReader reader = codec.fieldInfosFormat().getFieldInfosReader();
+ FieldInfosFormat reader = codec.fieldInfosFormat();
if (si.hasFieldUpdates()) {
// there are updates, we read latest (always outside of CFS)
@ -2024,6 +2024,10 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
deleter.close();
// Must set closed while inside same sync block where we call deleter.refresh, else concurrent threads may try to sneak a flush in,
// after we leave this sync block and before we enter the sync block in the finally clause below that sets closed:
closed = true;
IOUtils.close(writeLock); // release write lock
writeLock = null;
@ -2267,6 +2271,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
try {
synchronized (this) {
// Lock order IW -> BDS
ensureOpen(false);
synchronized (bufferedUpdatesStream) {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "publishFlushedSegment");
@ -2542,10 +2547,9 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
return;
}
- MergeState mergeState;
boolean success = false;
try {
- mergeState = merger.merge(); // merge 'em
+ merger.merge(); // merge 'em
success = true;
} finally {
if (!success) {
@ -2594,7 +2598,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
// above:
success = false;
try {
- codec.segmentInfoFormat().getSegmentInfoWriter().write(trackingDir, info, mergeState.mergeFieldInfos, context);
+ codec.segmentInfoFormat().write(trackingDir, info, context);
success = true;
} finally {
if (!success) {
@ -3845,8 +3849,6 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
merge.checkAborted(directory);
- final String mergedName = merge.info.info.name;
List<SegmentCommitInfo> sourceSegments = merge.segments;
IOContext context = new IOContext(merge.getMergeInfo());
@ -4060,7 +4062,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
// above:
boolean success2 = false;
try {
- codec.segmentInfoFormat().getSegmentInfoWriter().write(directory, merge.info.info, mergeState.mergeFieldInfos, context);
+ codec.segmentInfoFormat().write(directory, merge.info.info, context);
success2 = true;
} finally {
if (!success2) {
@ -4511,7 +4513,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
Set<String> siFiles = new HashSet<>();
for (String cfsFile : cfsFiles) {
siFiles.add(cfsFile);
- };
+ }
info.setFiles(siFiles);
return files;


@ -117,8 +117,9 @@ public abstract class MergePolicy {
* @param segments List of {@link SegmentCommitInfo}s
* to be merged. */
public OneMerge(List<SegmentCommitInfo> segments) {
- if (0 == segments.size())
+ if (0 == segments.size()) {
throw new RuntimeException("segments must include at least one segment");
+ }
// clone the list, as the in list may be based off original SegmentInfos and may be modified
this.segments = new ArrayList<>(segments);
int count = 0;
@ -239,14 +240,17 @@ public abstract class MergePolicy {
StringBuilder b = new StringBuilder();
final int numSegments = segments.size();
for(int i=0;i<numSegments;i++) {
- if (i > 0) b.append(' ');
+ if (i > 0) {
+ b.append(' ');
+ }
b.append(segments.get(i).toString(dir, 0));
}
if (info != null) {
b.append(" into ").append(info.info.name);
}
- if (maxNumSegments != -1)
+ if (maxNumSegments != -1) {
b.append(" [maxNumSegments=" + maxNumSegments + "]");
+ }
if (aborted) {
b.append(" [ABORTED]");
}
@ -312,8 +316,9 @@ public abstract class MergePolicy {
StringBuilder b = new StringBuilder();
b.append("MergeSpec:\n");
final int count = merges.size();
- for(int i=0;i<count;i++)
+ for(int i=0;i<count;i++) {
b.append("  ").append(1 + i).append(": ").append(merges.get(i).segString(dir));
+ }
return b.toString();
}
}
@ -477,9 +482,9 @@ public abstract class MergePolicy {
protected long size(SegmentCommitInfo info, IndexWriter writer) throws IOException {
long byteSize = info.sizeInBytes();
int delCount = writer.numDeletedDocs(info);
- double delRatio = (info.info.getDocCount() <= 0 ? 0.0f : ((float)delCount / (float)info.info.getDocCount()));
+ double delRatio = info.info.getDocCount() <= 0 ? 0.0f : (float) delCount / (float) info.info.getDocCount();
assert delRatio <= 1.0;
return (info.info.getDocCount() <= 0 ? byteSize : (long) (byteSize * (1.0 - delRatio)));
}
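// Editorial note: size() pro-rates a segment's bytes by its live-document
// ratio, so merge scoring favors segments carrying many deletions. A worked
// example with assumed numbers:
//   // 100 MB segment, 25 of 100 docs deleted => counted as 75 MB
//   long byteSize = 100L * 1024 * 1024;
//   int docCount = 100, delCount = 25;
//   double delRatio = (double) delCount / docCount;       // 0.25
//   long proRated = (long) (byteSize * (1.0 - delRatio)); // 78643200 bytes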
/** Returns true if this single info is already fully merged (has no
@ -527,7 +532,7 @@ public abstract class MergePolicy {
throw new IllegalArgumentException("maxCFSSegmentSizeMB must be >=0 (got " + v + ")");
}
v *= 1024 * 1024;
- this.maxCFSSegmentSize = (v > Long.MAX_VALUE) ? Long.MAX_VALUE : (long) v;
+ this.maxCFSSegmentSize = v > Long.MAX_VALUE ? Long.MAX_VALUE : (long) v;
}
}


@ -451,7 +451,7 @@ class ReadersAndUpdates {
final IOContext infosContext = new IOContext(new FlushInfo(info.info.getDocCount(), estInfosSize));
// separately also track which files were created for this gen
final TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir);
- infosFormat.getFieldInfosWriter().write(trackingDir, info.info, segmentSuffix, fieldInfos, infosContext);
+ infosFormat.write(trackingDir, info.info, segmentSuffix, fieldInfos, infosContext);
info.advanceFieldInfosGen();
return trackingDir.getCreatedFiles();
}


@ -109,7 +109,7 @@ final class SegmentCoreReaders implements Accountable {
cfsDir = dir;
}
- coreFieldInfos = codec.fieldInfosFormat().getFieldInfosReader().read(cfsDir, si.info, "", context);
+ coreFieldInfos = codec.fieldInfosFormat().read(cfsDir, si.info, "", context);
final SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.info, coreFieldInfos, context);
final PostingsFormat format = codec.postingsFormat();


@ -307,7 +307,7 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
String segName = input.readString();
Codec codec = Codec.forName(input.readString());
//System.out.println("SIS.read seg=" + seg + " codec=" + codec);
- SegmentInfo info = codec.segmentInfoFormat().getSegmentInfoReader().read(directory, segName, IOContext.READ);
+ SegmentInfo info = codec.segmentInfoFormat().read(directory, segName, IOContext.READ);
info.setCodec(codec);
long delGen = input.readLong();
int delCount = input.readInt();
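// Editorial note: call sites like this one are the point of the patch:
// SegmentInfoFormat and FieldInfosFormat now expose read/write directly
// instead of handing out separate reader/writer objects. Condensed
// before/after (the segment name and IOContext values are illustrative):
//   // before
//   SegmentInfo info = codec.segmentInfoFormat().getSegmentInfoReader().read(dir, "_0", IOContext.READ);
//   // after
//   SegmentInfo info = codec.segmentInfoFormat().read(dir, "_0", IOContext.READ);
//   codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);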


@ -22,7 +22,6 @@ import java.util.List;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesConsumer;
- import org.apache.lucene.codecs.FieldInfosWriter;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.NormsConsumer;
import org.apache.lucene.codecs.StoredFieldsWriter;
@ -147,8 +146,7 @@ final class SegmentMerger {
}
// write the merged infos
- FieldInfosWriter fieldInfosWriter = codec.fieldInfosFormat().getFieldInfosWriter();
- fieldInfosWriter.write(directory, mergeState.segmentInfo, "", mergeState.mergeFieldInfos, context);
+ codec.fieldInfosFormat().write(directory, mergeState.segmentInfo, "", mergeState.mergeFieldInfos, context);
return mergeState;
}


@ -182,7 +182,7 @@ public final class SegmentReader extends LeafReader implements Accountable {
// updates always outside of CFS
FieldInfosFormat fisFormat = si.info.getCodec().fieldInfosFormat();
final String segmentSuffix = Long.toString(si.getFieldInfosGen(), Character.MAX_RADIX);
- return fisFormat.getFieldInfosReader().read(si.info.dir, si.info, segmentSuffix, IOContext.READONCE);
+ return fisFormat.read(si.info.dir, si.info, segmentSuffix, IOContext.READONCE);
}
}


@ -142,7 +142,7 @@ public class TieredMergePolicy extends MergePolicy {
throw new IllegalArgumentException("maxMergedSegmentMB must be >=0 (got " + v + ")");
}
v *= 1024 * 1024;
- maxMergedSegmentBytes = (v > Long.MAX_VALUE) ? Long.MAX_VALUE : (long) v;
+ maxMergedSegmentBytes = v > Long.MAX_VALUE ? Long.MAX_VALUE : (long) v;
return this;
}
@ -183,7 +183,7 @@ public class TieredMergePolicy extends MergePolicy {
throw new IllegalArgumentException("floorSegmentMB must be >= 0.0 (got " + v + ")");
}
v *= 1024 * 1024;
- floorSegmentBytes = (v > Long.MAX_VALUE) ? Long.MAX_VALUE : (long) v;
+ floorSegmentBytes = v > Long.MAX_VALUE ? Long.MAX_VALUE : (long) v;
return this;
}
@ -314,8 +314,12 @@ public class TieredMergePolicy extends MergePolicy {
// If we have too-large segments, grace them out
// of the maxSegmentCount:
int tooBigCount = 0;
- while (tooBigCount < infosSorted.size() && size(infosSorted.get(tooBigCount), writer) >= maxMergedSegmentBytes/2.0) {
- totIndexBytes -= size(infosSorted.get(tooBigCount), writer);
+ while (tooBigCount < infosSorted.size()) {
+ long segBytes = size(infosSorted.get(tooBigCount), writer);
+ if (segBytes < maxMergedSegmentBytes/2.0) {
+ break;
+ }
+ totIndexBytes -= segBytes;
tooBigCount++;
}
@ -351,7 +355,7 @@ public class TieredMergePolicy extends MergePolicy {
for(int idx = tooBigCount; idx<infosSorted.size(); idx++) {
final SegmentCommitInfo info = infosSorted.get(idx);
if (merging.contains(info)) {
- mergingBytes += info.sizeInBytes();
+ mergingBytes += size(info, writer);
} else if (!toBeMerged.contains(info)) {
eligible.add(info);
}
@ -400,6 +404,10 @@ public class TieredMergePolicy extends MergePolicy {
totAfterMergeBytes += segBytes;
}
// We should never see an empty candidate: we iterated over maxMergeAtOnce
// segments, and already pre-excluded the too-large segments:
assert candidate.size() > 0;
final MergeScore score = score(candidate, hitTooLarge, mergingBytes, writer);
if (verbose(writer)) {
message("  maybe=" + writer.segString(candidate) + " score=" + score.getScore() + " " + score.getExplanation() + " tooLarge=" + hitTooLarge + " size=" + String.format(Locale.ROOT, "%.3f MB", totAfterMergeBytes/1024./1024.), writer);
