From f05679bb5fd102811968362ffa88d1c5c215e557 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Mon, 5 Dec 2011 02:39:09 +0000 Subject: [PATCH] LUCENE-3606: remove separate norms extension from indexfilenames git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3606@1210314 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/lucene/index/IndexFileNames.java | 19 +------------------ .../org/apache/lucene/index/IndexWriter.java | 17 ++++++++++++++++- .../org/apache/lucene/index/SegmentInfo.java | 15 ++++++--------- .../lucene/index/codecs/NormsFormat.java | 7 +++++++ .../codecs/lucene40/Lucene40NormsFormat.java | 5 +++++ .../codecs/lucene40/Lucene40NormsReader.java | 18 +++++++++++++++++- .../codecs/lucene40/Lucene40NormsWriter.java | 5 +++++ 7 files changed, 57 insertions(+), 29 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/index/IndexFileNames.java b/lucene/src/java/org/apache/lucene/index/IndexFileNames.java index 77a0abac5cf..d5b3f1a0534 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexFileNames.java +++ b/lucene/src/java/org/apache/lucene/index/IndexFileNames.java @@ -17,8 +17,6 @@ package org.apache.lucene.index; * limitations under the License. */ -import java.util.regex.Pattern; - import org.apache.lucene.index.codecs.Codec; // for javadocs // TODO: put all files under codec and remove all the static extensions here @@ -62,9 +60,6 @@ public final class IndexFileNames { /** Extension of deletes */ public static final String DELETES_EXTENSION = "del"; - /** Extension of separate norms */ - public static final String SEPARATE_NORMS_EXTENSION = "s"; - /** * This array contains all filename extensions used by * Lucene's index files, with one exception, namely the @@ -180,17 +175,5 @@ public final class IndexFileNames { filename = filename.substring(0, idx); } return filename; - } - - /** - * Returns true if the given filename ends with the separate norms file - * pattern: {@code SEPARATE_NORMS_EXTENSION + "[0-9]+"}. - */ - public static boolean isSeparateNormsFile(String filename) { - int idx = filename.lastIndexOf('.'); - if (idx == -1) return false; - String ext = filename.substring(idx + 1); - return Pattern.matches(SEPARATE_NORMS_EXTENSION + "[0-9]+", ext); - } - + } } diff --git a/lucene/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/src/java/org/apache/lucene/index/IndexWriter.java index 4144e379c75..acc4b635cd8 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/src/java/org/apache/lucene/index/IndexWriter.java @@ -30,6 +30,7 @@ import java.util.Map; import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.ConcurrentHashMap; +import java.util.regex.Pattern; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.index.DocumentsWriterPerThread.FlushedSegment; @@ -3980,7 +3981,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { for (String file : files) { assert !IndexFileNames.matchesExtension(file, IndexFileNames.DELETES_EXTENSION) : ".del file is not allowed in .cfs: " + file; - assert !IndexFileNames.isSeparateNormsFile(file) + assert !isSeparateNormsFile(file) : "separate norms file (.s[0-9]+) is not allowed in .cfs: " + file; directory.copy(cfsDir, file, file, context); checkAbort.work(directory.fileLength(file)); @@ -3991,4 +3992,18 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { return files; } + + + /** + * Returns true if the given filename ends with the separate norms file + * pattern: {@code SEPARATE_NORMS_EXTENSION + "[0-9]+"}. + * @deprecated only for asserting + */ + @Deprecated + private static boolean isSeparateNormsFile(String filename) { + int idx = filename.lastIndexOf('.'); + if (idx == -1) return false; + String ext = filename.substring(idx + 1); + return Pattern.matches("s[0-9]+", ext); + } } diff --git a/lucene/src/java/org/apache/lucene/index/SegmentInfo.java b/lucene/src/java/org/apache/lucene/index/SegmentInfo.java index fb92799107f..4e103cbf415 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentInfo.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentInfo.java @@ -339,6 +339,7 @@ public final class SegmentInfo implements Cloneable { /** * @deprecated separate norms are not supported in >= 4.0 */ + @Deprecated boolean hasSeparateNorms() { if (normGen == null) { return false; @@ -508,16 +509,12 @@ public final class SegmentInfo implements Cloneable { if (delFileName != null && (delGen >= YES || dir.fileExists(delFileName))) { fileSet.add(delFileName); } - - // TODO: push this to codec? + + // because separate norm files are unconditionally stored outside cfs, + // we must explicitly ask for their filenames if we might have separate norms: + // remove this when 3.x indexes are no longer supported if (normGen != null) { - for (Entry entry : normGen.entrySet()) { - long gen = entry.getValue(); - if (gen >= YES) { - // Definitely a separate norm file, with generation: - fileSet.add(IndexFileNames.fileNameFromGeneration(name, IndexFileNames.SEPARATE_NORMS_EXTENSION + entry.getKey(), gen)); - } - } + codec.normsFormat().separateFiles(dir, this, fileSet); } files = new ArrayList(fileSet); diff --git a/lucene/src/java/org/apache/lucene/index/codecs/NormsFormat.java b/lucene/src/java/org/apache/lucene/index/codecs/NormsFormat.java index 60380f79484..5db3dfc6e20 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/NormsFormat.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/NormsFormat.java @@ -34,4 +34,11 @@ public abstract class NormsFormat { public abstract NormsReader normsReader(Directory dir, SegmentInfo info, FieldInfos fields, IOContext context, Directory separateNormsDir) throws IOException; public abstract NormsWriter normsWriter(SegmentWriteState state) throws IOException; public abstract void files(Directory dir, SegmentInfo info, Set files) throws IOException; + + /** + * Note: this should not be overridden! + * @deprecated + */ + @Deprecated + public void separateFiles(Directory dir, SegmentInfo info, Set files) throws IOException {}; } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/lucene40/Lucene40NormsFormat.java b/lucene/src/java/org/apache/lucene/index/codecs/lucene40/Lucene40NormsFormat.java index 9e5a49ae70e..e3a05c8cf04 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/lucene40/Lucene40NormsFormat.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/lucene40/Lucene40NormsFormat.java @@ -45,4 +45,9 @@ public class Lucene40NormsFormat extends NormsFormat { public void files(Directory dir, SegmentInfo info, Set files) throws IOException { Lucene40NormsReader.files(dir, info, files); } + + @Override + public void separateFiles(Directory dir, SegmentInfo info, Set files) throws IOException { + Lucene40NormsReader.separateFiles(dir, info, files); + } } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/lucene40/Lucene40NormsReader.java b/lucene/src/java/org/apache/lucene/index/codecs/lucene40/Lucene40NormsReader.java index 3de22412db0..d2a69a52ecc 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/lucene40/Lucene40NormsReader.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/lucene40/Lucene40NormsReader.java @@ -22,6 +22,7 @@ import java.util.HashMap; import java.util.IdentityHashMap; import java.util.Map; import java.util.Set; +import java.util.Map.Entry; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; @@ -132,7 +133,7 @@ public class Lucene40NormsReader extends NormsReader { private static String getNormFilename(String segmentName, Map normGen, int number) { if (hasSeparateNorms(normGen, number)) { - return IndexFileNames.fileNameFromGeneration(segmentName, IndexFileNames.SEPARATE_NORMS_EXTENSION + number, normGen.get(number)); + return IndexFileNames.fileNameFromGeneration(segmentName, Lucene40NormsWriter.SEPARATE_NORMS_EXTENSION + number, normGen.get(number)); } else { // single file for all norms return IndexFileNames.fileNameFromGeneration(segmentName, Lucene40NormsWriter.NORMS_EXTENSION, SegmentInfo.WITHOUT_GEN); @@ -180,4 +181,19 @@ public class Lucene40NormsReader extends NormsReader { files.add(normsFileName); } } + + /** @deprecated */ + @Deprecated + static void separateFiles(Directory dir, SegmentInfo info, Set files) throws IOException { + Map normGen = info.getNormGen(); + if (normGen != null) { + for (Entry entry : normGen.entrySet()) { + long gen = entry.getValue(); + if (gen >= SegmentInfo.YES) { + // Definitely a separate norm file, with generation: + files.add(IndexFileNames.fileNameFromGeneration(info.name, Lucene40NormsWriter.SEPARATE_NORMS_EXTENSION + entry.getKey(), gen)); + } + } + } + } } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/lucene40/Lucene40NormsWriter.java b/lucene/src/java/org/apache/lucene/index/codecs/lucene40/Lucene40NormsWriter.java index 09b7328bf92..8bb54742c11 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/lucene40/Lucene40NormsWriter.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/lucene40/Lucene40NormsWriter.java @@ -40,6 +40,11 @@ public class Lucene40NormsWriter extends NormsWriter { /** Extension of norms file */ static final String NORMS_EXTENSION = "nrm"; + /** Extension of separate norms file + * @deprecated */ + @Deprecated + static final String SEPARATE_NORMS_EXTENSION = "s"; + public Lucene40NormsWriter(Directory directory, String segment, IOContext context) throws IOException { final String normsFileName = IndexFileNames.segmentFileName(segment, "", NORMS_EXTENSION); boolean success = false;