diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40CompoundReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40CompoundReader.java index da5dbbed9d9..de108dbbfa1 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40CompoundReader.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40CompoundReader.java @@ -234,12 +234,15 @@ final class Lucene40CompoundReader extends BaseDirectory { throw new UnsupportedOperationException(); } - /** Not implemented - * @throws UnsupportedOperationException always: not supported by CFS */ @Override public Lock makeLock(String name) { throw new UnsupportedOperationException(); } + + @Override + public void clearLock(String name) throws IOException { + throw new UnsupportedOperationException(); + } @Override public String toString() { diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextCodec.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextCodec.java index 019bb1cd518..c99eef2e12a 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextCodec.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextCodec.java @@ -27,7 +27,6 @@ import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.NormsFormat; import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.TermVectorsFormat; -import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat; /** * plain text index format. @@ -44,8 +43,7 @@ public final class SimpleTextCodec extends Codec { private final NormsFormat normsFormat = new SimpleTextNormsFormat(); private final LiveDocsFormat liveDocs = new SimpleTextLiveDocsFormat(); private final DocValuesFormat dvFormat = new SimpleTextDocValuesFormat(); - // nocommit - private final CompoundFormat compoundFormat = new Lucene50CompoundFormat(); + private final CompoundFormat compoundFormat = new SimpleTextCompoundFormat(); public SimpleTextCodec() { super("SimpleText"); diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextCompoundFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextCompoundFormat.java new file mode 100644 index 00000000000..d6495979493 --- /dev/null +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextCompoundFormat.java @@ -0,0 +1,246 @@ +package org.apache.lucene.codecs.simpletext; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.text.DecimalFormat; +import java.text.DecimalFormatSymbols; +import java.text.ParseException; +import java.util.Arrays; +import java.util.Collection; +import java.util.Locale; + +import org.apache.lucene.codecs.CompoundFormat; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.MergeState.CheckAbort; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.store.BaseDirectory; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.Lock; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefBuilder; +import org.apache.lucene.util.StringHelper; + +/** + * plain text compound format. + *

+ * FOR RECREATIONAL USE ONLY + * @lucene.experimental + */ +public class SimpleTextCompoundFormat extends CompoundFormat { + + @Override + public Directory getCompoundReader(Directory dir, SegmentInfo si, IOContext context) throws IOException { + String dataFile = IndexFileNames.segmentFileName(si.name, "", DATA_EXTENSION); + final IndexInput in = dir.openInput(dataFile, context); + + BytesRefBuilder scratch = new BytesRefBuilder(); + + // first get to TOC: + DecimalFormat df = new DecimalFormat(OFFSETPATTERN, DecimalFormatSymbols.getInstance(Locale.ROOT)); + long pos = in.length() - TABLEPOS.length - OFFSETPATTERN.length() - 1; + in.seek(pos); + SimpleTextUtil.readLine(in, scratch); + assert StringHelper.startsWith(scratch.get(), TABLEPOS); + long tablePos = -1; + try { + tablePos = df.parse(stripPrefix(scratch, TABLEPOS)).longValue(); + } catch (ParseException e) { + throw new CorruptIndexException("can't parse CFS trailer, got: " + scratch.get().utf8ToString(), in); + } + + // seek to TOC and read it + in.seek(tablePos); + SimpleTextUtil.readLine(in, scratch); + assert StringHelper.startsWith(scratch.get(), TABLE); + int numEntries = Integer.parseInt(stripPrefix(scratch, TABLE)); + + final String fileNames[] = new String[numEntries]; + final long startOffsets[] = new long[numEntries]; + final long endOffsets[] = new long[numEntries]; + + for (int i = 0; i < numEntries; i++) { + SimpleTextUtil.readLine(in, scratch); + assert StringHelper.startsWith(scratch.get(), TABLENAME); + fileNames[i] = si.name + IndexFileNames.stripSegmentName(stripPrefix(scratch, TABLENAME)); + + if (i > 0) { + // files must be unique and in sorted order + assert fileNames[i].compareTo(fileNames[i-1]) > 0; + } + + SimpleTextUtil.readLine(in, scratch); + assert StringHelper.startsWith(scratch.get(), TABLESTART); + startOffsets[i] = Long.parseLong(stripPrefix(scratch, TABLESTART)); + + SimpleTextUtil.readLine(in, scratch); + assert StringHelper.startsWith(scratch.get(), TABLEEND); + endOffsets[i] = Long.parseLong(stripPrefix(scratch, TABLEEND)); + } + + return new BaseDirectory() { + + private int getIndex(String name) throws IOException { + int index = Arrays.binarySearch(fileNames, name); + if (index < 0) { + throw new FileNotFoundException("No sub-file found (fileName=" + name + " files: " + Arrays.toString(fileNames) + ")"); + } + return index; + } + + @Override + public String[] listAll() throws IOException { + ensureOpen(); + return fileNames.clone(); + } + + @Override + public long fileLength(String name) throws IOException { + ensureOpen(); + int index = getIndex(name); + return endOffsets[index] - startOffsets[index]; + } + + @Override + public IndexInput openInput(String name, IOContext context) throws IOException { + ensureOpen(); + int index = getIndex(name); + return in.slice(name, startOffsets[index], endOffsets[index] - startOffsets[index]); + } + + @Override + public void close() throws IOException { + isOpen = false; + in.close(); + } + + // write methods: disabled + + @Override + public IndexOutput createOutput(String name, IOContext context) { throw new UnsupportedOperationException(); } + + @Override + public void sync(Collection names) { throw new UnsupportedOperationException(); } + + @Override + public void deleteFile(String name) { throw new UnsupportedOperationException(); } + + @Override + public void renameFile(String source, String dest) { throw new UnsupportedOperationException(); } + + @Override + public Lock makeLock(String name) { throw new UnsupportedOperationException(); } + + @Override + public void clearLock(String name) { throw new UnsupportedOperationException(); } + }; + } + + @Override + public void write(Directory dir, SegmentInfo si, Collection files, CheckAbort checkAbort, IOContext context) throws IOException { + String dataFile = IndexFileNames.segmentFileName(si.name, "", DATA_EXTENSION); + + int numFiles = files.size(); + String names[] = files.toArray(new String[numFiles]); + Arrays.sort(names); + long startOffsets[] = new long[numFiles]; + long endOffsets[] = new long[numFiles]; + + BytesRefBuilder scratch = new BytesRefBuilder(); + + try (IndexOutput out = dir.createOutput(dataFile, context)) { + for (int i = 0; i < names.length; i++) { + // write header for file + SimpleTextUtil.write(out, HEADER); + SimpleTextUtil.write(out, names[i], scratch); + SimpleTextUtil.writeNewline(out); + + // write bytes for file + startOffsets[i] = out.getFilePointer(); + try (IndexInput in = dir.openInput(names[i], IOContext.READONCE)) { + out.copyBytes(in, in.length()); + } + endOffsets[i] = out.getFilePointer(); + + checkAbort.work(endOffsets[i] - startOffsets[i]); + } + + long tocPos = out.getFilePointer(); + + // write CFS table + SimpleTextUtil.write(out, TABLE); + SimpleTextUtil.write(out, Integer.toString(numFiles), scratch); + SimpleTextUtil.writeNewline(out); + + for (int i = 0; i < names.length; i++) { + SimpleTextUtil.write(out, TABLENAME); + SimpleTextUtil.write(out, names[i], scratch); + SimpleTextUtil.writeNewline(out); + + SimpleTextUtil.write(out, TABLESTART); + SimpleTextUtil.write(out, Long.toString(startOffsets[i]), scratch); + SimpleTextUtil.writeNewline(out); + + SimpleTextUtil.write(out, TABLEEND); + SimpleTextUtil.write(out, Long.toString(endOffsets[i]), scratch); + SimpleTextUtil.writeNewline(out); + } + + DecimalFormat df = new DecimalFormat(OFFSETPATTERN, DecimalFormatSymbols.getInstance(Locale.ROOT)); + SimpleTextUtil.write(out, TABLEPOS); + SimpleTextUtil.write(out, df.format(tocPos), scratch); + SimpleTextUtil.writeNewline(out); + } + } + + @Override + public String[] files(SegmentInfo si) { + return new String[] { IndexFileNames.segmentFileName(si.name, "", DATA_EXTENSION) }; + } + + // helper method to strip strip away 'prefix' from 'scratch' and return as String + private String stripPrefix(BytesRefBuilder scratch, BytesRef prefix) throws IOException { + return new String(scratch.bytes(), prefix.length, scratch.length() - prefix.length, StandardCharsets.UTF_8); + } + + /** Extension of compound file */ + static final String DATA_EXTENSION = "scf"; + + final static BytesRef HEADER = new BytesRef("cfs entry for: "); + + final static BytesRef TABLE = new BytesRef("table of contents, size: "); + final static BytesRef TABLENAME = new BytesRef(" filename: "); + final static BytesRef TABLESTART = new BytesRef(" start: "); + final static BytesRef TABLEEND = new BytesRef(" end: "); + + final static BytesRef TABLEPOS = new BytesRef("table of contents begins at offset: "); + + final static String OFFSETPATTERN; + static { + int numDigits = Long.toString(Long.MAX_VALUE).length(); + char pattern[] = new char[numDigits]; + Arrays.fill(pattern, '0'); + OFFSETPATTERN = new String(pattern); + } +} diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/simpletext/TestSimpleTextCompoundFormat.java b/lucene/codecs/src/test/org/apache/lucene/codecs/simpletext/TestSimpleTextCompoundFormat.java new file mode 100644 index 00000000000..cda6df39aaf --- /dev/null +++ b/lucene/codecs/src/test/org/apache/lucene/codecs/simpletext/TestSimpleTextCompoundFormat.java @@ -0,0 +1,30 @@ +package org.apache.lucene.codecs.simpletext; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.index.BaseCompoundFormatTestCase; + +public class TestSimpleTextCompoundFormat extends BaseCompoundFormatTestCase { + private final Codec codec = new SimpleTextCodec(); + + @Override + protected Codec getCodec() { + return codec; + } +} diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50CompoundReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50CompoundReader.java index 3bee84107fd..09696421351 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50CompoundReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50CompoundReader.java @@ -60,6 +60,8 @@ final class Lucene50CompoundReader extends BaseDirectory { /** * Create a new CompoundFileDirectory. */ + // TODO: we should just pre-strip "entries" and append segment name up-front like simpletext? + // this need not be a "general purpose" directory anymore (it only writes index files) public Lucene50CompoundReader(Directory directory, SegmentInfo si, IOContext context) throws IOException { this.directory = directory; this.segmentName = si.name; @@ -179,13 +181,16 @@ final class Lucene50CompoundReader extends BaseDirectory { throw new UnsupportedOperationException(); } - /** Not implemented - * @throws UnsupportedOperationException always: not supported by CFS */ @Override public Lock makeLock(String name) { throw new UnsupportedOperationException(); } + @Override + public void clearLock(String name) throws IOException { + throw new UnsupportedOperationException(); + } + @Override public String toString() { return "CompoundFileDirectory(segment=\"" + segmentName + "\" in dir=" + directory + ")"; diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseCompoundFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseCompoundFormatTestCase.java index bef1db7de29..b8d0d010976 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseCompoundFormatTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseCompoundFormatTestCase.java @@ -316,6 +316,28 @@ public abstract class BaseCompoundFormatTestCase extends BaseIndexFileFormatTest dir.close(); } + // test that cfs reader is read-only + public void testClearLockDisabled() throws IOException { + final String testfile = "_123.test"; + + Directory dir = newDirectory(); + IndexOutput out = dir.createOutput(testfile, IOContext.DEFAULT); + out.writeInt(3); + out.close(); + + SegmentInfo si = newSegmentInfo(dir, "_123"); + si.getCodec().compoundFormat().write(dir, si, Collections.emptyList(), MergeState.CheckAbort.NONE, IOContext.DEFAULT); + Directory cfs = si.getCodec().compoundFormat().getCompoundReader(dir, si, IOContext.DEFAULT); + try { + cfs.clearLock("foobar"); + fail("didn't get expected exception"); + } catch (UnsupportedOperationException expected) { + // expected UOE + } + cfs.close(); + dir.close(); + } + /** * This test creates a compound file based on a large number of files of * various length. The file content is generated randomly. The sizes range @@ -379,7 +401,7 @@ public abstract class BaseCompoundFormatTestCase extends BaseIndexFileFormatTest final IndexInput[] ins = new IndexInput[FILE_COUNT]; for (int fileIdx = 0; fileIdx < FILE_COUNT; fileIdx++) { - ins[fileIdx] = cfs.openInput("file." + fileIdx, newIOContext(random())); + ins[fileIdx] = cfs.openInput("_123." + fileIdx, newIOContext(random())); } assertEquals(1, dir.getFileHandleCount());