mirror of https://github.com/apache/lucene.git
LUCENE-5969: add SimpleText cfs
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5969@1629397 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
150bcd6c60
commit
799a2875e1
|
@ -234,13 +234,16 @@ final class Lucene40CompoundReader extends BaseDirectory {
|
|||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
/** Not implemented
|
||||
* @throws UnsupportedOperationException always: not supported by CFS */
|
||||
@Override
|
||||
public Lock makeLock(String name) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void clearLock(String name) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "CompoundFileDirectory(file=\"" + fileName + "\" in dir=" + directory + ")";
|
||||
|
|
|
@ -27,7 +27,6 @@ import org.apache.lucene.codecs.DocValuesFormat;
|
|||
import org.apache.lucene.codecs.NormsFormat;
|
||||
import org.apache.lucene.codecs.StoredFieldsFormat;
|
||||
import org.apache.lucene.codecs.TermVectorsFormat;
|
||||
import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat;
|
||||
|
||||
/**
|
||||
* plain text index format.
|
||||
|
@ -44,8 +43,7 @@ public final class SimpleTextCodec extends Codec {
|
|||
private final NormsFormat normsFormat = new SimpleTextNormsFormat();
|
||||
private final LiveDocsFormat liveDocs = new SimpleTextLiveDocsFormat();
|
||||
private final DocValuesFormat dvFormat = new SimpleTextDocValuesFormat();
|
||||
// nocommit
|
||||
private final CompoundFormat compoundFormat = new Lucene50CompoundFormat();
|
||||
private final CompoundFormat compoundFormat = new SimpleTextCompoundFormat();
|
||||
|
||||
public SimpleTextCodec() {
|
||||
super("SimpleText");
|
||||
|
|
|
@ -0,0 +1,246 @@
|
|||
package org.apache.lucene.codecs.simpletext;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.text.DecimalFormat;
|
||||
import java.text.DecimalFormatSymbols;
|
||||
import java.text.ParseException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Locale;
|
||||
|
||||
import org.apache.lucene.codecs.CompoundFormat;
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.index.MergeState.CheckAbort;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.SegmentInfo;
|
||||
import org.apache.lucene.store.BaseDirectory;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.store.Lock;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
|
||||
/**
|
||||
* plain text compound format.
|
||||
* <p>
|
||||
* <b><font color="red">FOR RECREATIONAL USE ONLY</font></B>
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class SimpleTextCompoundFormat extends CompoundFormat {
|
||||
|
||||
@Override
|
||||
public Directory getCompoundReader(Directory dir, SegmentInfo si, IOContext context) throws IOException {
|
||||
String dataFile = IndexFileNames.segmentFileName(si.name, "", DATA_EXTENSION);
|
||||
final IndexInput in = dir.openInput(dataFile, context);
|
||||
|
||||
BytesRefBuilder scratch = new BytesRefBuilder();
|
||||
|
||||
// first get to TOC:
|
||||
DecimalFormat df = new DecimalFormat(OFFSETPATTERN, DecimalFormatSymbols.getInstance(Locale.ROOT));
|
||||
long pos = in.length() - TABLEPOS.length - OFFSETPATTERN.length() - 1;
|
||||
in.seek(pos);
|
||||
SimpleTextUtil.readLine(in, scratch);
|
||||
assert StringHelper.startsWith(scratch.get(), TABLEPOS);
|
||||
long tablePos = -1;
|
||||
try {
|
||||
tablePos = df.parse(stripPrefix(scratch, TABLEPOS)).longValue();
|
||||
} catch (ParseException e) {
|
||||
throw new CorruptIndexException("can't parse CFS trailer, got: " + scratch.get().utf8ToString(), in);
|
||||
}
|
||||
|
||||
// seek to TOC and read it
|
||||
in.seek(tablePos);
|
||||
SimpleTextUtil.readLine(in, scratch);
|
||||
assert StringHelper.startsWith(scratch.get(), TABLE);
|
||||
int numEntries = Integer.parseInt(stripPrefix(scratch, TABLE));
|
||||
|
||||
final String fileNames[] = new String[numEntries];
|
||||
final long startOffsets[] = new long[numEntries];
|
||||
final long endOffsets[] = new long[numEntries];
|
||||
|
||||
for (int i = 0; i < numEntries; i++) {
|
||||
SimpleTextUtil.readLine(in, scratch);
|
||||
assert StringHelper.startsWith(scratch.get(), TABLENAME);
|
||||
fileNames[i] = si.name + IndexFileNames.stripSegmentName(stripPrefix(scratch, TABLENAME));
|
||||
|
||||
if (i > 0) {
|
||||
// files must be unique and in sorted order
|
||||
assert fileNames[i].compareTo(fileNames[i-1]) > 0;
|
||||
}
|
||||
|
||||
SimpleTextUtil.readLine(in, scratch);
|
||||
assert StringHelper.startsWith(scratch.get(), TABLESTART);
|
||||
startOffsets[i] = Long.parseLong(stripPrefix(scratch, TABLESTART));
|
||||
|
||||
SimpleTextUtil.readLine(in, scratch);
|
||||
assert StringHelper.startsWith(scratch.get(), TABLEEND);
|
||||
endOffsets[i] = Long.parseLong(stripPrefix(scratch, TABLEEND));
|
||||
}
|
||||
|
||||
return new BaseDirectory() {
|
||||
|
||||
private int getIndex(String name) throws IOException {
|
||||
int index = Arrays.binarySearch(fileNames, name);
|
||||
if (index < 0) {
|
||||
throw new FileNotFoundException("No sub-file found (fileName=" + name + " files: " + Arrays.toString(fileNames) + ")");
|
||||
}
|
||||
return index;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String[] listAll() throws IOException {
|
||||
ensureOpen();
|
||||
return fileNames.clone();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long fileLength(String name) throws IOException {
|
||||
ensureOpen();
|
||||
int index = getIndex(name);
|
||||
return endOffsets[index] - startOffsets[index];
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexInput openInput(String name, IOContext context) throws IOException {
|
||||
ensureOpen();
|
||||
int index = getIndex(name);
|
||||
return in.slice(name, startOffsets[index], endOffsets[index] - startOffsets[index]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
isOpen = false;
|
||||
in.close();
|
||||
}
|
||||
|
||||
// write methods: disabled
|
||||
|
||||
@Override
|
||||
public IndexOutput createOutput(String name, IOContext context) { throw new UnsupportedOperationException(); }
|
||||
|
||||
@Override
|
||||
public void sync(Collection<String> names) { throw new UnsupportedOperationException(); }
|
||||
|
||||
@Override
|
||||
public void deleteFile(String name) { throw new UnsupportedOperationException(); }
|
||||
|
||||
@Override
|
||||
public void renameFile(String source, String dest) { throw new UnsupportedOperationException(); }
|
||||
|
||||
@Override
|
||||
public Lock makeLock(String name) { throw new UnsupportedOperationException(); }
|
||||
|
||||
@Override
|
||||
public void clearLock(String name) { throw new UnsupportedOperationException(); }
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public void write(Directory dir, SegmentInfo si, Collection<String> files, CheckAbort checkAbort, IOContext context) throws IOException {
|
||||
String dataFile = IndexFileNames.segmentFileName(si.name, "", DATA_EXTENSION);
|
||||
|
||||
int numFiles = files.size();
|
||||
String names[] = files.toArray(new String[numFiles]);
|
||||
Arrays.sort(names);
|
||||
long startOffsets[] = new long[numFiles];
|
||||
long endOffsets[] = new long[numFiles];
|
||||
|
||||
BytesRefBuilder scratch = new BytesRefBuilder();
|
||||
|
||||
try (IndexOutput out = dir.createOutput(dataFile, context)) {
|
||||
for (int i = 0; i < names.length; i++) {
|
||||
// write header for file
|
||||
SimpleTextUtil.write(out, HEADER);
|
||||
SimpleTextUtil.write(out, names[i], scratch);
|
||||
SimpleTextUtil.writeNewline(out);
|
||||
|
||||
// write bytes for file
|
||||
startOffsets[i] = out.getFilePointer();
|
||||
try (IndexInput in = dir.openInput(names[i], IOContext.READONCE)) {
|
||||
out.copyBytes(in, in.length());
|
||||
}
|
||||
endOffsets[i] = out.getFilePointer();
|
||||
|
||||
checkAbort.work(endOffsets[i] - startOffsets[i]);
|
||||
}
|
||||
|
||||
long tocPos = out.getFilePointer();
|
||||
|
||||
// write CFS table
|
||||
SimpleTextUtil.write(out, TABLE);
|
||||
SimpleTextUtil.write(out, Integer.toString(numFiles), scratch);
|
||||
SimpleTextUtil.writeNewline(out);
|
||||
|
||||
for (int i = 0; i < names.length; i++) {
|
||||
SimpleTextUtil.write(out, TABLENAME);
|
||||
SimpleTextUtil.write(out, names[i], scratch);
|
||||
SimpleTextUtil.writeNewline(out);
|
||||
|
||||
SimpleTextUtil.write(out, TABLESTART);
|
||||
SimpleTextUtil.write(out, Long.toString(startOffsets[i]), scratch);
|
||||
SimpleTextUtil.writeNewline(out);
|
||||
|
||||
SimpleTextUtil.write(out, TABLEEND);
|
||||
SimpleTextUtil.write(out, Long.toString(endOffsets[i]), scratch);
|
||||
SimpleTextUtil.writeNewline(out);
|
||||
}
|
||||
|
||||
DecimalFormat df = new DecimalFormat(OFFSETPATTERN, DecimalFormatSymbols.getInstance(Locale.ROOT));
|
||||
SimpleTextUtil.write(out, TABLEPOS);
|
||||
SimpleTextUtil.write(out, df.format(tocPos), scratch);
|
||||
SimpleTextUtil.writeNewline(out);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String[] files(SegmentInfo si) {
|
||||
return new String[] { IndexFileNames.segmentFileName(si.name, "", DATA_EXTENSION) };
|
||||
}
|
||||
|
||||
// helper method to strip away 'prefix' from 'scratch' and return the remainder as a String
|
||||
private String stripPrefix(BytesRefBuilder scratch, BytesRef prefix) throws IOException {
|
||||
return new String(scratch.bytes(), prefix.length, scratch.length() - prefix.length, StandardCharsets.UTF_8);
|
||||
}
|
||||
|
||||
/** Extension of compound file */
|
||||
static final String DATA_EXTENSION = "scf";
|
||||
|
||||
final static BytesRef HEADER = new BytesRef("cfs entry for: ");
|
||||
|
||||
final static BytesRef TABLE = new BytesRef("table of contents, size: ");
|
||||
final static BytesRef TABLENAME = new BytesRef(" filename: ");
|
||||
final static BytesRef TABLESTART = new BytesRef(" start: ");
|
||||
final static BytesRef TABLEEND = new BytesRef(" end: ");
|
||||
|
||||
final static BytesRef TABLEPOS = new BytesRef("table of contents begins at offset: ");
|
||||
|
||||
final static String OFFSETPATTERN;
|
||||
static {
|
||||
int numDigits = Long.toString(Long.MAX_VALUE).length();
|
||||
char pattern[] = new char[numDigits];
|
||||
Arrays.fill(pattern, '0');
|
||||
OFFSETPATTERN = new String(pattern);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,30 @@
|
|||
package org.apache.lucene.codecs.simpletext;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.index.BaseCompoundFormatTestCase;
|
||||
|
||||
public class TestSimpleTextCompoundFormat extends BaseCompoundFormatTestCase {
|
||||
private final Codec codec = new SimpleTextCodec();
|
||||
|
||||
@Override
|
||||
protected Codec getCodec() {
|
||||
return codec;
|
||||
}
|
||||
}
|
|
@ -60,6 +60,8 @@ final class Lucene50CompoundReader extends BaseDirectory {
|
|||
/**
|
||||
* Create a new CompoundFileDirectory.
|
||||
*/
|
||||
// TODO: we should just pre-strip "entries" and append segment name up-front like simpletext?
|
||||
// this need not be a "general purpose" directory anymore (it only writes index files)
|
||||
public Lucene50CompoundReader(Directory directory, SegmentInfo si, IOContext context) throws IOException {
|
||||
this.directory = directory;
|
||||
this.segmentName = si.name;
|
||||
|
@ -179,13 +181,16 @@ final class Lucene50CompoundReader extends BaseDirectory {
|
|||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
/** Not implemented
|
||||
* @throws UnsupportedOperationException always: not supported by CFS */
|
||||
@Override
|
||||
public Lock makeLock(String name) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void clearLock(String name) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "CompoundFileDirectory(segment=\"" + segmentName + "\" in dir=" + directory + ")";
|
||||
|
|
|
@ -316,6 +316,28 @@ public abstract class BaseCompoundFormatTestCase extends BaseIndexFileFormatTest
|
|||
dir.close();
|
||||
}
|
||||
|
||||
// test that cfs reader is read-only
|
||||
public void testClearLockDisabled() throws IOException {
|
||||
final String testfile = "_123.test";
|
||||
|
||||
Directory dir = newDirectory();
|
||||
IndexOutput out = dir.createOutput(testfile, IOContext.DEFAULT);
|
||||
out.writeInt(3);
|
||||
out.close();
|
||||
|
||||
SegmentInfo si = newSegmentInfo(dir, "_123");
|
||||
si.getCodec().compoundFormat().write(dir, si, Collections.<String>emptyList(), MergeState.CheckAbort.NONE, IOContext.DEFAULT);
|
||||
Directory cfs = si.getCodec().compoundFormat().getCompoundReader(dir, si, IOContext.DEFAULT);
|
||||
try {
|
||||
cfs.clearLock("foobar");
|
||||
fail("didn't get expected exception");
|
||||
} catch (UnsupportedOperationException expected) {
|
||||
// expected UOE
|
||||
}
|
||||
cfs.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* This test creates a compound file based on a large number of files of
|
||||
* various length. The file content is generated randomly. The sizes range
|
||||
|
@ -379,7 +401,7 @@ public abstract class BaseCompoundFormatTestCase extends BaseIndexFileFormatTest
|
|||
|
||||
final IndexInput[] ins = new IndexInput[FILE_COUNT];
|
||||
for (int fileIdx = 0; fileIdx < FILE_COUNT; fileIdx++) {
|
||||
ins[fileIdx] = cfs.openInput("file." + fileIdx, newIOContext(random()));
|
||||
ins[fileIdx] = cfs.openInput("_123." + fileIdx, newIOContext(random()));
|
||||
}
|
||||
|
||||
assertEquals(1, dir.getFileHandleCount());
|
||||
|
|
Loading…
Reference in New Issue