delete again

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@822239 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Uwe Schindler 2009-10-06 12:23:13 +00:00
parent f80ca19510
commit a81933fc9b
1 changed files with 0 additions and 530 deletions

View File

@ -1,530 +0,0 @@
package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Arrays;
import java.util.Enumeration;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
/*
Verify we can read the pre-2.1 file format, do searches
against it, and add documents to it.
*/
public class TestBackwardsCompatibility extends LuceneTestCase
{
// Uncomment these cases & run them on an older Lucene
// version, to generate an index to test backwards
// compatibility. Then, cd to build/test/index.cfs and
// run "zip index.<VERSION>.cfs.zip *"; cd to
// build/test/index.nocfs and run "zip
// index.<VERSION>.nocfs.zip *". Then move those 2 zip
// files to your trunk checkout and add them to the
// oldNames array.
/*
public void testCreatePreLocklessCFS() throws IOException {
createIndex("index.cfs", true);
}
public void testCreatePreLocklessNoCFS() throws IOException {
createIndex("index.nocfs", false);
}
*/
/* Unzips dirName + ".zip" --> dirName, removing dirName
first */
public void unzip(String zipName, String destDirName) throws IOException {
Enumeration entries;
ZipFile zipFile;
zipFile = new ZipFile(zipName + ".zip");
entries = zipFile.entries();
String dirName = fullDir(destDirName);
File fileDir = new File(dirName);
rmDir(destDirName);
fileDir.mkdir();
while (entries.hasMoreElements()) {
ZipEntry entry = (ZipEntry) entries.nextElement();
InputStream in = zipFile.getInputStream(entry);
OutputStream out = new BufferedOutputStream(new FileOutputStream(new File(fileDir, entry.getName())));
byte[] buffer = new byte[8192];
int len;
while((len = in.read(buffer)) >= 0) {
out.write(buffer, 0, len);
}
in.close();
out.close();
}
zipFile.close();
}
public void testCreateCFS() throws IOException {
String dirName = "testindex.cfs";
createIndex(dirName, true);
rmDir(dirName);
}
public void testCreateNoCFS() throws IOException {
String dirName = "testindex.nocfs";
createIndex(dirName, true);
rmDir(dirName);
}
final String[] oldNames = {"19.cfs",
"19.nocfs",
"20.cfs",
"20.nocfs",
"21.cfs",
"21.nocfs",
"22.cfs",
"22.nocfs",
"23.cfs",
"23.nocfs",
"24.cfs",
"24.nocfs",
};
public void testOptimizeOldIndex() throws IOException {
for(int i=0;i<oldNames.length;i++) {
String dirName = "src/test/org/apache/lucene/index/index." + oldNames[i];
unzip(dirName, oldNames[i]);
String fullPath = fullDir(oldNames[i]);
Directory dir = FSDirectory.open(new File(fullPath));
IndexWriter w = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
w.optimize();
w.close();
_TestUtil.checkIndex(dir);
dir.close();
rmDir(oldNames[i]);
}
}
public void testSearchOldIndex() throws IOException {
for(int i=0;i<oldNames.length;i++) {
String dirName = "src/test/org/apache/lucene/index/index." + oldNames[i];
unzip(dirName, oldNames[i]);
searchIndex(oldNames[i], oldNames[i]);
rmDir(oldNames[i]);
}
}
public void testIndexOldIndexNoAdds() throws IOException {
for(int i=0;i<oldNames.length;i++) {
String dirName = "src/test/org/apache/lucene/index/index." + oldNames[i];
unzip(dirName, oldNames[i]);
changeIndexNoAdds(oldNames[i]);
rmDir(oldNames[i]);
}
}
public void testIndexOldIndex() throws IOException {
for(int i=0;i<oldNames.length;i++) {
String dirName = "src/test/org/apache/lucene/index/index." + oldNames[i];
unzip(dirName, oldNames[i]);
changeIndexWithAdds(oldNames[i]);
rmDir(oldNames[i]);
}
}
private void testHits(ScoreDoc[] hits, int expectedCount, IndexReader reader) throws IOException {
final int hitCount = hits.length;
assertEquals("wrong number of hits", expectedCount, hitCount);
for(int i=0;i<hitCount;i++) {
reader.document(hits[i].doc);
reader.getTermFreqVectors(hits[i].doc);
}
}
public void searchIndex(String dirName, String oldName) throws IOException {
//QueryParser parser = new QueryParser("contents", new WhitespaceAnalyzer());
//Query query = parser.parse("handle:1");
dirName = fullDir(dirName);
Directory dir = FSDirectory.open(new File(dirName));
IndexSearcher searcher = new IndexSearcher(dir, true);
IndexReader reader = searcher.getIndexReader();
_TestUtil.checkIndex(dir);
for(int i=0;i<35;i++) {
if (!reader.isDeleted(i)) {
Document d = reader.document(i);
List fields = d.getFields();
if (!oldName.startsWith("19.") &&
!oldName.startsWith("20.") &&
!oldName.startsWith("21.") &&
!oldName.startsWith("22.")) {
if (d.getField("content3") == null) {
assertEquals(5, fields.size());
Field f = (Field) d.getField("id");
assertEquals(""+i, f.stringValue());
f = (Field) d.getField("utf8");
assertEquals("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.stringValue());
f = (Field) d.getField("autf8");
assertEquals("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.stringValue());
f = (Field) d.getField("content2");
assertEquals("here is more content with aaa aaa aaa", f.stringValue());
f = (Field) d.getField("fie\u2C77ld");
assertEquals("field with non-ascii name", f.stringValue());
}
}
} else
// Only ID 7 is deleted
assertEquals(7, i);
}
ScoreDoc[] hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs;
// First document should be #21 since it's norm was
// increased:
Document d = searcher.doc(hits[0].doc);
assertEquals("didn't get the right document first", "21", d.get("id"));
testHits(hits, 34, searcher.getIndexReader());
if (!oldName.startsWith("19.") &&
!oldName.startsWith("20.") &&
!oldName.startsWith("21.") &&
!oldName.startsWith("22.")) {
// Test on indices >= 2.3
hits = searcher.search(new TermQuery(new Term("utf8", "\u0000")), null, 1000).scoreDocs;
assertEquals(34, hits.length);
hits = searcher.search(new TermQuery(new Term("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne")), null, 1000).scoreDocs;
assertEquals(34, hits.length);
hits = searcher.search(new TermQuery(new Term("utf8", "ab\ud917\udc17cd")), null, 1000).scoreDocs;
assertEquals(34, hits.length);
}
searcher.close();
dir.close();
}
private int compare(String name, String v) {
int v0 = Integer.parseInt(name.substring(0, 2));
int v1 = Integer.parseInt(v);
return v0 - v1;
}
/* Open pre-lockless index, add docs, do a delete &
* setNorm, and search */
public void changeIndexWithAdds(String dirName) throws IOException {
String origDirName = dirName;
dirName = fullDir(dirName);
Directory dir = FSDirectory.open(new File(dirName));
// open writer
IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED);
// add 10 docs
for(int i=0;i<10;i++) {
addDoc(writer, 35+i);
}
// make sure writer sees right total -- writer seems not to know about deletes in .del?
final int expected;
if (compare(origDirName, "24") < 0) {
expected = 45;
} else {
expected = 46;
}
assertEquals("wrong doc count", expected, writer.maxDoc());
writer.close();
// make sure searching sees right # hits
IndexSearcher searcher = new IndexSearcher(dir, true);
ScoreDoc[] hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs;
Document d = searcher.doc(hits[0].doc);
assertEquals("wrong first document", "21", d.get("id"));
testHits(hits, 44, searcher.getIndexReader());
searcher.close();
// make sure we can do delete & setNorm against this
// pre-lockless segment:
IndexReader reader = IndexReader.open(dir, false);
Term searchTerm = new Term("id", "6");
int delCount = reader.deleteDocuments(searchTerm);
assertEquals("wrong delete count", 1, delCount);
reader.setNorm(22, "content", (float) 2.0);
reader.close();
// make sure they "took":
searcher = new IndexSearcher(dir, true);
hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs;
assertEquals("wrong number of hits", 43, hits.length);
d = searcher.doc(hits[0].doc);
assertEquals("wrong first document", "22", d.get("id"));
testHits(hits, 43, searcher.getIndexReader());
searcher.close();
// optimize
writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED);
writer.optimize();
writer.close();
searcher = new IndexSearcher(dir, true);
hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs;
assertEquals("wrong number of hits", 43, hits.length);
d = searcher.doc(hits[0].doc);
testHits(hits, 43, searcher.getIndexReader());
assertEquals("wrong first document", "22", d.get("id"));
searcher.close();
dir.close();
}
/* Open pre-lockless index, add docs, do a delete &
* setNorm, and search */
public void changeIndexNoAdds(String dirName) throws IOException {
dirName = fullDir(dirName);
Directory dir = FSDirectory.open(new File(dirName));
// make sure searching sees right # hits
IndexSearcher searcher = new IndexSearcher(dir, true);
ScoreDoc[] hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs;
assertEquals("wrong number of hits", 34, hits.length);
Document d = searcher.doc(hits[0].doc);
assertEquals("wrong first document", "21", d.get("id"));
searcher.close();
// make sure we can do a delete & setNorm against this
// pre-lockless segment:
IndexReader reader = IndexReader.open(dir, false);
Term searchTerm = new Term("id", "6");
int delCount = reader.deleteDocuments(searchTerm);
assertEquals("wrong delete count", 1, delCount);
reader.setNorm(22, "content", (float) 2.0);
reader.close();
// make sure they "took":
searcher = new IndexSearcher(dir, true);
hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs;
assertEquals("wrong number of hits", 33, hits.length);
d = searcher.doc(hits[0].doc);
assertEquals("wrong first document", "22", d.get("id"));
testHits(hits, 33, searcher.getIndexReader());
searcher.close();
// optimize
IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED);
writer.optimize();
writer.close();
searcher = new IndexSearcher(dir, true);
hits = searcher.search(new TermQuery(new Term("content", "aaa")), null, 1000).scoreDocs;
assertEquals("wrong number of hits", 33, hits.length);
d = searcher.doc(hits[0].doc);
assertEquals("wrong first document", "22", d.get("id"));
testHits(hits, 33, searcher.getIndexReader());
searcher.close();
dir.close();
}
public void createIndex(String dirName, boolean doCFS) throws IOException {
rmDir(dirName);
dirName = fullDir(dirName);
Directory dir = FSDirectory.open(new File(dirName));
IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
writer.setUseCompoundFile(doCFS);
writer.setMaxBufferedDocs(10);
for(int i=0;i<35;i++) {
addDoc(writer, i);
}
assertEquals("wrong doc count", 35, writer.maxDoc());
writer.close();
// open fresh writer so we get no prx file in the added segment
writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
writer.setUseCompoundFile(doCFS);
writer.setMaxBufferedDocs(10);
addNoProxDoc(writer);
writer.close();
// Delete one doc so we get a .del file:
IndexReader reader = IndexReader.open(dir, false);
Term searchTerm = new Term("id", "7");
int delCount = reader.deleteDocuments(searchTerm);
assertEquals("didn't delete the right number of documents", 1, delCount);
// Set one norm so we get a .s0 file:
reader.setNorm(21, "content", (float) 1.5);
reader.close();
}
/* Verifies that the expected file names were produced */
public void testExactFileNames() throws IOException {
String outputDir = "lucene.backwardscompat0.index";
rmDir(outputDir);
try {
Directory dir = FSDirectory.open(new File(fullDir(outputDir)));
IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
writer.setRAMBufferSizeMB(16.0);
for(int i=0;i<35;i++) {
addDoc(writer, i);
}
assertEquals("wrong doc count", 35, writer.maxDoc());
writer.close();
// Delete one doc so we get a .del file:
IndexReader reader = IndexReader.open(dir, false);
Term searchTerm = new Term("id", "7");
int delCount = reader.deleteDocuments(searchTerm);
assertEquals("didn't delete the right number of documents", 1, delCount);
// Set one norm so we get a .s0 file:
reader.setNorm(21, "content", (float) 1.5);
reader.close();
// The numbering of fields can vary depending on which
// JRE is in use. On some JREs we see content bound to
// field 0; on others, field 1. So, here we have to
// figure out which field number corresponds to
// "content", and then set our expected file names below
// accordingly:
CompoundFileReader cfsReader = new CompoundFileReader(dir, "_0.cfs");
FieldInfos fieldInfos = new FieldInfos(cfsReader, "_0.fnm");
int contentFieldIndex = -1;
for(int i=0;i<fieldInfos.size();i++) {
FieldInfo fi = fieldInfos.fieldInfo(i);
if (fi.name.equals("content")) {
contentFieldIndex = i;
break;
}
}
cfsReader.close();
assertTrue("could not locate the 'content' field number in the _2.cfs segment", contentFieldIndex != -1);
// Now verify file names:
String[] expected;
expected = new String[] {"_0.cfs",
"_0_1.del",
"_0_1.s" + contentFieldIndex,
"segments_3",
"segments.gen"};
String[] actual = dir.listAll();
Arrays.sort(expected);
Arrays.sort(actual);
if (!Arrays.equals(expected, actual)) {
fail("incorrect filenames in index: expected:\n " + asString(expected) + "\n actual:\n " + asString(actual));
}
dir.close();
} finally {
rmDir(outputDir);
}
}
private String asString(String[] l) {
String s = "";
for(int i=0;i<l.length;i++) {
if (i > 0) {
s += "\n ";
}
s += l[i];
}
return s;
}
private void addDoc(IndexWriter writer, int id) throws IOException
{
Document doc = new Document();
doc.add(new Field("content", "aaa", Field.Store.NO, Field.Index.ANALYZED));
doc.add(new Field("id", Integer.toString(id), Field.Store.YES, Field.Index.NOT_ANALYZED));
doc.add(new Field("autf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
doc.add(new Field("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
doc.add(new Field("content2", "here is more content with aaa aaa aaa", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
doc.add(new Field("fie\u2C77ld", "field with non-ascii name", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
writer.addDocument(doc);
}
private void addNoProxDoc(IndexWriter writer) throws IOException {
Document doc = new Document();
Field f = new Field("content3", "aaa", Field.Store.YES, Field.Index.ANALYZED);
f.setOmitTermFreqAndPositions(true);
doc.add(f);
f = new Field("content4", "aaa", Field.Store.YES, Field.Index.NO);
f.setOmitTermFreqAndPositions(true);
doc.add(f);
writer.addDocument(doc);
}
private void rmDir(String dir) throws IOException {
File fileDir = new File(fullDir(dir));
if (fileDir.exists()) {
File[] files = fileDir.listFiles();
if (files != null) {
for (int i = 0; i < files.length; i++) {
files[i].delete();
}
}
fileDir.delete();
}
}
public static String fullDir(String dirName) throws IOException {
return new File(System.getProperty("tempDir"), dirName).getCanonicalPath();
}
}