mirror of https://github.com/apache/lucene.git
LUCENE-1312: Added full support for InstantiatedIndexReader#getFieldNames() and extended the test case to assert that deleted documents behave as they should (they did).
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@672556 13f79535-47bb-0310-9956-ffa450edef68
parent 5890f58fa4
commit b8fc54e72a
@@ -0,0 +1,33 @@

Lucene InstantiatedIndex contrib module Change Log

======================= Trunk (not yet released) =======================

Changes in runtime behavior

(None)

API Changes

(None)

Bug fixes

 1. LUCENE-1312: Added full support for InstantiatedIndexReader#getFieldNames()
    and tests that assert that deleted documents behave as they should (they did).
    (Jason Rutherglen, Karl Wettin)

New features

(None)

Documentation

(None)

Build

(None)

Test Cases

(None)
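As a rough illustration of what this fix enables (not part of the commit; the surrounding setup is assumed), an InstantiatedIndexReader built from an optimized source index can now be asked for field names per option instead of only FieldOption.ALL:

    // sketch: requires an optimized source reader, as enforced by the InstantiatedIndex constructor
    InstantiatedIndex ii = new InstantiatedIndex(sourceIndexReader);
    IndexReader reader = ii.indexReaderFactory();
    Collection indexed = reader.getFieldNames(IndexReader.FieldOption.INDEXED);
    Collection withVectors = reader.getFieldNames(IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR);
    System.out.println("indexed: " + indexed + ", with term vectors: " + withVectors);
    reader.close();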
@@ -0,0 +1,61 @@
package org.apache.lucene.store.instantiated;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * For non package access see {@link org.apache.lucene.index.IndexReader#getFieldNames(org.apache.lucene.index.IndexReader.FieldOption)}
 */
class FieldSetting {

  String fieldName;

  boolean storeTermVector = false;
  boolean storeOffsetWithTermVector = false;
  boolean storePositionWithTermVector = false;
  boolean storePayloads = false;

  boolean stored = false;
  boolean indexed = false;
  boolean tokenized = false;
  boolean compressed = false;

  FieldSetting() {
  }

  FieldSetting(String fieldName) {
    this.fieldName = fieldName;
  }

  public boolean equals(Object o) {
    if (this == o)
      return true;
    if (o == null || getClass() != o.getClass())
      return false;

    final FieldSetting that = (FieldSetting) o;

    return fieldName.equals(that.fieldName);
  }

  public int hashCode() {
    return fieldName.hashCode();
  }

}
@@ -0,0 +1,95 @@
package org.apache.lucene.store.instantiated;

import java.util.HashMap;
import java.util.Map;
import java.util.Collection;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Essentially a Map<FieldName, {@link org.apache.lucene.store.instantiated.FieldSetting}>
 */
class FieldSettings {

  FieldSettings() {
  }

  private Map</** field name */String, FieldSetting> fieldSettings = new HashMap<String, FieldSetting>();

  synchronized FieldSetting merge(FieldSetting fieldSetting) {
    FieldSetting setting = fieldSettings.get(fieldSetting.fieldName);

    if (setting == null) {
      setting = new FieldSetting(fieldSetting.fieldName);
      fieldSettings.put(fieldSetting.fieldName, setting);
    }

    if (fieldSetting.stored) {
      setting.stored = true;
    }
    if (fieldSetting.compressed) {
      setting.compressed = true;
    }

    if ("b3".equals(fieldSetting.fieldName)) {
      System.currentTimeMillis(); // note: leftover debugging hook, has no effect
    }
    if (fieldSetting.indexed) {
      setting.indexed = true;
    }
    if (fieldSetting.tokenized) {
      setting.tokenized = true;
    }

    if (fieldSetting.storeTermVector) {
      setting.storeTermVector = true;
    }
    if (fieldSetting.storeOffsetWithTermVector) {
      setting.storeOffsetWithTermVector = true;
    }
    if (fieldSetting.storePositionWithTermVector) {
      setting.storePositionWithTermVector = true;
    }

    if (fieldSetting.storePayloads) {
      setting.storePayloads = true;
    }

    return setting;
  }

  FieldSetting get(String name) {
    return fieldSettings.get(name);
  }

  FieldSetting get(String name, boolean create) {
    FieldSetting fieldSetting = fieldSettings.get(name);
    if (create && fieldSetting == null) {
      fieldSetting = new FieldSetting(name);
      fieldSettings.put(name, fieldSetting);
    }
    return fieldSetting;
  }

  Collection<FieldSetting> values() {
    return fieldSettings.values();
  }

}
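A minimal usage sketch (not from the commit; the field name "title" is made up) of how merge() accumulates per-field flags, which is what getFieldNames() later reads:

    FieldSettings settings = new FieldSettings();

    FieldSetting first = new FieldSetting("title");
    first.indexed = true;                 // one document indexes the field

    FieldSetting second = new FieldSetting("title");
    second.stored = true;                 // a later document also stores it
    second.storeTermVector = true;

    settings.merge(first);
    FieldSetting merged = settings.merge(second);
    // merged.indexed, merged.stored and merged.storeTermVector are now all true;
    // merge() only ever turns flags on, it never clears them.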
@@ -16,14 +16,24 @@ package org.apache.lucene.store.instantiated;
 * limitations under the License.
 */

+import java.io.IOException;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
-import org.apache.lucene.index.*;
-import java.io.IOException;
-import java.io.Serializable;
-import java.util.*;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.index.TermPositionVector;
+import org.apache.lucene.index.TermPositions;

/**
 * Represented as a coupled graph of class instances, this
@@ -49,7 +59,8 @@ public class InstantiatedIndex
  private long version = System.currentTimeMillis();

  private InstantiatedDocument[] documentsByNumber;
-  /** todo: this should be a BitSet */
+
+  /** todo: should this be a BitSet? */
  private Set<Integer> deletedDocuments;

  private Map<String, Map<String, InstantiatedTerm>> termsByFieldAndText;
@@ -57,6 +68,7 @@ public class InstantiatedIndex

  private Map<String, byte[]> normsByFieldNameAndDocumentNumber;

+  private FieldSettings fieldSettings;

  /**
   * Creates an empty instantiated index for you to fill with data using an {@link org.apache.lucene.store.instantiated.InstantiatedIndexWriter}.
@@ -68,12 +80,14 @@ public class InstantiatedIndex
  void initialize() {
    // todo: clear index without losing memory (uncouple stuff)
    termsByFieldAndText = new HashMap<String, Map<String, InstantiatedTerm>>();
+    fieldSettings = new FieldSettings();
    orderedTerms = new InstantiatedTerm[0];
    documentsByNumber = new InstantiatedDocument[0];
    normsByFieldNameAndDocumentNumber = new HashMap<String, byte[]>();
    deletedDocuments = new HashSet<Integer>();
  }

  /**
   * Creates a new instantiated index that looks just like the index in a specific state as represented by a reader.
   *
@@ -83,7 +97,9 @@ public class InstantiatedIndex
  public InstantiatedIndex(IndexReader sourceIndexReader) throws IOException {
    this(sourceIndexReader, null);
  }

  /**
   * Creates a new instantiated index that looks just like the index in a specific state as represented by a reader.
   *
@@ -97,10 +113,63 @@ public class InstantiatedIndex
      throw new IOException("Source index is not optimized.");
    }

-    Collection<String> allFieldNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.ALL);
-
    initialize();

+    Collection<String> allFieldNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.ALL);
+
+    // load field options
+
+    Collection<String> indexedNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.INDEXED);
+    for (String name : indexedNames) {
+      FieldSetting setting = fieldSettings.get(name, true);
+      setting.indexed = true;
+    }
+    Collection<String> indexedNoVecNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.INDEXED_NO_TERMVECTOR);
+    for (String name : indexedNoVecNames) {
+      FieldSetting setting = fieldSettings.get(name, true);
+      setting.storeTermVector = false;
+      setting.indexed = true;
+    }
+    Collection<String> indexedVecNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR);
+    for (String name : indexedVecNames) {
+      FieldSetting setting = fieldSettings.get(name, true);
+      setting.storeTermVector = true;
+      setting.indexed = true;
+    }
+    Collection<String> payloadNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.STORES_PAYLOADS);
+    for (String name : payloadNames) {
+      FieldSetting setting = fieldSettings.get(name, true);
+      setting.storePayloads = true;
+    }
+    Collection<String> termVecNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR);
+    for (String name : termVecNames) {
+      FieldSetting setting = fieldSettings.get(name, true);
+      setting.storeTermVector = true;
+    }
+    Collection<String> termVecOffsetNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET);
+    for (String name : termVecOffsetNames) {
+      FieldSetting setting = fieldSettings.get(name, true);
+      setting.storeOffsetWithTermVector = true;
+    }
+    Collection<String> termVecPosNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION);
+    for (String name : termVecPosNames) {
+      FieldSetting setting = fieldSettings.get(name, true);
+      setting.storePositionWithTermVector = true;
+    }
+    Collection<String> termVecPosOffNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET);
+    for (String name : termVecPosOffNames) {
+      FieldSetting setting = fieldSettings.get(name, true);
+      setting.storeOffsetWithTermVector = true;
+      setting.storePositionWithTermVector = true;
+    }
+    Collection<String> unindexedNames = sourceIndexReader.getFieldNames(IndexReader.FieldOption.UNINDEXED);
+    for (String name : unindexedNames) {
+      FieldSetting setting = fieldSettings.get(name, true);
+      setting.indexed = false;
+    }
+
    documentsByNumber = new InstantiatedDocument[sourceIndexReader.numDocs()];

    // create documents
@@ -129,6 +198,8 @@ public class InstantiatedIndex
      }
    }

    // create norms
    for (String fieldName : allFieldNames) {
      if (fields == null || fields.contains(fieldName)) {
@@ -271,4 +342,9 @@ public class InstantiatedIndex
  void setVersion(long version) {
    this.version = version;
  }

+  FieldSettings getFieldSettings() {
+    return fieldSettings;
+  }
}
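For context, a hedged sketch (not part of the commit) of how the new field-option loading is reached. FieldSettings and getFieldSettings() are package private, so this would only compile inside org.apache.lucene.store.instantiated, and the directory and field name are invented:

    IndexReader source = IndexReader.open(someDirectory);      // someDirectory is hypothetical
    InstantiatedIndex ii = new InstantiatedIndex(source);      // throws IOException if source is not optimized
    source.close();

    FieldSetting title = ii.getFieldSettings().get("title");   // "title" is a made-up field name
    if (title != null && title.storeTermVector) {
      System.out.println("title stores term vectors");
    }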
@@ -16,22 +16,37 @@ package org.apache.lucene.store.instantiated;
 * limitations under the License.
 */

+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
-import org.apache.lucene.index.*;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermDocs;
+import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.index.TermFreqVector;
+import org.apache.lucene.index.TermPositions;
+import org.apache.lucene.index.TermVectorMapper;
import org.apache.lucene.store.Directory;
-
-import java.io.IOException;
-import java.util.*;

/**
- * An InstantiatedIndexReader is not a snapshot in time,
- * it is completely in sync with the latest commit to the store!
+ * An InstantiatedIndexReader is not a snapshot in time, it is completely in
+ * sync with the latest commit to the store!
 *
 * Consider using InstantiatedIndex as if it was immutable.
 */
-public class InstantiatedIndexReader
-    extends IndexReader {
+public class InstantiatedIndexReader extends IndexReader {

  private final InstantiatedIndex index;

@@ -47,36 +62,32 @@ public class InstantiatedIndexReader
    return true;
  }

  /**
-   * An InstantiatedIndexReader is not a snapshot in time,
-   * it is completely in sync with the latest commit to the store!
+   * An InstantiatedIndexReader is not a snapshot in time, it is completely in
+   * sync with the latest commit to the store!
   *
   * @return output from {@link InstantiatedIndex#getVersion()} in associated instantiated index.
   */
  public long getVersion() {
    return index.getVersion();
  }

  public Directory directory() {
    throw new UnsupportedOperationException();
  }

  /**
   * An InstantiatedIndexReader is always current!
   *
-   * Check whether this IndexReader is still using the
-   * current (i.e., most recently committed) version of the
-   * index. If a writer has committed any changes to the
-   * index since this reader was opened, this will return
-   * <code>false</code>, in which case you must open a new
-   * IndexReader in order to see the changes. See the
-   * description of the <a href="IndexWriter.html#autoCommit"><code>autoCommit</code></a>
-   * flag which controls when the {@link IndexWriter}
-   * actually commits changes to the index.
+   * Check whether this IndexReader is still using the current (i.e., most
+   * recently committed) version of the index. If a writer has committed any
+   * changes to the index since this reader was opened, this will return
+   * <code>false</code>, in which case you must open a new IndexReader in
+   * order to see the changes. See the description of the <a
+   * href="IndexWriter.html#autoCommit"><code>autoCommit</code></a> flag
+   * which controls when the {@link IndexWriter} actually commits changes to the
+   * index.
   *
   * @return always true
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
@@ -92,7 +103,7 @@ public class InstantiatedIndexReader

  private Set<InstantiatedDocument> deletedDocuments = new HashSet<InstantiatedDocument>();
  private Set<Integer> deletedDocumentNumbers = new HashSet<Integer>();
-  private Map<String, List<NormUpdate>> updatedNormsByFieldNameAndDocumentNumber = null;
+  private Map<String,List<NormUpdate>> updatedNormsByFieldNameAndDocumentNumber = null;

  private class NormUpdate {
    private int doc;
@@ -140,7 +151,7 @@ public class InstantiatedIndexReader

    // 1. update norms
    if (updatedNormsByFieldNameAndDocumentNumber != null) {
-      for (Map.Entry<String, List<NormUpdate>> e : updatedNormsByFieldNameAndDocumentNumber.entrySet()) {
+      for (Map.Entry<String,List<NormUpdate>> e : updatedNormsByFieldNameAndDocumentNumber.entrySet()) {
        byte[] norms = getIndex().getNormsByFieldNameAndDocumentNumber().get(e.getKey());
        for (NormUpdate normUpdate : e.getValue()) {
          norms[normUpdate.doc] = normUpdate.value;
@@ -168,27 +179,67 @@ public class InstantiatedIndexReader

  protected void doClose() throws IOException {
    // ignored
+    // todo perhaps release all associated instances?
  }

-  public Collection getFieldNames(FieldOption fldOption) {
-    if (fldOption != FieldOption.ALL) {
-      throw new IllegalArgumentException("Only FieldOption.ALL implemented."); // todo
+  public Collection getFieldNames(FieldOption fieldOption) {
+    Set<String> fieldSet = new HashSet<String>();
+    for (FieldSetting fi : index.getFieldSettings().values()) {
+      if (fieldOption == IndexReader.FieldOption.ALL) {
+        fieldSet.add(fi.fieldName);
+      } else if (!fi.indexed && fieldOption == IndexReader.FieldOption.UNINDEXED) {
+        fieldSet.add(fi.fieldName);
+      } else if (fi.storePayloads && fieldOption == IndexReader.FieldOption.STORES_PAYLOADS) {
+        fieldSet.add(fi.fieldName);
+      } else if (fi.indexed && fieldOption == IndexReader.FieldOption.INDEXED) {
+        fieldSet.add(fi.fieldName);
+      } else if (fi.indexed && fi.storeTermVector == false && fieldOption == IndexReader.FieldOption.INDEXED_NO_TERMVECTOR) {
+        fieldSet.add(fi.fieldName);
+      } else if (fi.storeTermVector == true && fi.storePositionWithTermVector == false && fi.storeOffsetWithTermVector == false
+          && fieldOption == IndexReader.FieldOption.TERMVECTOR) {
+        fieldSet.add(fi.fieldName);
+      } else if (fi.indexed && fi.storeTermVector && fieldOption == IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR) {
+        fieldSet.add(fi.fieldName);
+      } else if (fi.storePositionWithTermVector && fi.storeOffsetWithTermVector == false
+          && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION) {
+        fieldSet.add(fi.fieldName);
+      } else if (fi.storeOffsetWithTermVector && fi.storePositionWithTermVector == false
+          && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET) {
+        fieldSet.add(fi.fieldName);
+      } else if ((fi.storeOffsetWithTermVector && fi.storePositionWithTermVector)
+          && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET) {
+        fieldSet.add(fi.fieldName);
+      }
    }
-    return new ArrayList<String>(getIndex().getTermsByFieldAndText().keySet());
+    return fieldSet;
  }

  /**
-   * This implementation ignores the field selector! All fields are always returned
+   * Return the {@link org.apache.lucene.document.Document} at the <code>n</code><sup>th</sup>
+   * position.
+   * <p>
+   * <b>Warning!</b>
+   * The resulting document is the actual stored document instance
+   * and not a deserialized clone as returned by an IndexReader
+   * over a {@link org.apache.lucene.store.Directory}.
+   * I.e., if you need to touch the document, clone it first!
+   * <p>
+   * This can also be seen as a feature for live changes of stored values,
+   * but be careful! Adding a field with a name unknown to the index
+   * or to a field with previously no stored values will make
+   * {@link org.apache.lucene.store.instantiated.InstantiatedIndexReader#getFieldNames(org.apache.lucene.index.IndexReader.FieldOption)}
+   * out of sync, causing problems for instance when merging the
+   * instantiated index to another index.
+   * <p>
+   * This implementation ignores the field selector! All stored fields are always returned!
+   * <p>
   *
-   * Get the {@link org.apache.lucene.document.Document} at the <code>n</code><sup>th</sup> position.
-   *
-   * @param n Get the document at the <code>n</code><sup>th</sup> position
+   * @param n document number
   * @param fieldSelector ignored
   * @return The stored fields of the {@link org.apache.lucene.document.Document} at the nth position
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   *
   * @see org.apache.lucene.document.Fieldable
   * @see org.apache.lucene.document.FieldSelector
   * @see org.apache.lucene.document.SetBasedFieldSelector
@@ -198,19 +249,34 @@ public class InstantiatedIndexReader
    return document(n);
  }

+  /**
+   * Returns the stored fields of the <code>n</code><sup>th</sup>
+   * <code>Document</code> in this index.
+   * <p>
+   * <b>Warning!</b>
+   * The resulting document is the actual stored document instance
+   * and not a deserialized clone as returned by an IndexReader
+   * over a {@link org.apache.lucene.store.Directory}.
+   * I.e., if you need to touch the document, clone it first!
+   * <p>
+   * This can also be seen as a feature for live changes of stored values,
+   * but be careful! Adding a field with a name unknown to the index
+   * or to a field with previously no stored values will make
+   * {@link org.apache.lucene.store.instantiated.InstantiatedIndexReader#getFieldNames(org.apache.lucene.index.IndexReader.FieldOption)}
+   * out of sync, causing problems for instance when merging the
+   * instantiated index to another index.
+   *
+   * @throws CorruptIndexException if the index is corrupt
+   * @throws IOException if there is a low-level IO error
+   */
  public Document document(int n) throws IOException {
-    if ((deletedDocumentNumbers != null
-        && deletedDocumentNumbers.contains(n))
-        ||
-        (getIndex().getDeletedDocuments() != null
-        && getIndex().getDeletedDocuments().contains(n))) {
-      return null;
-    }
-    return getIndex().getDocumentsByNumber()[n].getDocument();
+    return isDeleted(n) ? null : getIndex().getDocumentsByNumber()[n].getDocument();
  }

  /**
-   * never ever touch these values. it is the true values, unless norms have been touched.
+   * never ever touch these values. it is the true values, unless norms have
+   * been touched.
   */
  public byte[] norms(String field) throws IOException {
    byte[] norms = getIndex().getNormsByFieldNameAndDocumentNumber().get(field);
@@ -233,7 +299,7 @@ public class InstantiatedIndexReader

  protected void doSetNorm(int doc, String field, byte value) throws IOException {
    if (updatedNormsByFieldNameAndDocumentNumber == null) {
-      updatedNormsByFieldNameAndDocumentNumber = new HashMap<String, List<NormUpdate>>(getIndex().getNormsByFieldNameAndDocumentNumber().size());
+      updatedNormsByFieldNameAndDocumentNumber = new HashMap<String,List<NormUpdate>>(getIndex().getNormsByFieldNameAndDocumentNumber().size());
    }
    List<NormUpdate> list = updatedNormsByFieldNameAndDocumentNumber.get(field);
    if (list == null) {
@@ -252,7 +318,6 @@ public class InstantiatedIndexReader
    }
  }

  public TermEnum terms() throws IOException {
    return new InstantiatedTermEnum(this);
  }
@@ -260,11 +325,11 @@ public class InstantiatedIndexReader
  public TermEnum terms(Term t) throws IOException {
    InstantiatedTerm it = getIndex().findTerm(t);
    if (it != null) {
      return new InstantiatedTermEnum(this, it.getTermIndex());
    } else {
      int startPos = Arrays.binarySearch(index.getOrderedTerms(), t, InstantiatedTerm.termComparator);
      if (startPos < 0) {
-        startPos = -1 -startPos;
+        startPos = -1 - startPos;
      }
      return new InstantiatedTermEnum(this, startPos);
    }
@@ -293,19 +358,16 @@ public class InstantiatedIndexReader

  public TermFreqVector getTermFreqVector(int docNumber, String field) throws IOException {
    InstantiatedDocument doc = getIndex().getDocumentsByNumber()[docNumber];
-    if (doc.getVectorSpace() == null
-        || doc.getVectorSpace().get(field) == null) {
+    if (doc.getVectorSpace() == null || doc.getVectorSpace().get(field) == null) {
      return null;
    } else {
      return new InstantiatedTermPositionVector(doc, field);
    }
  }

  public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException {
    InstantiatedDocument doc = getIndex().getDocumentsByNumber()[docNumber];
-    if (doc.getVectorSpace() != null
-        && doc.getVectorSpace().get(field) == null) {
+    if (doc.getVectorSpace() != null && doc.getVectorSpace().get(field) == null) {
      List<InstantiatedTermDocumentInformation> tv = doc.getVectorSpace().get(field);
      mapper.setExpectations(field, tv.size(), true, true);
      for (InstantiatedTermDocumentInformation tdi : tv) {
@@ -316,7 +378,7 @@ public class InstantiatedIndexReader

  public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException {
    InstantiatedDocument doc = getIndex().getDocumentsByNumber()[docNumber];
-    for (Map.Entry<String, List<InstantiatedTermDocumentInformation>> e : doc.getVectorSpace().entrySet()) {
+    for (Map.Entry<String,List<InstantiatedTermDocumentInformation>> e : doc.getVectorSpace().entrySet()) {
      mapper.setExpectations(e.getKey(), e.getValue().size(), true, true);
      for (InstantiatedTermDocumentInformation tdi : e.getValue()) {
        mapper.map(tdi.getTerm().text(), tdi.getTermPositions().length, tdi.getTermOffsets(), tdi.getTermPositions());
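Two behaviours worth noting from the reader above, shown as an illustrative sketch (not from the commit; the document number and field copying are arbitrary): document(n) returns null for deleted documents, and the Document it returns is the live stored instance, so copy fields into a fresh Document before modifying anything:

    Document live = reader.document(7);          // returns null if document 7 is deleted
    if (live != null) {
      Document scratch = new Document();
      for (Object f : live.getFields()) {        // shallow copy of the stored fields
        scratch.add((Fieldable) f);
      }
      // additions or removals on 'scratch' do not leak back into the index,
      // whereas the same operations on 'live' would.
    }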
@@ -16,6 +16,22 @@ package org.apache.lucene.store.instantiated;
 * limitations under the License.
 */

+import java.io.IOException;
+import java.io.PrintStream;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
@@ -28,11 +44,6 @@ import org.apache.lucene.index.TermVectorOffsetInfo;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.Similarity;

-import java.io.IOException;
-import java.io.PrintStream;
-import java.io.StringReader;
-import java.util.*;
-
/**
 * This class, similar to {@link org.apache.lucene.index.IndexWriter}, has no locking mechanism.
 *
@@ -161,6 +172,11 @@ public class InstantiatedIndexWriter {

    boolean orderedTermsDirty = false;
    Set<InstantiatedTerm> dirtyTerms = new HashSet<InstantiatedTerm>(1000);

+    Map<String, FieldSetting> fieldSettingsByFieldName = new HashMap<String, FieldSetting>();
+    for (String fieldName : fieldNameBuffer) {
+      fieldSettingsByFieldName.put(fieldName, new FieldSetting(fieldName));
+    }
+
    InstantiatedDocument[] documentsByNumber = new InstantiatedDocument[index.getDocumentsByNumber().length + termDocumentInformationFactoryByDocument.size()];
    System.arraycopy(index.getDocumentsByNumber(), 0, documentsByNumber, 0, index.getDocumentsByNumber().length);
@@ -215,7 +231,7 @@ public class InstantiatedIndexWriter {
        }
        termsInDocument += eFieldTermDocInfoFactoriesByTermText.getValue().size();

-        if (eFieldTermDocInfoFactoriesByTermText.getKey().isIndexed && !eFieldTermDocInfoFactoriesByTermText.getKey().omitNorms) {
+        if (eFieldTermDocInfoFactoriesByTermText.getKey().indexed && !eFieldTermDocInfoFactoriesByTermText.getKey().omitNorms) {
          float norm = eFieldTermDocInfoFactoriesByTermText.getKey().boost;
          norm *= document.getDocument().getBoost();
          norm *= similarity.lengthNorm(eFieldTermDocInfoFactoriesByTermText.getKey().fieldName, eFieldTermDocInfoFactoriesByTermText.getKey().fieldLength);
@@ -340,6 +356,7 @@ public class InstantiatedIndexWriter {
        }

      }
+      fieldSettingsByFieldName.putAll(documentFieldSettingsByFieldName);
    }

    // order document informations in dirty terms
@@ -358,6 +375,9 @@ public class InstantiatedIndexWriter {
    index.setDocumentsByNumber(documentsByNumber);
    index.setOrderedTerms(orderedTerms.toArray(new InstantiatedTerm[orderedTerms.size()]));

+    for (FieldSetting fieldSetting : fieldSettingsByFieldName.values()) {
+      index.getFieldSettings().merge(fieldSetting);
+    }
    // set term index
    if (orderedTermsDirty) {
      // todo optimize, only update from start position
@@ -434,45 +454,46 @@ public class InstantiatedIndexWriter {

    Map<String /* field name */, FieldSetting> fieldSettingsByFieldName = new HashMap<String, FieldSetting>();
    for (Field field : (List<Field>) document.getDocument().getFields()) {
-      FieldSetting fieldSettings = fieldSettingsByFieldName.get(field.name());
-      if (fieldSettings == null) {
-        fieldSettings = new FieldSetting();
-        fieldSettings.fieldName = field.name().intern();
-        fieldSettingsByFieldName.put(fieldSettings.fieldName, fieldSettings);
-        fieldNameBuffer.add(fieldSettings.fieldName);
+      FieldSetting fieldSetting = fieldSettingsByFieldName.get(field.name());
+      if (fieldSetting == null) {
+        fieldSetting = new FieldSetting();
+        fieldSetting.fieldName = field.name().intern();
+        fieldSettingsByFieldName.put(fieldSetting.fieldName, fieldSetting);
+        fieldNameBuffer.add(fieldSetting.fieldName);
      }

      // todo: fixme: multiple fields with the same name does not mean field boost += more boost.
-      fieldSettings.boost *= field.getBoost();
+      fieldSetting.boost *= field.getBoost();
      //fieldSettings.dimensions++;

      // once fieldSettings, always fieldSettings.
-      if (field.getOmitNorms() != fieldSettings.omitNorms) {
-        fieldSettings.omitNorms = true;
+      if (field.getOmitNorms()) {
+        fieldSetting.omitNorms = true;
      }
-      if (field.isIndexed() != fieldSettings.isIndexed) {
-        fieldSettings.isIndexed = true;
+      if (field.isIndexed()) {
+        fieldSetting.indexed = true;
      }
-      if (field.isTokenized() != fieldSettings.isTokenized) {
-        fieldSettings.isTokenized = true;
+      if (field.isTokenized()) {
+        fieldSetting.tokenized = true;
      }
-      if (field.isCompressed() != fieldSettings.isCompressed) {
-        fieldSettings.isCompressed = true;
+      if (field.isCompressed()) {
+        fieldSetting.compressed = true;
      }
-      if (field.isStored() != fieldSettings.isStored) {
-        fieldSettings.isStored = true;
+      if (field.isStored()) {
+        fieldSetting.stored = true;
      }
-      if (field.isBinary() != fieldSettings.isBinary) {
-        fieldSettings.isBinary = true;
+      if (field.isBinary()) {
+        fieldSetting.isBinary = true;
      }
-      if (field.isTermVectorStored() != fieldSettings.storeTermVector) {
-        fieldSettings.storeTermVector = true;
+      if (field.isTermVectorStored()) {
+        fieldSetting.storeTermVector = true;
      }
-      if (field.isStorePositionWithTermVector() != fieldSettings.storePositionWithTermVector) {
-        fieldSettings.storePositionWithTermVector = true;
+      if (field.isStorePositionWithTermVector()) {
+        fieldSetting.storePositionWithTermVector = true;
      }
-      if (field.isStoreOffsetWithTermVector() != fieldSettings.storeOffsetWithTermVector) {
-        fieldSettings.storeOffsetWithTermVector = true;
+      if (field.isStoreOffsetWithTermVector()) {
+        fieldSetting.storeOffsetWithTermVector = true;
      }
    }

@@ -483,7 +504,7 @@ public class InstantiatedIndexWriter {

      Field field = it.next();

-      FieldSetting fieldSettings = fieldSettingsByFieldName.get(field.name());
+      FieldSetting fieldSetting = fieldSettingsByFieldName.get(field.name());

      if (field.isIndexed()) {

@@ -505,15 +526,15 @@ public class InstantiatedIndexWriter {
            next.setTermText(next.termText().intern()); // todo: not sure this needs to be interned?
            tokens.add(next); // the vector will be built on commit.
            next = tokenStream.next();
-            fieldSettings.fieldLength++;
-            if (fieldSettings.fieldLength > maxFieldLength) {
+            fieldSetting.fieldLength++;
+            if (fieldSetting.fieldLength > maxFieldLength) {
              break;
            }
          }
        } else {
          // untokenized
          tokens.add(new Token(field.stringValue().intern(), 0, field.stringValue().length(), "untokenized"));
-          fieldSettings.fieldLength++;
+          fieldSetting.fieldLength++;
        }
      }

@@ -528,7 +549,7 @@ public class InstantiatedIndexWriter {

    // build term vector, term positions and term offsets
    for (Map.Entry<Field, LinkedList<Token>> eField_Tokens : tokensByField.entrySet()) {
-      FieldSetting fieldSettings = fieldSettingsByFieldName.get(eField_Tokens.getKey().name());
+      FieldSetting fieldSetting = fieldSettingsByFieldName.get(eField_Tokens.getKey().name());

      Map<String, TermDocumentInformationFactory> termDocumentInformationFactoryByTermText = termDocumentInformationFactoryByTermTextAndFieldSetting.get(fieldSettingsByFieldName.get(eField_Tokens.getKey().name()));
      if (termDocumentInformationFactoryByTermText == null) {
@@ -539,9 +560,9 @@ public class InstantiatedIndexWriter {
      int lastOffset = 0;

      // for each new field, move positions a bunch.
-      if (fieldSettings.position > 0) {
+      if (fieldSetting.position > 0) {
        // todo what if no analyzer set, multiple fields with same name and index without tokenization?
-        fieldSettings.position += analyzer.getPositionIncrementGap(fieldSettings.fieldName);
+        fieldSetting.position += analyzer.getPositionIncrementGap(fieldSetting.fieldName);
      }

      for (Token token : eField_Tokens.getValue()) {
@@ -553,26 +574,27 @@ public class InstantiatedIndexWriter {
        }
        //termDocumentInformationFactory.termFrequency++;

-        fieldSettings.position += (token.getPositionIncrement() - 1);
-        termDocumentInformationFactory.termPositions.add(fieldSettings.position++);
+        fieldSetting.position += (token.getPositionIncrement() - 1);
+        termDocumentInformationFactory.termPositions.add(fieldSetting.position++);

        if (token.getPayload() != null && token.getPayload().length() > 0) {
          termDocumentInformationFactory.payloads.add(token.getPayload().toByteArray());
+          fieldSetting.storePayloads = true;
        } else {
          termDocumentInformationFactory.payloads.add(null);
        }

        if (eField_Tokens.getKey().isStoreOffsetWithTermVector()) {
-          termDocumentInformationFactory.termOffsets.add(new TermVectorOffsetInfo(fieldSettings.offset + token.startOffset(), fieldSettings.offset + token.endOffset()));
-          lastOffset = fieldSettings.offset + token.endOffset();
+          termDocumentInformationFactory.termOffsets.add(new TermVectorOffsetInfo(fieldSetting.offset + token.startOffset(), fieldSetting.offset + token.endOffset()));
+          lastOffset = fieldSetting.offset + token.endOffset();
        }

      }

      if (eField_Tokens.getKey().isStoreOffsetWithTermVector()) {
-        fieldSettings.offset = lastOffset + 1;
+        fieldSetting.offset = lastOffset + 1;
      }

    }
@@ -631,45 +653,6 @@ public class InstantiatedIndexWriter {
    return analyzer;
  }

-  private class FieldSetting {
-    private String fieldName;
-
-    private float boost = 1;
-    //private int dimensions = 0; // this is futuristic
-    private int position = 0;
-    private int offset;
-    private int fieldLength = 0;
-
-    private boolean storeTermVector = false;
-    private boolean storeOffsetWithTermVector = false;
-    private boolean storePositionWithTermVector = false;
-    private boolean omitNorms = false;
-    private boolean isTokenized = false;
-
-    private boolean isStored = false;
-    private boolean isIndexed = false;
-    private boolean isBinary = false;
-    private boolean isCompressed = false;
-
-    //private float norm;
-    //private byte encodedNorm;
-
-    public boolean equals(Object o) {
-      if (this == o) return true;
-      if (o == null || getClass() != o.getClass()) return false;
-
-      final FieldSetting that = (FieldSetting) o;
-
-      return fieldName.equals(that.fieldName);
-    }
-
-    public int hashCode() {
-      return fieldName.hashCode();
-    }
-  }
-
  private class TermDocumentInformationFactory {
    private LinkedList<byte[]> payloads = new LinkedList<byte[]>();
    private LinkedList<Integer> termPositions = new LinkedList<Integer>();
@@ -677,5 +660,23 @@ public class InstantiatedIndexWriter {
  }

+  static class FieldSetting extends org.apache.lucene.store.instantiated.FieldSetting {
+
+    float boost = 1;
+    int position = 0;
+    int offset;
+    int fieldLength = 0;
+
+    boolean omitNorms = false;
+    boolean isBinary = false;
+
+    private FieldSetting() {
+    }
+
+    private FieldSetting(String fieldName) {
+      super(fieldName);
+    }
+  }
+
}
@@ -121,16 +121,11 @@ public class InstantiatedTermDocs
    } else {
      return true;
    }
  }

  /**
   * Does nothing
   */
  public void close() {
  }

}
@@ -61,7 +61,7 @@ public class InstantiatedTermEnum
   * Returns the current Term in the enumeration.
   */
  public Term term() {
-    return /*term == null ? null :*/ term.getTerm();
+    return term == null ? null : term.getTerm();
  }

  /**
@@ -70,9 +70,10 @@
<h2>Caveats</h2>
<ul>
  <li>No locks! Consider using InstantiatedIndex as if it was immutable.</li>
-  <li>No documents with fields containing readers!</li>
-  <li>Only FieldOption.All allowed by IndexReader#getFieldNames(FieldOption).</li>
+  <li>No documents with fields containing readers.</li>
  <li>No field selection when retrieving documents, as all stored fields are available in memory.</li>
+  <li>Any document returned must be cloned if it is to be touched.</li>
+  <li>The norms array returned must not be touched.</li>
</ul>
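To make the norms caveat above concrete, a small sketch (invented field name; Similarity.decodeNorm and IndexReader.setNorm are the stock Lucene helpers): read the returned array, but route updates through setNorm rather than writing into it:

    byte[] norms = reader.norms("title");            // the reader's actual array, not a copy
    float boost = Similarity.decodeNorm(norms[0]);   // reading is fine
    reader.setNorm(0, "title", 1.5f);                // updating goes through the API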

<h2>Use cases</h2>
@@ -47,7 +47,7 @@ public class TestIndicesEquals extends TestCase {

    // create dir data
    IndexWriter indexWriter = new IndexWriter(dir, new StandardAnalyzer(), true);
-    for (int i = 0; i < 5; i++) {
+    for (int i = 0; i < 20; i++) {
      Document document = new Document();
      assembleDocument(document, i);
      indexWriter.addDocument(document);
@@ -59,9 +59,10 @@ public class TestIndicesEquals extends TestCase {
    InstantiatedIndex ii = new InstantiatedIndex(ir);
    ir.close();

-    testEquals(dir, ii);
+    testEqualBehaviour(dir, ii);
  }

  public void testInstantiatedIndexWriter() throws Exception {

@@ -86,7 +87,7 @@ public class TestIndicesEquals extends TestCase {
    }
    instantiatedIndexWriter.close();

-    testEquals(dir, ii);
+    testEqualBehaviour(dir, ii);

    testTermDocs(dir, ii);

@@ -186,6 +187,25 @@ public class TestIndicesEquals extends TestCase {
   * @param testIndex the index that is supposed to equal the apriori index.
   * @throws Exception
   */
+  protected void testEqualBehaviour(Directory aprioriIndex, InstantiatedIndex testIndex) throws Exception {
+
+    testEquals(aprioriIndex, testIndex);
+
+    // delete a few documents
+    IndexReader ir = IndexReader.open(aprioriIndex);
+    ir.deleteDocument(3);
+    ir.deleteDocument(8);
+    ir.close();
+
+    ir = testIndex.indexReaderFactory();
+    ir.deleteDocument(3);
+    ir.deleteDocument(8);
+    ir.close();
+
+    // make sure they still equal
+    testEquals(aprioriIndex, testIndex);
+  }
+
  protected void testEquals(Directory aprioriIndex, InstantiatedIndex testIndex) throws Exception {

    IndexReader aprioriReader = IndexReader.open(aprioriIndex);
@@ -193,6 +213,17 @@ public class TestIndicesEquals extends TestCase {

    assertEquals(aprioriReader.numDocs(), testReader.numDocs());

+    // assert field options
+    assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.INDEXED), testReader.getFieldNames(IndexReader.FieldOption.INDEXED));
+    assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.INDEXED_NO_TERMVECTOR), testReader.getFieldNames(IndexReader.FieldOption.INDEXED_NO_TERMVECTOR));
+    assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR), testReader.getFieldNames(IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR));
+    assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.STORES_PAYLOADS), testReader.getFieldNames(IndexReader.FieldOption.STORES_PAYLOADS));
+    assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR), testReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR));
+    assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), testReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET));
+    assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), testReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION));
+    assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), testReader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET));
+    assertEquals(aprioriReader.getFieldNames(IndexReader.FieldOption.UNINDEXED), testReader.getFieldNames(IndexReader.FieldOption.UNINDEXED));
+
    for (Object field : aprioriReader.getFieldNames(IndexReader.FieldOption.ALL)) {

      // test norms as used by normal use