From 8d0cbcbb53139413a3fdbb364764e811145b2ccf Mon Sep 17 00:00:00 2001 From: orenovadia Date: Tue, 2 Feb 2021 14:38:43 -0800 Subject: [PATCH] LUCENE-9680 - Re-add IndexWriter::getFieldNames --- lucene/CHANGES.txt | 3 ++ .../org/apache/lucene/index/FieldInfos.java | 5 ++ .../org/apache/lucene/index/IndexWriter.java | 11 +++++ .../apache/lucene/index/TestIndexWriter.java | 48 +++++++++++++++++++ 4 files changed, 67 insertions(+) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 824efaa4a87..40f2f507750 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -7,6 +7,9 @@ http://s.apache.org/luceneversions New Features +* LUCENE-9680: IndexWriter#getFieldNames() method added to get fields present in index. + This method was removed in LUCENE-8909. (Oren Ovadia) + * LUCENE-9322: Vector-valued fields, Lucene90 Codec (Mike Sokolov, Julie Tibshirani, Tomoko Uchida) * LUCENE-9004: Approximate nearest vector search via NSW graphs diff --git a/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java b/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java index dfa32235f27..0f8d3415b2f 100644 --- a/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java +++ b/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java @@ -25,6 +25,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.Set; import java.util.stream.Collectors; import java.util.stream.StreamSupport; import org.apache.lucene.util.ArrayUtil; @@ -681,6 +682,10 @@ public class FieldInfos implements Iterable { } } + synchronized Set getFieldNames() { + return Set.copyOf(nameToNumber.keySet()); + } + synchronized void clear() { numberToName.clear(); nameToNumber.clear(); diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index 8b7e9e052e6..9b8d55e5ada 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -1987,6 +1987,17 @@ public class IndexWriter return dvUpdates; } + /** + * Return an unmodifiable set of all field names as visible from this IndexWriter, across all + * segments of the index. + * + * @lucene.experimental + */ + public Set getFieldNames() { + // FieldNumbers#getFieldNames() returns an unmodifiableSet + return globalFieldNumberMap.getFieldNames(); + } + // for test purpose final synchronized int getSegmentCount() { return segmentInfos.size(); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java index 1a5e87a40ce..1ee58132a52 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java @@ -4635,4 +4635,52 @@ public class TestIndexWriter extends LuceneTestCase { } } } + + public void testGetFieldNames() throws IOException { + Directory dir = newDirectory(); + + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); + + assertEquals(Set.of(), writer.getFieldNames()); + + addDocWithField(writer, "f1"); + assertEquals(Set.of("f1"), writer.getFieldNames()); + + // should be unmodifiable: + final Set fieldSet = writer.getFieldNames(); + assertThrows(UnsupportedOperationException.class, () -> fieldSet.add("cannot modify")); + assertThrows(UnsupportedOperationException.class, () -> fieldSet.remove("f1")); + + addDocWithField(writer, "f2"); + assertEquals(Set.of("f1", "f2"), writer.getFieldNames()); + + // set from a previous call is an independent immutable copy, cannot be modified. + assertEquals(Set.of("f1"), fieldSet); + + // flush should not have an effect on field names + writer.flush(); + assertEquals(Set.of("f1", "f2"), writer.getFieldNames()); + + // commit should not have an effect on field names + writer.commit(); + assertEquals(Set.of("f1", "f2"), writer.getFieldNames()); + + writer.close(); + + // new writer should identify committed fields + writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); + assertEquals(Set.of("f1", "f2"), writer.getFieldNames()); + + writer.deleteAll(); + assertEquals(Set.of(), writer.getFieldNames()); + + writer.close(); + dir.close(); + } + + private static void addDocWithField(IndexWriter writer, String field) throws IOException { + Document doc = new Document(); + doc.add(newField(field, "value", storedTextType)); + writer.addDocument(doc); + } }