From 91f5d2ff79572d2999a06339b42303d2a483f37d Mon Sep 17 00:00:00 2001 From: Amish Shah Date: Wed, 18 Sep 2019 19:53:48 +0900 Subject: [PATCH] LUCENE-8945: Allow to change the output file delimiter on Luke "export terms" feature Signed-off-by: Tomoko Uchida --- lucene/CHANGES.txt | 2 +- .../desktop/components/MenuBarProvider.java | 2 +- .../menubar/ExportTermsDialogFactory.java | 54 ++++++++++++++++++- .../lucene/luke/models/tools/IndexTools.java | 3 +- .../luke/models/tools/IndexToolsImpl.java | 4 +- 5 files changed, 58 insertions(+), 7 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 9eda7c14824..be5e7eae6e7 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -26,7 +26,7 @@ New Features * LUCENE-8936: Add SpanishMinimalStemFilter (vinod kumar via Tomoko Uchida) -* LUCENE-8764: Add "export all terms" feature to Luke. (Leonardo Menezes via Tomoko Uchida) +* LUCENE-8764 LUCENE-8945: Add "export all terms and doc freqs" feature to Luke with delimiters. (Leonardo Menezes, Amish Shah via Tomoko Uchida) * LUCENE-8747: Composite Matches from multiple subqueries now allow access to their submatches, and a new NamedMatches API allows marking of subqueries diff --git a/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/MenuBarProvider.java b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/MenuBarProvider.java index 3090283868e..90b2d4fb585 100644 --- a/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/MenuBarProvider.java +++ b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/MenuBarProvider.java @@ -269,7 +269,7 @@ public final class MenuBarProvider { } void showExportTermsDialog(ActionEvent e) { - new DialogOpener<>(exportTermsDialogFactory).open("Export terms", 600, 400, + new DialogOpener<>(exportTermsDialogFactory).open("Export terms", 600, 450, factory -> { }); } diff --git 
a/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/dialog/menubar/ExportTermsDialogFactory.java b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/dialog/menubar/ExportTermsDialogFactory.java index 07fe3cf4ce9..471094223c1 100644 --- a/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/dialog/menubar/ExportTermsDialogFactory.java +++ b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/dialog/menubar/ExportTermsDialogFactory.java @@ -38,8 +38,10 @@ import java.awt.event.ActionEvent; import java.io.File; import java.io.IOException; import java.lang.invoke.MethodHandles; +import java.util.Arrays; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; +import java.util.stream.Stream; import org.apache.logging.log4j.Logger; import org.apache.lucene.luke.app.IndexHandler; @@ -76,6 +78,8 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor private final JComboBox fieldCombo = new JComboBox(); + private final JComboBox delimiterCombo = new JComboBox(); + private final JTextField destDir = new JTextField(); private final JLabel statusLbl = new JLabel(); @@ -88,6 +92,8 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor private IndexTools toolsModel; + private String selectedDelimiter; + public synchronized static ExportTermsDialogFactory getInstance() throws IOException { if (instance == null) { instance = new ExportTermsDialogFactory(); @@ -99,6 +105,8 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor this.prefs = PreferencesFactory.getInstance(); this.indexHandler = IndexHandler.getInstance(); indexHandler.addObserver(new Observer()); + Stream.of(Delimiter.values()).forEachOrdered(delimiterVal -> delimiterCombo.addItem(delimiterVal.getDescription())); + delimiterCombo.setSelectedItem(Delimiter.COMMA.getDescription());//Set default delimiter } @Override @@ -120,6 +128,7 @@ public 
final class ExportTermsDialogFactory implements DialogOpener.DialogFactor panel.add(currentOpenIndexPanel()); panel.add(fieldComboPanel()); panel.add(destinationDirPanel()); + panel.add(delimiterComboPanel()); panel.add(statusPanel()); panel.add(actionButtonsPanel()); @@ -138,6 +147,14 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor return panel; } + private JPanel delimiterComboPanel() { + JPanel panel = new JPanel(new GridLayout(2, 1)); + panel.setOpaque(false); + panel.add(new JLabel("Select Delimiter: ")); + panel.add(delimiterCombo); + return panel; + } + private JPanel fieldComboPanel() { JPanel panel = new JPanel(new GridLayout(2, 1)); panel.setOpaque(false); @@ -225,9 +242,11 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor statusLbl.setText("Exporting..."); indicatorLbl.setVisible(true); String field = (String) fieldCombo.getSelectedItem(); + selectedDelimiter = Delimiter.getSelectedDelimiterValue((String) delimiterCombo.getSelectedItem()); + String directory = destDir.getText(); try { - filename = toolsModel.exportTerms(directory, field); + filename = toolsModel.exportTerms(directory, field, selectedDelimiter); } catch (LukeException e) { log.error("Error while exporting terms from field " + field, e); statusLbl.setText(MessageUtils.getLocalizedMessage("export.terms.label.error", e.getMessage())); @@ -245,7 +264,7 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor protected void done() { indicatorLbl.setVisible(false); if (filename != null) { - statusLbl.setText(MessageUtils.getLocalizedMessage("export.terms.label.success", filename, "[term],[doc frequency]")); + statusLbl.setText(MessageUtils.getLocalizedMessage("export.terms.label.success", filename, "[term]" + selectedDelimiter + "[doc frequency]")); } } }; @@ -272,4 +291,35 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor } + /** + * Delimiters that can be selected 
+ */ + private enum Delimiter { + COMMA("Comma", ","), WHITESPACE("Whitespace", " "), TAB("Tab", "\t"); + + private final String description; + private final String separator; + + private Delimiter(final String description, final String separator) { + this.description = description; + this.separator = separator; + } + + String getDescription() { + return this.description; + } + + String getSeparator() { + return this.separator; + } + + static String getSelectedDelimiterValue(String delimiter) { + return Arrays.stream(Delimiter.values()) + .filter(e -> e.description.equals(delimiter)) + .findFirst() + .orElse(COMMA) + .getSeparator(); + } + } + } diff --git a/lucene/luke/src/java/org/apache/lucene/luke/models/tools/IndexTools.java b/lucene/luke/src/java/org/apache/lucene/luke/models/tools/IndexTools.java index 72d5384c2e0..a4f4d12052e 100644 --- a/lucene/luke/src/java/org/apache/lucene/luke/models/tools/IndexTools.java +++ b/lucene/luke/src/java/org/apache/lucene/luke/models/tools/IndexTools.java @@ -100,7 +100,8 @@ public interface IndexTools { * Export terms from given field into a new file on the destination directory * @param destDir - destination directory * @param field - field name + * @param delimiter - delimiter to separate terms and their frequency * @return The file containing the export */ - String exportTerms(String destDir, String field); + String exportTerms(String destDir, String field, String delimiter); } diff --git a/lucene/luke/src/java/org/apache/lucene/luke/models/tools/IndexToolsImpl.java b/lucene/luke/src/java/org/apache/lucene/luke/models/tools/IndexToolsImpl.java index f4ca89ed811..4fdd6e3f96a 100644 --- a/lucene/luke/src/java/org/apache/lucene/luke/models/tools/IndexToolsImpl.java +++ b/lucene/luke/src/java/org/apache/lucene/luke/models/tools/IndexToolsImpl.java @@ -193,7 +193,7 @@ public final class IndexToolsImpl extends LukeModel implements IndexTools { } } - public String exportTerms(String destDir, String field) { + public String 
exportTerms(String destDir, String field, String delimiter) { String filename = "terms_" + field + "_" + System.currentTimeMillis() + ".out"; Path path = Paths.get(destDir, filename); try { @@ -205,7 +205,7 @@ public final class IndexToolsImpl extends LukeModel implements IndexTools { TermsEnum termsEnum = terms.iterator(); BytesRef term; while (!Thread.currentThread().isInterrupted() && (term = termsEnum.next()) != null) { - writer.write(String.format(Locale.US, "%s,%d\n", term.utf8ToString(), +termsEnum.docFreq())); + writer.write(String.format(Locale.US, "%s%s%d\n", term.utf8ToString(), delimiter, +termsEnum.docFreq())); } return path.toString(); }