diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index ef85717eca8..ce86658e2ef 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -63,6 +63,8 @@ New Features * LUCENE-8936: Add SpanishMinimalStemFilter (vinod kumar via Tomoko Uchida) +* LUCENE-8764: Add "export all terms" feature to Luke. (Leonardo Menezes via Tomoko Uchida) + Improvements * LUCENE-8874: Show SPI names instead of class names in Luke Analysis tab. (Tomoko Uchida) diff --git a/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/MenuBarProvider.java b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/MenuBarProvider.java index 2a5008f4c2b..3090283868e 100644 --- a/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/MenuBarProvider.java +++ b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/MenuBarProvider.java @@ -33,6 +33,7 @@ import org.apache.lucene.luke.app.desktop.PreferencesFactory; import org.apache.lucene.luke.app.desktop.components.dialog.menubar.AboutDialogFactory; import org.apache.lucene.luke.app.desktop.components.dialog.menubar.CheckIndexDialogFactory; import org.apache.lucene.luke.app.desktop.components.dialog.menubar.CreateIndexDialogFactory; +import org.apache.lucene.luke.app.desktop.components.dialog.menubar.ExportTermsDialogFactory; import org.apache.lucene.luke.app.desktop.components.dialog.menubar.OpenIndexDialogFactory; import org.apache.lucene.luke.app.desktop.components.dialog.menubar.OptimizeIndexDialogFactory; import org.apache.lucene.luke.app.desktop.util.DialogOpener; @@ -57,6 +58,8 @@ public final class MenuBarProvider { private final OptimizeIndexDialogFactory optimizeIndexDialogFactory; + private final ExportTermsDialogFactory exportTermsDialogFactory; + private final CheckIndexDialogFactory checkIndexDialogFactory; private final AboutDialogFactory aboutDialogFactory; @@ -81,6 +84,8 @@ public final class MenuBarProvider { private final JMenuItem optimizeIndexMItem = new JMenuItem(); + private 
final JMenuItem exportTermsMItem = new JMenuItem(); + private final JMenuItem checkIndexMItem = new JMenuItem(); private final JMenuItem aboutMItem = new JMenuItem(); @@ -95,6 +100,7 @@ public final class MenuBarProvider { this.openIndexDialogFactory = OpenIndexDialogFactory.getInstance(); this.createIndexDialogFactory = CreateIndexDialogFactory.getInstance(); this.optimizeIndexDialogFactory = OptimizeIndexDialogFactory.getInstance(); + this.exportTermsDialogFactory = ExportTermsDialogFactory.getInstance(); this.checkIndexDialogFactory = CheckIndexDialogFactory.getInstance(); this.aboutDialogFactory = AboutDialogFactory.getInstance(); @@ -173,6 +179,10 @@ public final class MenuBarProvider { checkIndexMItem.setEnabled(false); checkIndexMItem.addActionListener(listeners::showCheckIndexDialog); toolsMenu.add(checkIndexMItem); + exportTermsMItem.setText(MessageUtils.getLocalizedMessage("menu.item.export.terms")); + exportTermsMItem.setEnabled(false); + exportTermsMItem.addActionListener(listeners::showExportTermsDialog); + toolsMenu.add(exportTermsMItem); return toolsMenu; } @@ -258,6 +268,12 @@ public final class MenuBarProvider { }); } + void showExportTermsDialog(ActionEvent e) { + new DialogOpener<>(exportTermsDialogFactory).open("Export terms", 600, 400, + factory -> { + }); + } + } private class Observer implements IndexObserver, DirectoryObserver { @@ -267,6 +283,7 @@ public final class MenuBarProvider { reopenIndexMItem.setEnabled(false); closeIndexMItem.setEnabled(false); optimizeIndexMItem.setEnabled(false); + exportTermsMItem.setEnabled(false); checkIndexMItem.setEnabled(true); } @@ -279,6 +296,7 @@ public final class MenuBarProvider { public void openIndex(LukeState state) { reopenIndexMItem.setEnabled(true); closeIndexMItem.setEnabled(true); + exportTermsMItem.setEnabled(true); if (!state.readOnly() && state.hasDirectoryReader()) { optimizeIndexMItem.setEnabled(true); } @@ -297,6 +315,7 @@ public final class MenuBarProvider { 
closeIndexMItem.setEnabled(false); optimizeIndexMItem.setEnabled(false); checkIndexMItem.setEnabled(false); + exportTermsMItem.setEnabled(false); } } diff --git a/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/dialog/menubar/ExportTermsDialogFactory.java b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/dialog/menubar/ExportTermsDialogFactory.java new file mode 100644 index 00000000000..07fe3cf4ce9 --- /dev/null +++ b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/dialog/menubar/ExportTermsDialogFactory.java @@ -0,0 +1,287 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.luke.app.desktop.components.dialog.menubar;
+
+import javax.swing.BorderFactory;
+import javax.swing.BoxLayout;
+import javax.swing.JButton;
+import javax.swing.JComboBox;
+import javax.swing.JDialog;
+import javax.swing.JFileChooser;
+import javax.swing.JLabel;
+import javax.swing.JPanel;
+import javax.swing.JTextField;
+import javax.swing.SwingWorker;
+import java.awt.Color;
+import java.awt.Dialog;
+import java.awt.Dimension;
+import java.awt.FlowLayout;
+import java.awt.GridLayout;
+import java.awt.Insets;
+import java.awt.Window;
+import java.awt.event.ActionEvent;
+import java.io.File;
+import java.io.IOException;
+import java.lang.invoke.MethodHandles;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+
+import org.apache.logging.log4j.Logger;
+import org.apache.lucene.luke.app.IndexHandler;
+import org.apache.lucene.luke.app.IndexObserver;
+import org.apache.lucene.luke.app.LukeState;
+import org.apache.lucene.luke.app.desktop.Preferences;
+import org.apache.lucene.luke.app.desktop.PreferencesFactory;
+import org.apache.lucene.luke.app.desktop.util.DialogOpener;
+import org.apache.lucene.luke.app.desktop.util.ImageUtils;
+import org.apache.lucene.luke.app.desktop.util.MessageUtils;
+import org.apache.lucene.luke.app.desktop.util.StyleConstants;
+import org.apache.lucene.luke.models.LukeException;
+import org.apache.lucene.luke.models.tools.IndexTools;
+import org.apache.lucene.luke.models.tools.IndexToolsFactory;
+import org.apache.lucene.luke.models.util.IndexUtils;
+import org.apache.lucene.luke.util.LoggerFactory;
+import org.apache.lucene.util.NamedThreadFactory;
+import org.apache.lucene.util.SuppressForbidden;
+
+/**
+ * Factory of export terms dialog
+ */
+public final class ExportTermsDialogFactory implements DialogOpener.DialogFactory {
+
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  private static ExportTermsDialogFactory instance;
+
+  private final IndexToolsFactory indexToolsFactory = new IndexToolsFactory();
+
+  private final Preferences prefs;
+
+  private final IndexHandler indexHandler;
+
+  private final JComboBox<String> fieldCombo = new JComboBox<>();
+
+  private final JTextField destDir = new JTextField();
+
+  private final JLabel statusLbl = new JLabel();
+
+  private final JLabel indicatorLbl = new JLabel();
+
+  private final ListenerFunctions listeners = new ListenerFunctions();
+
+  private JDialog dialog;
+
+  private IndexTools toolsModel;
+
+  /** Returns the singleton factory, creating it on the first call. */
+  public synchronized static ExportTermsDialogFactory getInstance() throws IOException {
+    if (instance == null) {
+      instance = new ExportTermsDialogFactory();
+    }
+    return instance;
+  }
+
+  private ExportTermsDialogFactory() throws IOException {
+    this.prefs = PreferencesFactory.getInstance();
+    this.indexHandler = IndexHandler.getInstance();
+    // the observer keeps the field list and the tools model in sync with the opened index
+    indexHandler.addObserver(new Observer());
+  }
+
+  /** Builds a new application-modal export dialog of the given size, positioned relative to its owner. */
+  @Override
+  public JDialog create(Window owner, String title, int width, int height) {
+    dialog = new JDialog(owner, title, Dialog.ModalityType.APPLICATION_MODAL);
+    dialog.add(content());
+    dialog.setSize(new Dimension(width, height));
+    dialog.setLocationRelativeTo(owner);
+    dialog.getContentPane().setBackground(prefs.getColorTheme().getBackgroundColor());
+    return dialog;
+  }
+
+  /** Stacks the index path, field selector, destination chooser, status line and buttons vertically. */
+  private JPanel content() {
+    JPanel panel = new JPanel();
+    panel.setOpaque(false);
+    panel.setLayout(new BoxLayout(panel, BoxLayout.PAGE_AXIS));
+    panel.setBorder(BorderFactory.createEmptyBorder(15, 15, 15, 15));
+
+    panel.add(currentOpenIndexPanel());
+    panel.add(fieldComboPanel());
+    panel.add(destinationDirPanel());
+    panel.add(statusPanel());
+    panel.add(actionButtonsPanel());
+
+    return panel;
+  }
+
+  private JPanel currentOpenIndexPanel() {
+    JPanel panel = new JPanel(new FlowLayout(FlowLayout.LEADING));
+    panel.setBorder(BorderFactory.createEmptyBorder());
+    panel.setOpaque(false);
+    JLabel label = new JLabel(MessageUtils.getLocalizedMessage("export.terms.label.index_path"));
+    JLabel value = new JLabel(indexHandler.getState().getIndexPath());
+    value.setToolTipText(indexHandler.getState().getIndexPath());
+    panel.add(label);
+    panel.add(value);
+    return panel;
+  }
+
+  private JPanel fieldComboPanel() {
+    JPanel panel = new JPanel(new GridLayout(2, 1));
+    panel.setOpaque(false);
+    panel.add(new JLabel(MessageUtils.getLocalizedMessage("export.terms.field")));
+    panel.add(fieldCombo);
+    return panel;
+  }
+
+  private JPanel destinationDirPanel() {
+    JPanel panel = new JPanel(new GridLayout(2, 1));
+    panel.setOpaque(false);
+
+    panel.add(new JLabel(MessageUtils.getLocalizedMessage("export.terms.label.output_path")));
+
+    JPanel inputPanel = new JPanel(new FlowLayout(FlowLayout.LEADING));
+    inputPanel.setBorder(BorderFactory.createEmptyBorder());
+    inputPanel.setOpaque(false);
+    destDir.setText(System.getProperty("user.home"));
+    destDir.setColumns(60);
+    destDir.setPreferredSize(new Dimension(200, 30));
+    destDir.setFont(StyleConstants.FONT_MONOSPACE_LARGE);
+    destDir.setEditable(false);
+    destDir.setBackground(Color.white);
+    inputPanel.add(destDir);
+
+    JButton browseBtn = new JButton(MessageUtils.getLocalizedMessage("export.terms.button.browse"));
+    browseBtn.setFont(StyleConstants.FONT_BUTTON_LARGE);
+    browseBtn.setMargin(new Insets(3, 0, 3, 0));
+    browseBtn.addActionListener(listeners::browseDirectory);
+    inputPanel.add(browseBtn);
+
+    panel.add(inputPanel);
+    return panel;
+  }
+
+  private JPanel actionButtonsPanel() {
+    // Buttons
+    JPanel execButtons = new JPanel(new FlowLayout(FlowLayout.TRAILING));
+    execButtons.setOpaque(false);
+    JButton exportBtn = new JButton(MessageUtils.getLocalizedMessage("export.terms.button.export"));
+    exportBtn.setMargin(new Insets(3, 0, 3, 0));
+    exportBtn.addActionListener(listeners::export);
+    execButtons.add(exportBtn);
+    JButton closeBtn = new JButton(MessageUtils.getLocalizedMessage("button.close"));
+    closeBtn.setMargin(new Insets(3, 0, 3, 0));
+    closeBtn.addActionListener(e -> dialog.dispose());
+    execButtons.add(closeBtn);
+    return execButtons;
+  }
+
+  private JPanel statusPanel() {
+    JPanel status = new JPanel(new FlowLayout(FlowLayout.LEADING));
+    status.setOpaque(false);
+    // the status label is shared by all dialogs of this factory: drop the result of an earlier export
+    statusLbl.setText("");
+    indicatorLbl.setIcon(ImageUtils.createImageIcon("indicator.gif", 20, 20));
+    indicatorLbl.setVisible(false);
+    status.add(statusLbl);
+    status.add(indicatorLbl);
+    return status;
+  }
+
+  private class ListenerFunctions {
+
+    @SuppressForbidden(reason = "JFilechooser#getSelectedFile() returns java.io.File")
+    void browseDirectory(ActionEvent e) {
+      JFileChooser fileChooser = new JFileChooser();
+      fileChooser.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);
+      fileChooser.setFileHidingEnabled(false);
+      int retVal = fileChooser.showOpenDialog(dialog);
+      if (retVal == JFileChooser.APPROVE_OPTION) {
+        File f = fileChooser.getSelectedFile();
+        destDir.setText(f.getAbsolutePath());
+      }
+    }
+
+    /**
+     * Starts the export on a background thread. Widgets are read and updated only here and in
+     * {@code done()}, i.e. on the event dispatch thread; the worker itself touches no Swing component.
+     */
+    void export(ActionEvent e) {
+      final IndexTools tools = toolsModel;
+      final String field = (String) fieldCombo.getSelectedItem();
+      final String directory = destDir.getText();
+      statusLbl.setText("Exporting...");
+      indicatorLbl.setVisible(true);
+
+      SwingWorker<String, Void> task = new SwingWorker<String, Void>() {
+
+        @Override
+        protected String doInBackground() {
+          return tools.exportTerms(directory, field);
+        }
+
+        @Override
+        protected void done() {
+          indicatorLbl.setVisible(false);
+          try {
+            // get() does not block here: done() runs after doInBackground() has finished
+            String filename = get();
+            statusLbl.setText(MessageUtils.getLocalizedMessage("export.terms.label.success", filename, "[term],[doc frequency]"));
+          } catch (ExecutionException ex) {
+            Throwable cause = ex.getCause();
+            log.error("Error while exporting terms from field " + field, cause);
+            if (cause instanceof LukeException) {
+              statusLbl.setText(MessageUtils.getLocalizedMessage("export.terms.label.error", cause.getMessage()));
+            } else {
+              statusLbl.setText(MessageUtils.getLocalizedMessage("message.error.unknown"));
+            }
+          } catch (InterruptedException ex) {
+            Thread.currentThread().interrupt();
+            statusLbl.setText(MessageUtils.getLocalizedMessage("message.error.unknown"));
+          }
+        }
+      };
+
+      ExecutorService executor = Executors.newSingleThreadExecutor(new NamedThreadFactory("export-terms-dialog"));
+      executor.submit(task);
+      executor.shutdown();
+    }
+
+  }
+
+  private class Observer implements IndexObserver {
+
+    @Override
+    public void openIndex(LukeState state) {
+      toolsModel = indexToolsFactory.newInstance(state.getIndexReader(), state.useCompound(), state.keepAllCommits());
+      // defensive: start from an empty list in case no closeIndex() preceded this call
+      fieldCombo.removeAllItems();
+      IndexUtils.getFieldNames(state.getIndexReader()).stream().sorted().forEach(fieldCombo::addItem);
+    }
+
+    @Override
+    public void closeIndex() {
+      fieldCombo.removeAllItems();
+      toolsModel = null;
+    }
+
+  }
+
+}
diff --git a/lucene/luke/src/java/org/apache/lucene/luke/models/tools/IndexTools.java b/lucene/luke/src/java/org/apache/lucene/luke/models/tools/IndexTools.java index 877646cd4b4..72d5384c2e0 100644 --- a/lucene/luke/src/java/org/apache/lucene/luke/models/tools/IndexTools.java +++ b/lucene/luke/src/java/org/apache/lucene/luke/models/tools/IndexTools.java @@ -94,4 +94,13 @@ public interface IndexTools { * @param dataDir - the directory path which contains sample documents (20 Newsgroups). 
*/ void createNewIndex(String dataDir); + + + /** + * Export terms from given field into a new file on the destination directory + * @param destDir - destination directory + * @param field - field name + * @return The file containing the export + */ + String exportTerms(String destDir, String field); } diff --git a/lucene/luke/src/java/org/apache/lucene/luke/models/tools/IndexToolsImpl.java b/lucene/luke/src/java/org/apache/lucene/luke/models/tools/IndexToolsImpl.java index 166958b8d10..f4ca89ed811 100644 --- a/lucene/luke/src/java/org/apache/lucene/luke/models/tools/IndexToolsImpl.java +++ b/lucene/luke/src/java/org/apache/lucene/luke/models/tools/IndexToolsImpl.java @@ -17,11 +17,15 @@ package org.apache.lucene.luke.models.tools; +import java.io.BufferedWriter; import java.io.IOException; import java.io.PrintStream; +import java.nio.charset.Charset; +import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.util.List; +import java.util.Locale; import java.util.Objects; import org.apache.lucene.analysis.Analyzer; @@ -30,6 +34,9 @@ import org.apache.lucene.index.CheckIndex; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.MultiTerms; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; import org.apache.lucene.luke.models.LukeException; import org.apache.lucene.luke.models.LukeModel; import org.apache.lucene.luke.models.util.IndexUtils; @@ -37,6 +44,7 @@ import org.apache.lucene.luke.models.util.twentynewsgroups.Message; import org.apache.lucene.luke.models.util.twentynewsgroups.MessageFilesParser; import org.apache.lucene.search.Query; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; /** Default implementation of {@link IndexTools} */ public final class IndexToolsImpl extends LukeModel implements IndexTools { @@ -184,4 +192,38 @@ public final class 
IndexToolsImpl extends LukeModel implements IndexTools { } } }
+
+  /**
+   * {@inheritDoc}
+   *
+   * The file is named {@code terms_FIELD_TIMESTAMP.out}; characters of the field name that are
+   * reserved in file names are replaced by underscores. Each line holds one term and its
+   * document frequency, separated by a comma, encoded as UTF-8.
+   *
+   * @throws LukeException if the field has no terms or the file cannot be written
+   */
+  @Override
+  public String exportTerms(String destDir, String field) {
+    // field names are arbitrary strings: keep path separators and other reserved characters out of the file name
+    String safeField = String.valueOf(field).replaceAll("[\\\\/:*?\"<>|\\p{Cntrl}]", "_");
+    String filename = "terms_" + safeField + "_" + System.currentTimeMillis() + ".out";
+    Path path = Paths.get(destDir, filename);
+    try {
+      Terms terms = MultiTerms.getTerms(reader, field);
+      if (terms == null) {
+        throw new LukeException(String.format(Locale.US, "Field %s does not contain any terms to be exported", field));
+      }
+      try (BufferedWriter writer = Files.newBufferedWriter(path, Charset.forName("UTF-8"))) {
+        TermsEnum termsEnum = terms.iterator();
+        BytesRef term;
+        // stops early when the thread is interrupted; the file then holds only the terms written so far
+        while (!Thread.currentThread().isInterrupted() && (term = termsEnum.next()) != null) {
+          writer.write(String.format(Locale.US, "%s,%d\n", term.utf8ToString(), termsEnum.docFreq()));
+        }
+        return path.toString();
+      }
+    } catch (IOException e) {
+      throw new LukeException("Terms file export for field [" + field + "] to file [" + filename + "] has failed.", e);
+    }
+  }
} diff --git a/lucene/luke/src/resources/org/apache/lucene/luke/app/desktop/messages/messages.properties b/lucene/luke/src/resources/org/apache/lucene/luke/app/desktop/messages/messages.properties index e6fed08d49e..f9c8c45a0f4 100644 --- a/lucene/luke/src/resources/org/apache/lucene/luke/app/desktop/messages/messages.properties +++ b/lucene/luke/src/resources/org/apache/lucene/luke/app/desktop/messages/messages.properties @@ -51,6 +51,7 @@ menu.item.create_index=Create new index menu.item.close_index=Close index menu.item.exit=Exit menu.item.optimize=Optimize index +menu.item.export.terms=Export terms menu.item.check_index=Check index menu.item.theme_gray=Gray menu.item.theme_classic=Classic @@ -83,6 +84,15 @@ createindex.label.data_link=http://kdd.ics.uci.edu/databases/20newsgroups/20news createindex.label.datadir=Data directory: createindex.textarea.data_help1=You can index sample documents from 20 Newsgroups corpus that is available at here: createindex.textarea.data_help2=Download and extract the tgz file, 
then select the extracted directory path.\nCreating an index with the full size corpus takes some time... :) +# Export terms +export.terms.label.index_path=Index directory path: +export.terms.label.output_path=Output directory path: +export.terms.field=Field to export terms from: +export.terms.button.export=Export +export.terms.button.browse=Browse +export.terms.label.success=<html>Terms successfully exported to:<br>{0}<br><br>Output format is: {1}</html> +export.terms.label.error=<html>Failed to export:<br>{0}</html> + # Optimize index optimize.dialog.title=Optimize index optimize.label.index_path=Index directory path: