LUCENE-8764: Add "export all terms" feature to Luke

Co-authored-by: Tomoko Uchida <tomoko@apache.org>
This commit is contained in:
Leonardo Menezes 2019-08-03 18:20:26 +09:00 committed by Tomoko Uchida
parent 8c4fde94fe
commit ff7b0c9de5
6 changed files with 344 additions and 0 deletions

View File

@ -63,6 +63,8 @@ New Features
* LUCENE-8936: Add SpanishMinimalStemFilter (vinod kumar via Tomoko Uchida)
* LUCENE-8764: Add "export all terms" feature to Luke. (Leonardo Menezes via Tomoko Uchida)
Improvements
* LUCENE-8874: Show SPI names instead of class names in Luke Analysis tab. (Tomoko Uchida)

View File

@ -33,6 +33,7 @@ import org.apache.lucene.luke.app.desktop.PreferencesFactory;
import org.apache.lucene.luke.app.desktop.components.dialog.menubar.AboutDialogFactory;
import org.apache.lucene.luke.app.desktop.components.dialog.menubar.CheckIndexDialogFactory;
import org.apache.lucene.luke.app.desktop.components.dialog.menubar.CreateIndexDialogFactory;
import org.apache.lucene.luke.app.desktop.components.dialog.menubar.ExportTermsDialogFactory;
import org.apache.lucene.luke.app.desktop.components.dialog.menubar.OpenIndexDialogFactory;
import org.apache.lucene.luke.app.desktop.components.dialog.menubar.OptimizeIndexDialogFactory;
import org.apache.lucene.luke.app.desktop.util.DialogOpener;
@ -57,6 +58,8 @@ public final class MenuBarProvider {
private final OptimizeIndexDialogFactory optimizeIndexDialogFactory;
private final ExportTermsDialogFactory exportTermsDialogFactory;
private final CheckIndexDialogFactory checkIndexDialogFactory;
private final AboutDialogFactory aboutDialogFactory;
@ -81,6 +84,8 @@ public final class MenuBarProvider {
private final JMenuItem optimizeIndexMItem = new JMenuItem();
private final JMenuItem exportTermsMItem = new JMenuItem();
private final JMenuItem checkIndexMItem = new JMenuItem();
private final JMenuItem aboutMItem = new JMenuItem();
@ -95,6 +100,7 @@ public final class MenuBarProvider {
this.openIndexDialogFactory = OpenIndexDialogFactory.getInstance();
this.createIndexDialogFactory = CreateIndexDialogFactory.getInstance();
this.optimizeIndexDialogFactory = OptimizeIndexDialogFactory.getInstance();
this.exportTermsDialogFactory = ExportTermsDialogFactory.getInstance();
this.checkIndexDialogFactory = CheckIndexDialogFactory.getInstance();
this.aboutDialogFactory = AboutDialogFactory.getInstance();
@ -173,6 +179,10 @@ public final class MenuBarProvider {
checkIndexMItem.setEnabled(false);
checkIndexMItem.addActionListener(listeners::showCheckIndexDialog);
toolsMenu.add(checkIndexMItem);
exportTermsMItem.setText(MessageUtils.getLocalizedMessage("menu.item.export.terms"));
exportTermsMItem.setEnabled(false);
exportTermsMItem.addActionListener(listeners::showExportTermsDialog);
toolsMenu.add(exportTermsMItem);
return toolsMenu;
}
@ -258,6 +268,12 @@ public final class MenuBarProvider {
});
}
/**
 * Menu action handler: opens the dialog built by {@code exportTermsDialogFactory},
 * titled "Export terms", passing 600 and 400 as its size arguments.
 *
 * @param e - the menu event that triggered this handler (not inspected)
 */
void showExportTermsDialog(ActionEvent e) {
  DialogOpener<ExportTermsDialogFactory> opener = new DialogOpener<>(exportTermsDialogFactory);
  // Nothing has to be configured on the factory before the dialog is shown.
  opener.open("Export terms", 600, 400, factory -> { });
}
}
private class Observer implements IndexObserver, DirectoryObserver {
@ -267,6 +283,7 @@ public final class MenuBarProvider {
reopenIndexMItem.setEnabled(false);
closeIndexMItem.setEnabled(false);
optimizeIndexMItem.setEnabled(false);
exportTermsMItem.setEnabled(false);
checkIndexMItem.setEnabled(true);
}
@ -279,6 +296,7 @@ public final class MenuBarProvider {
public void openIndex(LukeState state) {
reopenIndexMItem.setEnabled(true);
closeIndexMItem.setEnabled(true);
exportTermsMItem.setEnabled(true);
if (!state.readOnly() && state.hasDirectoryReader()) {
optimizeIndexMItem.setEnabled(true);
}
@ -297,6 +315,7 @@ public final class MenuBarProvider {
closeIndexMItem.setEnabled(false);
optimizeIndexMItem.setEnabled(false);
checkIndexMItem.setEnabled(false);
exportTermsMItem.setEnabled(false);
}
}

View File

@ -0,0 +1,275 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.luke.app.desktop.components.dialog.menubar;
import javax.swing.BorderFactory;
import javax.swing.BoxLayout;
import javax.swing.JButton;
import javax.swing.JComboBox;
import javax.swing.JDialog;
import javax.swing.JFileChooser;
import javax.swing.JLabel;
import javax.swing.JPanel;
import javax.swing.JTextField;
import javax.swing.SwingWorker;
import java.awt.Color;
import java.awt.Dialog;
import java.awt.Dimension;
import java.awt.FlowLayout;
import java.awt.GridLayout;
import java.awt.Insets;
import java.awt.Window;
import java.awt.event.ActionEvent;
import java.io.File;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.luke.app.IndexHandler;
import org.apache.lucene.luke.app.IndexObserver;
import org.apache.lucene.luke.app.LukeState;
import org.apache.lucene.luke.app.desktop.Preferences;
import org.apache.lucene.luke.app.desktop.PreferencesFactory;
import org.apache.lucene.luke.app.desktop.util.DialogOpener;
import org.apache.lucene.luke.app.desktop.util.ImageUtils;
import org.apache.lucene.luke.app.desktop.util.MessageUtils;
import org.apache.lucene.luke.app.desktop.util.StyleConstants;
import org.apache.lucene.luke.models.LukeException;
import org.apache.lucene.luke.models.tools.IndexTools;
import org.apache.lucene.luke.models.tools.IndexToolsFactory;
import org.apache.lucene.luke.models.util.IndexUtils;
import org.apache.lucene.luke.util.LoggerFactory;
import org.apache.lucene.util.NamedThreadFactory;
import org.apache.lucene.util.SuppressForbidden;
/**
* Factory of export terms dialog
*/
public final class ExportTermsDialogFactory implements DialogOpener.DialogFactory {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private static ExportTermsDialogFactory instance;
private final IndexToolsFactory indexToolsFactory = new IndexToolsFactory();
private final Preferences prefs;
private final IndexHandler indexHandler;
private final JComboBox<String> fieldCombo = new JComboBox<String>();
private final JTextField destDir = new JTextField();
private final JLabel statusLbl = new JLabel();
private final JLabel indicatorLbl = new JLabel();
private final ListenerFunctions listeners = new ListenerFunctions();
private JDialog dialog;
private IndexTools toolsModel;
public synchronized static ExportTermsDialogFactory getInstance() throws IOException {
if (instance == null) {
instance = new ExportTermsDialogFactory();
}
return instance;
}
private ExportTermsDialogFactory() throws IOException {
this.prefs = PreferencesFactory.getInstance();
this.indexHandler = IndexHandler.getInstance();
indexHandler.addObserver(new Observer());
}
@Override
public JDialog create(Window owner, String title, int width, int height) {
dialog = new JDialog(owner, title, Dialog.ModalityType.APPLICATION_MODAL);
dialog.add(content());
dialog.setSize(new Dimension(width, height));
dialog.setLocationRelativeTo(owner);
dialog.getContentPane().setBackground(prefs.getColorTheme().getBackgroundColor());
return dialog;
}
private JPanel content() {
JPanel panel = new JPanel(new GridLayout(5, 1));
panel.setOpaque(false);
panel.setLayout(new BoxLayout(panel, BoxLayout.PAGE_AXIS));
panel.setBorder(BorderFactory.createEmptyBorder(15, 15, 15, 15));
panel.add(currentOpenIndexPanel());
panel.add(fieldComboPanel());
panel.add(destinationDirPanel());
panel.add(statusPanel());
panel.add(actionButtonsPanel());
return panel;
}
private JPanel currentOpenIndexPanel() {
JPanel panel = new JPanel(new FlowLayout(FlowLayout.LEADING));
panel.setBorder(BorderFactory.createEmptyBorder());
panel.setOpaque(false);
JLabel label = new JLabel(MessageUtils.getLocalizedMessage("export.terms.label.index_path"));
JLabel value = new JLabel(indexHandler.getState().getIndexPath());
value.setToolTipText(indexHandler.getState().getIndexPath());
panel.add(label);
panel.add(value);
return panel;
}
private JPanel fieldComboPanel() {
JPanel panel = new JPanel(new GridLayout(2, 1));
panel.setOpaque(false);
panel.add(new JLabel(MessageUtils.getLocalizedMessage("export.terms.field")));
panel.add(fieldCombo);
return panel;
}
private JPanel destinationDirPanel() {
JPanel panel = new JPanel(new GridLayout(2, 1));
panel.setOpaque(false);
panel.add(new JLabel(MessageUtils.getLocalizedMessage("export.terms.label.output_path")));
JPanel inputPanel = new JPanel(new FlowLayout(FlowLayout.LEADING));
inputPanel.setBorder(BorderFactory.createEmptyBorder());
inputPanel.setOpaque(false);
destDir.setText(System.getProperty("user.home"));
destDir.setColumns(60);
destDir.setPreferredSize(new Dimension(200, 30));
destDir.setFont(StyleConstants.FONT_MONOSPACE_LARGE);
destDir.setEditable(false);
destDir.setBackground(Color.white);
inputPanel.add(destDir);
JButton browseBtn = new JButton(MessageUtils.getLocalizedMessage("export.terms.button.browse"));
browseBtn.setFont(StyleConstants.FONT_BUTTON_LARGE);
browseBtn.setMargin(new Insets(3, 0, 3, 0));
browseBtn.addActionListener(listeners::browseDirectory);
inputPanel.add(browseBtn);
panel.add(inputPanel);
return panel;
}
private JPanel actionButtonsPanel() {
// Buttons
JPanel execButtons = new JPanel(new FlowLayout(FlowLayout.TRAILING));
execButtons.setOpaque(false);
JButton exportBtn = new JButton(MessageUtils.getLocalizedMessage("export.terms.button.export"));
exportBtn.setMargin(new Insets(3, 0, 3, 0));
exportBtn.addActionListener(listeners::export);
execButtons.add(exportBtn);
JButton closeBtn = new JButton(MessageUtils.getLocalizedMessage("button.close"));
closeBtn.setMargin(new Insets(3, 0, 3, 0));
closeBtn.addActionListener(e -> dialog.dispose());
execButtons.add(closeBtn);
return execButtons;
}
private JPanel statusPanel() {
JPanel status = new JPanel(new FlowLayout(FlowLayout.LEADING));
status.setOpaque(false);
indicatorLbl.setIcon(ImageUtils.createImageIcon("indicator.gif", 20, 20));
indicatorLbl.setVisible(false);
status.add(statusLbl);
status.add(indicatorLbl);
return status;
}
private class ListenerFunctions {
@SuppressForbidden(reason = "JFilechooser#getSelectedFile() returns java.io.File")
void browseDirectory(ActionEvent e) {
JFileChooser fileChooser = new JFileChooser();
fileChooser.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);
fileChooser.setFileHidingEnabled(false);
int retVal = fileChooser.showOpenDialog(dialog);
if (retVal == JFileChooser.APPROVE_OPTION) {
File f = fileChooser.getSelectedFile();
destDir.setText(f.getAbsolutePath());
}
}
void export(ActionEvent e) {
ExecutorService executor = Executors.newSingleThreadExecutor(new NamedThreadFactory("export-terms-dialog"));
SwingWorker<Void, Void> task = new SwingWorker<Void, Void>() {
String filename;
@Override
protected Void doInBackground() {
setProgress(0);
statusLbl.setText("Exporting...");
indicatorLbl.setVisible(true);
String field = (String) fieldCombo.getSelectedItem();
String directory = destDir.getText();
try {
filename = toolsModel.exportTerms(directory, field);
} catch (LukeException e) {
log.error("Error while exporting terms from field " + field, e);
statusLbl.setText(MessageUtils.getLocalizedMessage("export.terms.label.error", e.getMessage()));
} catch (Exception e) {
log.error("Error while exporting terms from field " + field, e);
statusLbl.setText(MessageUtils.getLocalizedMessage("message.error.unknown"));
throw e;
} finally {
setProgress(100);
}
return null;
}
@Override
protected void done() {
indicatorLbl.setVisible(false);
if (filename != null) {
statusLbl.setText(MessageUtils.getLocalizedMessage("export.terms.label.success", filename, "[term],[doc frequency]"));
}
}
};
executor.submit(task);
executor.shutdown();
}
}
private class Observer implements IndexObserver {
@Override
public void openIndex(LukeState state) {
toolsModel = indexToolsFactory.newInstance(state.getIndexReader(), state.useCompound(), state.keepAllCommits());
IndexUtils.getFieldNames(state.getIndexReader()).stream().sorted().forEach(fieldCombo::addItem);
}
@Override
public void closeIndex() {
fieldCombo.removeAllItems();
toolsModel = null;
}
}
}

View File

@ -94,4 +94,13 @@ public interface IndexTools {
* @param dataDir - the directory path which contains sample documents (20 Newsgroups).
*/
void createNewIndex(String dataDir);
/**
 * Export terms from given field into a new file on the destination directory.
 * The default implementation writes one line per term in the form {@code [term],[doc frequency]}.
 *
 * @param destDir - destination directory
 * @param field - field name
 * @return The path of the file containing the export
 * @throws LukeException - if the field has no terms to export, or if writing the file fails
 */
String exportTerms(String destDir, String field);
}

View File

@ -17,11 +17,15 @@
package org.apache.lucene.luke.models.tools;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.PrintStream;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import org.apache.lucene.analysis.Analyzer;
@ -30,6 +34,9 @@ import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.MultiTerms;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.luke.models.LukeException;
import org.apache.lucene.luke.models.LukeModel;
import org.apache.lucene.luke.models.util.IndexUtils;
@ -37,6 +44,7 @@ import org.apache.lucene.luke.models.util.twentynewsgroups.Message;
import org.apache.lucene.luke.models.util.twentynewsgroups.MessageFilesParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
/** Default implementation of {@link IndexTools} */
public final class IndexToolsImpl extends LukeModel implements IndexTools {
@ -184,4 +192,25 @@ public final class IndexToolsImpl extends LukeModel implements IndexTools {
}
}
}
/**
 * Writes all terms of {@code field} to a new file named
 * {@code terms_<field>_<current time millis>.out} inside {@code destDir}. The file is UTF-8
 * encoded and holds one {@code term,docFreq} line per term, in the order the terms enum yields them.
 *
 * <p>Iteration stops early when the calling thread is interrupted; in that case the file holds
 * only the terms written so far and its path is still returned.
 *
 * @param destDir - destination directory
 * @param field - field name
 * @return the path of the written file
 * @throws LukeException - if the field has no terms, or if an I/O error occurs while exporting
 */
@Override
public String exportTerms(String destDir, String field) {
  String filename = "terms_" + field + "_" + System.currentTimeMillis() + ".out";
  Path path = Paths.get(destDir, filename);
  try {
    Terms terms = MultiTerms.getTerms(reader, field);
    if (terms == null) {
      // Checked before the writer is opened, so no empty file is created for such a field.
      throw new LukeException(String.format(Locale.US, "Field %s does not contain any terms to be exported", field));
    }
    try (BufferedWriter writer = Files.newBufferedWriter(path, Charset.forName("UTF-8"))) {
      TermsEnum termsEnum = terms.iterator();
      BytesRef term;
      while (!Thread.currentThread().isInterrupted() && (term = termsEnum.next()) != null) {
        writer.write(String.format(Locale.US, "%s,%d\n", term.utf8ToString(), termsEnum.docFreq()));
      }
      return path.toString();
    }
  } catch (IOException e) {
    throw new LukeException("Terms file export for field [" + field + "] to file [" + filename + "] has failed.", e);
  }
}
}

View File

@ -51,6 +51,7 @@ menu.item.create_index=Create new index
menu.item.close_index=Close index
menu.item.exit=Exit
menu.item.optimize=Optimize index
menu.item.export.terms=Export terms
menu.item.check_index=Check index
menu.item.theme_gray=Gray
menu.item.theme_classic=Classic
@ -83,6 +84,15 @@ createindex.label.data_link=http://kdd.ics.uci.edu/databases/20newsgroups/20news
createindex.label.datadir=Data directory:
createindex.textarea.data_help1=You can index sample documents from 20 Newsgroups corpus that is available here:
createindex.textarea.data_help2=Download and extract the tgz file, then select the extracted directory path.\nCreating an index with the full size corpus takes some time... :)
# Export terms
export.terms.label.index_path=Index directory path:
export.terms.label.output_path=Output directory path:
export.terms.field=Field to export terms from:
export.terms.button.export=Export
export.terms.button.browse=Browse
export.terms.label.success=<html>Terms successfully exported to: <br>{0}<br><br>Output format is: {1}</html>
export.terms.label.error=<html>Failed to export: <br>{0}</html>
# Optimize index
optimize.dialog.title=Optimize index
optimize.label.index_path=Index directory path: