mirror of https://github.com/apache/lucene.git
LUCENE-8764: Add "export all terms" feature to Luke
Co-authored-by: Tomoko Uchida <tomoko@apache.org>
This commit is contained in:
parent
8c4fde94fe
commit
ff7b0c9de5
|
@ -63,6 +63,8 @@ New Features
|
|||
|
||||
* LUCENE-8936: Add SpanishMinimalStemFilter (vinod kumar via Tomoko Uchida)
|
||||
|
||||
* LUCENE-8764: Add "export all terms" feature to Luke. (Leonardo Menezes via Tomoko Uchida)
|
||||
|
||||
Improvements
|
||||
|
||||
* LUCENE-8874: Show SPI names instead of class names in Luke Analysis tab. (Tomoko Uchida)
|
||||
|
|
|
@ -33,6 +33,7 @@ import org.apache.lucene.luke.app.desktop.PreferencesFactory;
|
|||
import org.apache.lucene.luke.app.desktop.components.dialog.menubar.AboutDialogFactory;
|
||||
import org.apache.lucene.luke.app.desktop.components.dialog.menubar.CheckIndexDialogFactory;
|
||||
import org.apache.lucene.luke.app.desktop.components.dialog.menubar.CreateIndexDialogFactory;
|
||||
import org.apache.lucene.luke.app.desktop.components.dialog.menubar.ExportTermsDialogFactory;
|
||||
import org.apache.lucene.luke.app.desktop.components.dialog.menubar.OpenIndexDialogFactory;
|
||||
import org.apache.lucene.luke.app.desktop.components.dialog.menubar.OptimizeIndexDialogFactory;
|
||||
import org.apache.lucene.luke.app.desktop.util.DialogOpener;
|
||||
|
@ -57,6 +58,8 @@ public final class MenuBarProvider {
|
|||
|
||||
private final OptimizeIndexDialogFactory optimizeIndexDialogFactory;
|
||||
|
||||
private final ExportTermsDialogFactory exportTermsDialogFactory;
|
||||
|
||||
private final CheckIndexDialogFactory checkIndexDialogFactory;
|
||||
|
||||
private final AboutDialogFactory aboutDialogFactory;
|
||||
|
@ -81,6 +84,8 @@ public final class MenuBarProvider {
|
|||
|
||||
private final JMenuItem optimizeIndexMItem = new JMenuItem();
|
||||
|
||||
private final JMenuItem exportTermsMItem = new JMenuItem();
|
||||
|
||||
private final JMenuItem checkIndexMItem = new JMenuItem();
|
||||
|
||||
private final JMenuItem aboutMItem = new JMenuItem();
|
||||
|
@ -95,6 +100,7 @@ public final class MenuBarProvider {
|
|||
this.openIndexDialogFactory = OpenIndexDialogFactory.getInstance();
|
||||
this.createIndexDialogFactory = CreateIndexDialogFactory.getInstance();
|
||||
this.optimizeIndexDialogFactory = OptimizeIndexDialogFactory.getInstance();
|
||||
this.exportTermsDialogFactory = ExportTermsDialogFactory.getInstance();
|
||||
this.checkIndexDialogFactory = CheckIndexDialogFactory.getInstance();
|
||||
this.aboutDialogFactory = AboutDialogFactory.getInstance();
|
||||
|
||||
|
@ -173,6 +179,10 @@ public final class MenuBarProvider {
|
|||
checkIndexMItem.setEnabled(false);
|
||||
checkIndexMItem.addActionListener(listeners::showCheckIndexDialog);
|
||||
toolsMenu.add(checkIndexMItem);
|
||||
exportTermsMItem.setText(MessageUtils.getLocalizedMessage("menu.item.export.terms"));
|
||||
exportTermsMItem.setEnabled(false);
|
||||
exportTermsMItem.addActionListener(listeners::showExportTermsDialog);
|
||||
toolsMenu.add(exportTermsMItem);
|
||||
return toolsMenu;
|
||||
}
|
||||
|
||||
|
@ -258,6 +268,12 @@ public final class MenuBarProvider {
|
|||
});
|
||||
}
|
||||
|
||||
void showExportTermsDialog(ActionEvent e) {
|
||||
new DialogOpener<>(exportTermsDialogFactory).open("Export terms", 600, 400,
|
||||
factory -> {
|
||||
});
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private class Observer implements IndexObserver, DirectoryObserver {
|
||||
|
@ -267,6 +283,7 @@ public final class MenuBarProvider {
|
|||
reopenIndexMItem.setEnabled(false);
|
||||
closeIndexMItem.setEnabled(false);
|
||||
optimizeIndexMItem.setEnabled(false);
|
||||
exportTermsMItem.setEnabled(false);
|
||||
checkIndexMItem.setEnabled(true);
|
||||
}
|
||||
|
||||
|
@ -279,6 +296,7 @@ public final class MenuBarProvider {
|
|||
public void openIndex(LukeState state) {
|
||||
reopenIndexMItem.setEnabled(true);
|
||||
closeIndexMItem.setEnabled(true);
|
||||
exportTermsMItem.setEnabled(true);
|
||||
if (!state.readOnly() && state.hasDirectoryReader()) {
|
||||
optimizeIndexMItem.setEnabled(true);
|
||||
}
|
||||
|
@ -297,6 +315,7 @@ public final class MenuBarProvider {
|
|||
closeIndexMItem.setEnabled(false);
|
||||
optimizeIndexMItem.setEnabled(false);
|
||||
checkIndexMItem.setEnabled(false);
|
||||
exportTermsMItem.setEnabled(false);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,275 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.luke.app.desktop.components.dialog.menubar;
|
||||
|
||||
import javax.swing.BorderFactory;
|
||||
import javax.swing.BoxLayout;
|
||||
import javax.swing.JButton;
|
||||
import javax.swing.JComboBox;
|
||||
import javax.swing.JDialog;
|
||||
import javax.swing.JFileChooser;
|
||||
import javax.swing.JLabel;
|
||||
import javax.swing.JPanel;
|
||||
import javax.swing.JTextField;
|
||||
import javax.swing.SwingWorker;
|
||||
import java.awt.Color;
|
||||
import java.awt.Dialog;
|
||||
import java.awt.Dimension;
|
||||
import java.awt.FlowLayout;
|
||||
import java.awt.GridLayout;
|
||||
import java.awt.Insets;
|
||||
import java.awt.Window;
|
||||
import java.awt.event.ActionEvent;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
|
||||
import org.apache.logging.log4j.Logger;
|
||||
import org.apache.lucene.luke.app.IndexHandler;
|
||||
import org.apache.lucene.luke.app.IndexObserver;
|
||||
import org.apache.lucene.luke.app.LukeState;
|
||||
import org.apache.lucene.luke.app.desktop.Preferences;
|
||||
import org.apache.lucene.luke.app.desktop.PreferencesFactory;
|
||||
import org.apache.lucene.luke.app.desktop.util.DialogOpener;
|
||||
import org.apache.lucene.luke.app.desktop.util.ImageUtils;
|
||||
import org.apache.lucene.luke.app.desktop.util.MessageUtils;
|
||||
import org.apache.lucene.luke.app.desktop.util.StyleConstants;
|
||||
import org.apache.lucene.luke.models.LukeException;
|
||||
import org.apache.lucene.luke.models.tools.IndexTools;
|
||||
import org.apache.lucene.luke.models.tools.IndexToolsFactory;
|
||||
import org.apache.lucene.luke.models.util.IndexUtils;
|
||||
import org.apache.lucene.luke.util.LoggerFactory;
|
||||
import org.apache.lucene.util.NamedThreadFactory;
|
||||
import org.apache.lucene.util.SuppressForbidden;
|
||||
|
||||
/**
|
||||
* Factory of export terms dialog
|
||||
*/
|
||||
public final class ExportTermsDialogFactory implements DialogOpener.DialogFactory {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||
|
||||
private static ExportTermsDialogFactory instance;
|
||||
|
||||
private final IndexToolsFactory indexToolsFactory = new IndexToolsFactory();
|
||||
|
||||
private final Preferences prefs;
|
||||
|
||||
private final IndexHandler indexHandler;
|
||||
|
||||
private final JComboBox<String> fieldCombo = new JComboBox<String>();
|
||||
|
||||
private final JTextField destDir = new JTextField();
|
||||
|
||||
private final JLabel statusLbl = new JLabel();
|
||||
|
||||
private final JLabel indicatorLbl = new JLabel();
|
||||
|
||||
private final ListenerFunctions listeners = new ListenerFunctions();
|
||||
|
||||
private JDialog dialog;
|
||||
|
||||
private IndexTools toolsModel;
|
||||
|
||||
public synchronized static ExportTermsDialogFactory getInstance() throws IOException {
|
||||
if (instance == null) {
|
||||
instance = new ExportTermsDialogFactory();
|
||||
}
|
||||
return instance;
|
||||
}
|
||||
|
||||
private ExportTermsDialogFactory() throws IOException {
|
||||
this.prefs = PreferencesFactory.getInstance();
|
||||
this.indexHandler = IndexHandler.getInstance();
|
||||
indexHandler.addObserver(new Observer());
|
||||
}
|
||||
|
||||
@Override
|
||||
public JDialog create(Window owner, String title, int width, int height) {
|
||||
dialog = new JDialog(owner, title, Dialog.ModalityType.APPLICATION_MODAL);
|
||||
dialog.add(content());
|
||||
dialog.setSize(new Dimension(width, height));
|
||||
dialog.setLocationRelativeTo(owner);
|
||||
dialog.getContentPane().setBackground(prefs.getColorTheme().getBackgroundColor());
|
||||
return dialog;
|
||||
}
|
||||
|
||||
private JPanel content() {
|
||||
JPanel panel = new JPanel(new GridLayout(5, 1));
|
||||
panel.setOpaque(false);
|
||||
panel.setLayout(new BoxLayout(panel, BoxLayout.PAGE_AXIS));
|
||||
panel.setBorder(BorderFactory.createEmptyBorder(15, 15, 15, 15));
|
||||
|
||||
panel.add(currentOpenIndexPanel());
|
||||
panel.add(fieldComboPanel());
|
||||
panel.add(destinationDirPanel());
|
||||
panel.add(statusPanel());
|
||||
panel.add(actionButtonsPanel());
|
||||
|
||||
return panel;
|
||||
}
|
||||
|
||||
private JPanel currentOpenIndexPanel() {
|
||||
JPanel panel = new JPanel(new FlowLayout(FlowLayout.LEADING));
|
||||
panel.setBorder(BorderFactory.createEmptyBorder());
|
||||
panel.setOpaque(false);
|
||||
JLabel label = new JLabel(MessageUtils.getLocalizedMessage("export.terms.label.index_path"));
|
||||
JLabel value = new JLabel(indexHandler.getState().getIndexPath());
|
||||
value.setToolTipText(indexHandler.getState().getIndexPath());
|
||||
panel.add(label);
|
||||
panel.add(value);
|
||||
return panel;
|
||||
}
|
||||
|
||||
private JPanel fieldComboPanel() {
|
||||
JPanel panel = new JPanel(new GridLayout(2, 1));
|
||||
panel.setOpaque(false);
|
||||
panel.add(new JLabel(MessageUtils.getLocalizedMessage("export.terms.field")));
|
||||
panel.add(fieldCombo);
|
||||
return panel;
|
||||
}
|
||||
|
||||
private JPanel destinationDirPanel() {
|
||||
JPanel panel = new JPanel(new GridLayout(2, 1));
|
||||
panel.setOpaque(false);
|
||||
|
||||
panel.add(new JLabel(MessageUtils.getLocalizedMessage("export.terms.label.output_path")));
|
||||
|
||||
JPanel inputPanel = new JPanel(new FlowLayout(FlowLayout.LEADING));
|
||||
inputPanel.setBorder(BorderFactory.createEmptyBorder());
|
||||
inputPanel.setOpaque(false);
|
||||
destDir.setText(System.getProperty("user.home"));
|
||||
destDir.setColumns(60);
|
||||
destDir.setPreferredSize(new Dimension(200, 30));
|
||||
destDir.setFont(StyleConstants.FONT_MONOSPACE_LARGE);
|
||||
destDir.setEditable(false);
|
||||
destDir.setBackground(Color.white);
|
||||
inputPanel.add(destDir);
|
||||
|
||||
JButton browseBtn = new JButton(MessageUtils.getLocalizedMessage("export.terms.button.browse"));
|
||||
browseBtn.setFont(StyleConstants.FONT_BUTTON_LARGE);
|
||||
browseBtn.setMargin(new Insets(3, 0, 3, 0));
|
||||
browseBtn.addActionListener(listeners::browseDirectory);
|
||||
inputPanel.add(browseBtn);
|
||||
|
||||
panel.add(inputPanel);
|
||||
return panel;
|
||||
}
|
||||
|
||||
private JPanel actionButtonsPanel() {
|
||||
// Buttons
|
||||
JPanel execButtons = new JPanel(new FlowLayout(FlowLayout.TRAILING));
|
||||
execButtons.setOpaque(false);
|
||||
JButton exportBtn = new JButton(MessageUtils.getLocalizedMessage("export.terms.button.export"));
|
||||
exportBtn.setMargin(new Insets(3, 0, 3, 0));
|
||||
exportBtn.addActionListener(listeners::export);
|
||||
execButtons.add(exportBtn);
|
||||
JButton closeBtn = new JButton(MessageUtils.getLocalizedMessage("button.close"));
|
||||
closeBtn.setMargin(new Insets(3, 0, 3, 0));
|
||||
closeBtn.addActionListener(e -> dialog.dispose());
|
||||
execButtons.add(closeBtn);
|
||||
return execButtons;
|
||||
}
|
||||
|
||||
private JPanel statusPanel() {
|
||||
JPanel status = new JPanel(new FlowLayout(FlowLayout.LEADING));
|
||||
status.setOpaque(false);
|
||||
indicatorLbl.setIcon(ImageUtils.createImageIcon("indicator.gif", 20, 20));
|
||||
indicatorLbl.setVisible(false);
|
||||
status.add(statusLbl);
|
||||
status.add(indicatorLbl);
|
||||
return status;
|
||||
}
|
||||
|
||||
private class ListenerFunctions {
|
||||
|
||||
@SuppressForbidden(reason = "JFilechooser#getSelectedFile() returns java.io.File")
|
||||
void browseDirectory(ActionEvent e) {
|
||||
JFileChooser fileChooser = new JFileChooser();
|
||||
fileChooser.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);
|
||||
fileChooser.setFileHidingEnabled(false);
|
||||
int retVal = fileChooser.showOpenDialog(dialog);
|
||||
if (retVal == JFileChooser.APPROVE_OPTION) {
|
||||
File f = fileChooser.getSelectedFile();
|
||||
destDir.setText(f.getAbsolutePath());
|
||||
}
|
||||
}
|
||||
|
||||
void export(ActionEvent e) {
|
||||
ExecutorService executor = Executors.newSingleThreadExecutor(new NamedThreadFactory("export-terms-dialog"));
|
||||
|
||||
SwingWorker<Void, Void> task = new SwingWorker<Void, Void>() {
|
||||
|
||||
String filename;
|
||||
|
||||
@Override
|
||||
protected Void doInBackground() {
|
||||
setProgress(0);
|
||||
statusLbl.setText("Exporting...");
|
||||
indicatorLbl.setVisible(true);
|
||||
String field = (String) fieldCombo.getSelectedItem();
|
||||
String directory = destDir.getText();
|
||||
try {
|
||||
filename = toolsModel.exportTerms(directory, field);
|
||||
} catch (LukeException e) {
|
||||
log.error("Error while exporting terms from field " + field, e);
|
||||
statusLbl.setText(MessageUtils.getLocalizedMessage("export.terms.label.error", e.getMessage()));
|
||||
} catch (Exception e) {
|
||||
log.error("Error while exporting terms from field " + field, e);
|
||||
statusLbl.setText(MessageUtils.getLocalizedMessage("message.error.unknown"));
|
||||
throw e;
|
||||
} finally {
|
||||
setProgress(100);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void done() {
|
||||
indicatorLbl.setVisible(false);
|
||||
if (filename != null) {
|
||||
statusLbl.setText(MessageUtils.getLocalizedMessage("export.terms.label.success", filename, "[term],[doc frequency]"));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
executor.submit(task);
|
||||
executor.shutdown();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private class Observer implements IndexObserver {
|
||||
|
||||
@Override
|
||||
public void openIndex(LukeState state) {
|
||||
toolsModel = indexToolsFactory.newInstance(state.getIndexReader(), state.useCompound(), state.keepAllCommits());
|
||||
IndexUtils.getFieldNames(state.getIndexReader()).stream().sorted().forEach(fieldCombo::addItem);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void closeIndex() {
|
||||
fieldCombo.removeAllItems();
|
||||
toolsModel = null;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -94,4 +94,13 @@ public interface IndexTools {
|
|||
* @param dataDir - the directory path which contains sample documents (20 Newsgroups).
|
||||
*/
|
||||
void createNewIndex(String dataDir);
|
||||
|
||||
|
||||
/**
|
||||
* Exports the terms of the given field, together with their document frequencies, to a new file in the destination directory.
|
||||
* @param destDir - destination directory
|
||||
* @param field - field name
|
||||
* @return the path of the file containing the exported terms
|
||||
*/
|
||||
String exportTerms(String destDir, String field);
|
||||
}
|
||||
|
|
|
@ -17,11 +17,15 @@
|
|||
|
||||
package org.apache.lucene.luke.models.tools;
|
||||
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.IOException;
|
||||
import java.io.PrintStream;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Objects;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
|
@ -30,6 +34,9 @@ import org.apache.lucene.index.CheckIndex;
|
|||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.MultiTerms;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.luke.models.LukeException;
|
||||
import org.apache.lucene.luke.models.LukeModel;
|
||||
import org.apache.lucene.luke.models.util.IndexUtils;
|
||||
|
@ -37,6 +44,7 @@ import org.apache.lucene.luke.models.util.twentynewsgroups.Message;
|
|||
import org.apache.lucene.luke.models.util.twentynewsgroups.MessageFilesParser;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/** Default implementation of {@link IndexTools} */
|
||||
public final class IndexToolsImpl extends LukeModel implements IndexTools {
|
||||
|
@ -184,4 +192,25 @@ public final class IndexToolsImpl extends LukeModel implements IndexTools {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
public String exportTerms(String destDir, String field) {
|
||||
String filename = "terms_" + field + "_" + System.currentTimeMillis() + ".out";
|
||||
Path path = Paths.get(destDir, filename);
|
||||
try {
|
||||
Terms terms = MultiTerms.getTerms(reader, field);
|
||||
if (terms == null) {
|
||||
throw new LukeException(String.format(Locale.US, "Field %s does not contain any terms to be exported", field));
|
||||
}
|
||||
try (BufferedWriter writer = Files.newBufferedWriter(path, Charset.forName("UTF-8"))) {
|
||||
TermsEnum termsEnum = terms.iterator();
|
||||
BytesRef term;
|
||||
while (!Thread.currentThread().isInterrupted() && (term = termsEnum.next()) != null) {
|
||||
writer.write(String.format(Locale.US, "%s,%d\n", term.utf8ToString(), +termsEnum.docFreq()));
|
||||
}
|
||||
return path.toString();
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new LukeException("Terms file export for field [" + field + "] to file [" + filename + "] has failed.", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -51,6 +51,7 @@ menu.item.create_index=Create new index
|
|||
menu.item.close_index=Close index
|
||||
menu.item.exit=Exit
|
||||
menu.item.optimize=Optimize index
|
||||
menu.item.export.terms=Export terms
|
||||
menu.item.check_index=Check index
|
||||
menu.item.theme_gray=Gray
|
||||
menu.item.theme_classic=Classic
|
||||
|
@ -83,6 +84,15 @@ createindex.label.data_link=http://kdd.ics.uci.edu/databases/20newsgroups/20news
|
|||
createindex.label.datadir=Data directory:
|
||||
createindex.textarea.data_help1=You can index sample documents from 20 Newsgroups corpus that is available at here:
|
||||
createindex.textarea.data_help2=Download and extract the tgz file, then select the extracted directory path.\nCreating an index with the full size corpus takes some time... :)
|
||||
# Export terms
|
||||
export.terms.label.index_path=Index directory path:
|
||||
export.terms.label.output_path=Output directory path:
|
||||
export.terms.field=Field to export terms from:
|
||||
export.terms.button.export=Export
|
||||
export.terms.button.browse=Browse
|
||||
export.terms.label.success=<html>Terms successfully exported to: <br>{0}<br><br>Output format is: {1}</html>
|
||||
export.terms.label.error=<html>Failed to export: <br>{0}</html>
|
||||
|
||||
# Optimize index
|
||||
optimize.dialog.title=Optimize index
|
||||
optimize.label.index_path=Index directory path:
|
||||
|
|
Loading…
Reference in New Issue