LUCENE-8945: Allow changing the output file delimiter in Luke's "export terms" feature

Signed-off-by: Tomoko Uchida <tomoko@apache.org>
This commit is contained in:
Amish Shah 2019-09-18 19:53:48 +09:00 committed by Tomoko Uchida
parent fd0c8b9e81
commit 369df12c2c
5 changed files with 58 additions and 7 deletions

View File

@ -85,7 +85,7 @@ New Features
* LUCENE-8936: Add SpanishMinimalStemFilter (vinod kumar via Tomoko Uchida) * LUCENE-8936: Add SpanishMinimalStemFilter (vinod kumar via Tomoko Uchida)
* LUCENE-8764: Add "export all terms" feature to Luke. (Leonardo Menezes via Tomoko Uchida) * LUCENE-8764 LUCENE-8945: Add "export all terms and doc freqs" feature to Luke with delimiters. (Leonardo Menezes, Amish Shah via Tomoko Uchida)
* LUCENE-8747: Composite Matches from multiple subqueries now allow access to * LUCENE-8747: Composite Matches from multiple subqueries now allow access to
their submatches, and a new NamedMatches API allows marking of subqueries their submatches, and a new NamedMatches API allows marking of subqueries

View File

@ -269,7 +269,7 @@ public final class MenuBarProvider {
} }
void showExportTermsDialog(ActionEvent e) { void showExportTermsDialog(ActionEvent e) {
new DialogOpener<>(exportTermsDialogFactory).open("Export terms", 600, 400, new DialogOpener<>(exportTermsDialogFactory).open("Export terms", 600, 450,
factory -> { factory -> {
}); });
} }

View File

@ -38,8 +38,10 @@ import java.awt.event.ActionEvent;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.lang.invoke.MethodHandles; import java.lang.invoke.MethodHandles;
import java.util.Arrays;
import java.util.concurrent.ExecutorService; import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors; import java.util.concurrent.Executors;
import java.util.stream.Stream;
import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.Logger;
import org.apache.lucene.luke.app.IndexHandler; import org.apache.lucene.luke.app.IndexHandler;
@ -76,6 +78,8 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor
private final JComboBox<String> fieldCombo = new JComboBox<String>(); private final JComboBox<String> fieldCombo = new JComboBox<String>();
private final JComboBox<String> delimiterCombo = new JComboBox<String>();
private final JTextField destDir = new JTextField(); private final JTextField destDir = new JTextField();
private final JLabel statusLbl = new JLabel(); private final JLabel statusLbl = new JLabel();
@ -88,6 +92,8 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor
private IndexTools toolsModel; private IndexTools toolsModel;
private String selectedDelimiter;
public synchronized static ExportTermsDialogFactory getInstance() throws IOException { public synchronized static ExportTermsDialogFactory getInstance() throws IOException {
if (instance == null) { if (instance == null) {
instance = new ExportTermsDialogFactory(); instance = new ExportTermsDialogFactory();
@ -99,6 +105,8 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor
this.prefs = PreferencesFactory.getInstance(); this.prefs = PreferencesFactory.getInstance();
this.indexHandler = IndexHandler.getInstance(); this.indexHandler = IndexHandler.getInstance();
indexHandler.addObserver(new Observer()); indexHandler.addObserver(new Observer());
Stream.of(Delimiter.values()).forEachOrdered(delimiterVal -> delimiterCombo.addItem(delimiterVal.getDescription()));
delimiterCombo.setSelectedItem(Delimiter.COMMA.getDescription());//Set default delimiter
} }
@Override @Override
@ -120,6 +128,7 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor
panel.add(currentOpenIndexPanel()); panel.add(currentOpenIndexPanel());
panel.add(fieldComboPanel()); panel.add(fieldComboPanel());
panel.add(destinationDirPanel()); panel.add(destinationDirPanel());
panel.add(delimiterComboPanel());
panel.add(statusPanel()); panel.add(statusPanel());
panel.add(actionButtonsPanel()); panel.add(actionButtonsPanel());
@ -138,6 +147,14 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor
return panel; return panel;
} }
/**
 * Builds the dialog section used to pick the output delimiter.
 *
 * @return a non-opaque panel laid out as two rows: a caption label followed by
 *         the delimiter combo box
 */
private JPanel delimiterComboPanel() {
  final JLabel caption = new JLabel("Select Delimiter: ");
  final GridLayout twoRowsOneColumn = new GridLayout(2, 1);

  JPanel section = new JPanel(twoRowsOneColumn);
  section.setOpaque(false);
  section.add(caption);
  section.add(delimiterCombo);
  return section;
}
private JPanel fieldComboPanel() { private JPanel fieldComboPanel() {
JPanel panel = new JPanel(new GridLayout(2, 1)); JPanel panel = new JPanel(new GridLayout(2, 1));
panel.setOpaque(false); panel.setOpaque(false);
@ -225,9 +242,11 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor
statusLbl.setText("Exporting..."); statusLbl.setText("Exporting...");
indicatorLbl.setVisible(true); indicatorLbl.setVisible(true);
String field = (String) fieldCombo.getSelectedItem(); String field = (String) fieldCombo.getSelectedItem();
selectedDelimiter = Delimiter.getSelectedDelimiterValue((String) delimiterCombo.getSelectedItem());
String directory = destDir.getText(); String directory = destDir.getText();
try { try {
filename = toolsModel.exportTerms(directory, field); filename = toolsModel.exportTerms(directory, field, selectedDelimiter);
} catch (LukeException e) { } catch (LukeException e) {
log.error("Error while exporting terms from field " + field, e); log.error("Error while exporting terms from field " + field, e);
statusLbl.setText(MessageUtils.getLocalizedMessage("export.terms.label.error", e.getMessage())); statusLbl.setText(MessageUtils.getLocalizedMessage("export.terms.label.error", e.getMessage()));
@ -245,7 +264,7 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor
protected void done() { protected void done() {
indicatorLbl.setVisible(false); indicatorLbl.setVisible(false);
if (filename != null) { if (filename != null) {
statusLbl.setText(MessageUtils.getLocalizedMessage("export.terms.label.success", filename, "[term],[doc frequency]")); statusLbl.setText(MessageUtils.getLocalizedMessage("export.terms.label.success", filename, "[term]" + selectedDelimiter + "[doc frequency]"));
} }
} }
}; };
@ -272,4 +291,35 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor
} }
/**
 * Delimiters that can be selected to separate a term from its doc frequency.
 * Each constant pairs a human-readable description with the separator string it stands for.
 */
private enum Delimiter {
  COMMA("Comma", ","),
  WHITESPACE("Whitespace", " "),
  TAB("Tab", "\t");

  /** Human-readable name of this delimiter. */
  private final String description;

  /** The separator string this delimiter represents. */
  private final String separator;

  Delimiter(final String description, final String separator) {
    this.description = description;
    this.separator = separator;
  }

  /**
   * @return the human-readable name of this delimiter
   */
  String getDescription() {
    return description;
  }

  /**
   * @return the separator string of this delimiter
   */
  String getSeparator() {
    return separator;
  }

  /**
   * Resolves a delimiter description to its separator string.
   *
   * @param delimiter description to look up; compared with exact, case-sensitive equality
   * @return the separator of the first constant whose description matches, or the
   *         separator of {@link #COMMA} when nothing matches (including {@code null})
   */
  static String getSelectedDelimiterValue(String delimiter) {
    for (Delimiter candidate : values()) {
      if (candidate.description.equals(delimiter)) {
        return candidate.separator;
      }
    }
    // No constant carries this description: fall back to the comma delimiter.
    return COMMA.separator;
  }
}
} }

View File

@ -100,7 +100,8 @@ public interface IndexTools {
* Export terms from given field into a new file on the destination directory * Export terms from given field into a new file on the destination directory
* @param destDir - destination directory * @param destDir - destination directory
* @param field - field name * @param field - field name
* @param delimiter - delimiter to separate terms and their frequency
* @return The file containing the export * @return The file containing the export
*/ */
String exportTerms(String destDir, String field); String exportTerms(String destDir, String field, String delimiter);
} }

View File

@ -193,7 +193,7 @@ public final class IndexToolsImpl extends LukeModel implements IndexTools {
} }
} }
public String exportTerms(String destDir, String field) { public String exportTerms(String destDir, String field, String delimiter) {
String filename = "terms_" + field + "_" + System.currentTimeMillis() + ".out"; String filename = "terms_" + field + "_" + System.currentTimeMillis() + ".out";
Path path = Paths.get(destDir, filename); Path path = Paths.get(destDir, filename);
try { try {
@ -205,7 +205,7 @@ public final class IndexToolsImpl extends LukeModel implements IndexTools {
TermsEnum termsEnum = terms.iterator(); TermsEnum termsEnum = terms.iterator();
BytesRef term; BytesRef term;
while (!Thread.currentThread().isInterrupted() && (term = termsEnum.next()) != null) { while (!Thread.currentThread().isInterrupted() && (term = termsEnum.next()) != null) {
writer.write(String.format(Locale.US, "%s,%d\n", term.utf8ToString(), +termsEnum.docFreq())); writer.write(String.format(Locale.US, "%s%s%d\n", term.utf8ToString(), delimiter, +termsEnum.docFreq()));
} }
return path.toString(); return path.toString();
} }