LUCENE-8945: Allow changing the output file delimiter in Luke's "export terms" feature

Signed-off-by: Tomoko Uchida <tomoko@apache.org>
This commit is contained in:
Amish Shah 2019-09-18 19:53:48 +09:00 committed by Tomoko Uchida
parent fd0c8b9e81
commit 369df12c2c
5 changed files with 58 additions and 7 deletions

View File

@ -85,7 +85,7 @@ New Features
* LUCENE-8936: Add SpanishMinimalStemFilter (vinod kumar via Tomoko Uchida)
* LUCENE-8764: Add "export all terms" feature to Luke. (Leonardo Menezes via Tomoko Uchida)
* LUCENE-8764 LUCENE-8945: Add "export all terms and doc freqs" feature to Luke with delimiters. (Leonardo Menezes, Amish Shah via Tomoko Uchida)
* LUCENE-8747: Composite Matches from multiple subqueries now allow access to
their submatches, and a new NamedMatches API allows marking of subqueries

View File

@ -269,7 +269,7 @@ public final class MenuBarProvider {
}
// Opens the dialog titled "Export terms" through a DialogOpener built on exportTermsDialogFactory.
// The factory callback passed to open() has an empty body.
// NOTE(review): 600 and 400/450 are presumably the dialog width and height - confirm against DialogOpener.open.
void showExportTermsDialog(ActionEvent e) {
new DialogOpener<>(exportTermsDialogFactory).open("Export terms", 600, 400,
new DialogOpener<>(exportTermsDialogFactory).open("Export terms", 600, 450,
factory -> {
});
}

View File

@ -38,8 +38,10 @@ import java.awt.event.ActionEvent;
import java.io.File;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.Arrays;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.stream.Stream;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.luke.app.IndexHandler;
@ -76,6 +78,8 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor
private final JComboBox<String> fieldCombo = new JComboBox<String>();
private final JComboBox<String> delimiterCombo = new JComboBox<String>();
private final JTextField destDir = new JTextField();
private final JLabel statusLbl = new JLabel();
@ -88,6 +92,8 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor
private IndexTools toolsModel;
private String selectedDelimiter;
public synchronized static ExportTermsDialogFactory getInstance() throws IOException {
if (instance == null) {
instance = new ExportTermsDialogFactory();
@ -99,6 +105,8 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor
this.prefs = PreferencesFactory.getInstance();
this.indexHandler = IndexHandler.getInstance();
indexHandler.addObserver(new Observer());
Stream.of(Delimiter.values()).forEachOrdered(delimiterVal -> delimiterCombo.addItem(delimiterVal.getDescription()));
delimiterCombo.setSelectedItem(Delimiter.COMMA.getDescription());//Set default delimiter
}
@Override
@ -120,6 +128,7 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor
panel.add(currentOpenIndexPanel());
panel.add(fieldComboPanel());
panel.add(destinationDirPanel());
panel.add(delimiterComboPanel());
panel.add(statusPanel());
panel.add(actionButtonsPanel());
@ -138,6 +147,14 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor
return panel;
}
/**
 * Builds the non-opaque panel that hosts the delimiter selector.
 *
 * <p>The panel uses a 2-row, 1-column grid: the "Select Delimiter: " caption goes in first,
 * followed by the delimiter combo box.
 *
 * @return the assembled delimiter selection panel
 */
private JPanel delimiterComboPanel() {
  JPanel delimiterPanel = new JPanel(new GridLayout(2, 1));
  delimiterPanel.setOpaque(false);

  JLabel caption = new JLabel("Select Delimiter: ");
  delimiterPanel.add(caption);
  delimiterPanel.add(delimiterCombo);

  return delimiterPanel;
}
private JPanel fieldComboPanel() {
JPanel panel = new JPanel(new GridLayout(2, 1));
panel.setOpaque(false);
@ -225,9 +242,11 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor
statusLbl.setText("Exporting...");
indicatorLbl.setVisible(true);
String field = (String) fieldCombo.getSelectedItem();
selectedDelimiter = Delimiter.getSelectedDelimiterValue((String) delimiterCombo.getSelectedItem());
String directory = destDir.getText();
try {
filename = toolsModel.exportTerms(directory, field);
filename = toolsModel.exportTerms(directory, field, selectedDelimiter);
} catch (LukeException e) {
log.error("Error while exporting terms from field " + field, e);
statusLbl.setText(MessageUtils.getLocalizedMessage("export.terms.label.error", e.getMessage()));
@ -245,7 +264,7 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor
// Hides the progress indicator and, when a filename was produced, shows the localized
// "export.terms.label.success" message with the file name and the per-line output format.
// NOTE(review): presumably the completion hook of the background export worker - confirm against the enclosing class.
protected void done() {
indicatorLbl.setVisible(false);
if (filename != null) {
statusLbl.setText(MessageUtils.getLocalizedMessage("export.terms.label.success", filename, "[term],[doc frequency]"));
statusLbl.setText(MessageUtils.getLocalizedMessage("export.terms.label.success", filename, "[term]" + selectedDelimiter + "[doc frequency]"));
}
}
};
@ -272,4 +291,35 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor
}
/**
 * Delimiters that can be selected.
 *
 * <p>Each constant pairs a human-readable description with the separator string it stands for.
 */
private enum Delimiter {
  COMMA("Comma", ","),
  WHITESPACE("Whitespace", " "),
  TAB("Tab", "\t");

  private final String description;
  private final String separator;

  Delimiter(final String description, final String separator) {
    this.description = description;
    this.separator = separator;
  }

  /** Returns the human-readable name of this delimiter. */
  String getDescription() {
    return description;
  }

  /** Returns the separator string this delimiter stands for. */
  String getSeparator() {
    return separator;
  }

  /**
   * Resolves a delimiter description to its separator string.
   *
   * @param delimiter the description to look up
   * @return the separator of the first constant whose description equals {@code delimiter},
   *     or the {@link #COMMA} separator when none matches
   */
  static String getSelectedDelimiterValue(String delimiter) {
    for (Delimiter candidate : values()) {
      if (candidate.description.equals(delimiter)) {
        return candidate.separator;
      }
    }
    // No description matched: fall back to the comma separator.
    return COMMA.separator;
  }
}
}

View File

@ -100,7 +100,8 @@ public interface IndexTools {
* Export terms from given field into a new file on the destination directory
* @param destDir - destination directory
* @param field - field name
* @param delimiter - delimiter to separate terms and their frequency
* @return The file containing the export
*/
String exportTerms(String destDir, String field);
String exportTerms(String destDir, String field, String delimiter);
}

View File

@ -193,7 +193,7 @@ public final class IndexToolsImpl extends LukeModel implements IndexTools {
}
}
public String exportTerms(String destDir, String field) {
public String exportTerms(String destDir, String field, String delimiter) {
String filename = "terms_" + field + "_" + System.currentTimeMillis() + ".out";
Path path = Paths.get(destDir, filename);
try {
@ -205,7 +205,7 @@ public final class IndexToolsImpl extends LukeModel implements IndexTools {
TermsEnum termsEnum = terms.iterator();
BytesRef term;
while (!Thread.currentThread().isInterrupted() && (term = termsEnum.next()) != null) {
writer.write(String.format(Locale.US, "%s,%d\n", term.utf8ToString(), +termsEnum.docFreq()));
writer.write(String.format(Locale.US, "%s%s%d\n", term.utf8ToString(), delimiter, +termsEnum.docFreq()));
}
return path.toString();
}