mirror of https://github.com/apache/lucene.git
LUCENE-8945: Allow changing the output file delimiter in the Luke "export terms" feature
Signed-off-by: Tomoko Uchida <tomoko@apache.org>
This commit is contained in:
parent
fd0c8b9e81
commit
369df12c2c
|
@ -85,7 +85,7 @@ New Features
|
||||||
|
|
||||||
* LUCENE-8936: Add SpanishMinimalStemFilter (vinod kumar via Tomoko Uchida)
|
* LUCENE-8936: Add SpanishMinimalStemFilter (vinod kumar via Tomoko Uchida)
|
||||||
|
|
||||||
* LUCENE-8764: Add "export all terms" feature to Luke. (Leonardo Menezes via Tomoko Uchida)
|
* LUCENE-8764 LUCENE-8945: Add "export all terms and doc freqs" feature to Luke with delimiters. (Leonardo Menezes, Amish Shah via Tomoko Uchida)
|
||||||
|
|
||||||
* LUCENE-8747: Composite Matches from multiple subqueries now allow access to
|
* LUCENE-8747: Composite Matches from multiple subqueries now allow access to
|
||||||
their submatches, and a new NamedMatches API allows marking of subqueries
|
their submatches, and a new NamedMatches API allows marking of subqueries
|
||||||
|
|
|
@ -269,7 +269,7 @@ public final class MenuBarProvider {
|
||||||
}
|
}
|
||||||
|
|
||||||
void showExportTermsDialog(ActionEvent e) {
|
void showExportTermsDialog(ActionEvent e) {
|
||||||
new DialogOpener<>(exportTermsDialogFactory).open("Export terms", 600, 400,
|
new DialogOpener<>(exportTermsDialogFactory).open("Export terms", 600, 450,
|
||||||
factory -> {
|
factory -> {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
|
@ -38,8 +38,10 @@ import java.awt.event.ActionEvent;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.lang.invoke.MethodHandles;
|
import java.lang.invoke.MethodHandles;
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.concurrent.ExecutorService;
|
import java.util.concurrent.ExecutorService;
|
||||||
import java.util.concurrent.Executors;
|
import java.util.concurrent.Executors;
|
||||||
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
import org.apache.logging.log4j.Logger;
|
import org.apache.logging.log4j.Logger;
|
||||||
import org.apache.lucene.luke.app.IndexHandler;
|
import org.apache.lucene.luke.app.IndexHandler;
|
||||||
|
@ -76,6 +78,8 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor
|
||||||
|
|
||||||
private final JComboBox<String> fieldCombo = new JComboBox<String>();
|
private final JComboBox<String> fieldCombo = new JComboBox<String>();
|
||||||
|
|
||||||
|
private final JComboBox<String> delimiterCombo = new JComboBox<String>();
|
||||||
|
|
||||||
private final JTextField destDir = new JTextField();
|
private final JTextField destDir = new JTextField();
|
||||||
|
|
||||||
private final JLabel statusLbl = new JLabel();
|
private final JLabel statusLbl = new JLabel();
|
||||||
|
@ -88,6 +92,8 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor
|
||||||
|
|
||||||
private IndexTools toolsModel;
|
private IndexTools toolsModel;
|
||||||
|
|
||||||
|
private String selectedDelimiter;
|
||||||
|
|
||||||
public synchronized static ExportTermsDialogFactory getInstance() throws IOException {
|
public synchronized static ExportTermsDialogFactory getInstance() throws IOException {
|
||||||
if (instance == null) {
|
if (instance == null) {
|
||||||
instance = new ExportTermsDialogFactory();
|
instance = new ExportTermsDialogFactory();
|
||||||
|
@ -99,6 +105,8 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor
|
||||||
this.prefs = PreferencesFactory.getInstance();
|
this.prefs = PreferencesFactory.getInstance();
|
||||||
this.indexHandler = IndexHandler.getInstance();
|
this.indexHandler = IndexHandler.getInstance();
|
||||||
indexHandler.addObserver(new Observer());
|
indexHandler.addObserver(new Observer());
|
||||||
|
Stream.of(Delimiter.values()).forEachOrdered(delimiterVal -> delimiterCombo.addItem(delimiterVal.getDescription()));
|
||||||
|
delimiterCombo.setSelectedItem(Delimiter.COMMA.getDescription());//Set default delimiter
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -120,6 +128,7 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor
|
||||||
panel.add(currentOpenIndexPanel());
|
panel.add(currentOpenIndexPanel());
|
||||||
panel.add(fieldComboPanel());
|
panel.add(fieldComboPanel());
|
||||||
panel.add(destinationDirPanel());
|
panel.add(destinationDirPanel());
|
||||||
|
panel.add(delimiterComboPanel());
|
||||||
panel.add(statusPanel());
|
panel.add(statusPanel());
|
||||||
panel.add(actionButtonsPanel());
|
panel.add(actionButtonsPanel());
|
||||||
|
|
||||||
|
@ -138,6 +147,14 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor
|
||||||
return panel;
|
return panel;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private JPanel delimiterComboPanel() {
|
||||||
|
JPanel panel = new JPanel(new GridLayout(2, 1));
|
||||||
|
panel.setOpaque(false);
|
||||||
|
panel.add(new JLabel("Select Delimiter: "));
|
||||||
|
panel.add(delimiterCombo);
|
||||||
|
return panel;
|
||||||
|
}
|
||||||
|
|
||||||
private JPanel fieldComboPanel() {
|
private JPanel fieldComboPanel() {
|
||||||
JPanel panel = new JPanel(new GridLayout(2, 1));
|
JPanel panel = new JPanel(new GridLayout(2, 1));
|
||||||
panel.setOpaque(false);
|
panel.setOpaque(false);
|
||||||
|
@ -225,9 +242,11 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor
|
||||||
statusLbl.setText("Exporting...");
|
statusLbl.setText("Exporting...");
|
||||||
indicatorLbl.setVisible(true);
|
indicatorLbl.setVisible(true);
|
||||||
String field = (String) fieldCombo.getSelectedItem();
|
String field = (String) fieldCombo.getSelectedItem();
|
||||||
|
selectedDelimiter = Delimiter.getSelectedDelimiterValue((String) delimiterCombo.getSelectedItem());
|
||||||
|
|
||||||
String directory = destDir.getText();
|
String directory = destDir.getText();
|
||||||
try {
|
try {
|
||||||
filename = toolsModel.exportTerms(directory, field);
|
filename = toolsModel.exportTerms(directory, field, selectedDelimiter);
|
||||||
} catch (LukeException e) {
|
} catch (LukeException e) {
|
||||||
log.error("Error while exporting terms from field " + field, e);
|
log.error("Error while exporting terms from field " + field, e);
|
||||||
statusLbl.setText(MessageUtils.getLocalizedMessage("export.terms.label.error", e.getMessage()));
|
statusLbl.setText(MessageUtils.getLocalizedMessage("export.terms.label.error", e.getMessage()));
|
||||||
|
@ -245,7 +264,7 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor
|
||||||
protected void done() {
|
protected void done() {
|
||||||
indicatorLbl.setVisible(false);
|
indicatorLbl.setVisible(false);
|
||||||
if (filename != null) {
|
if (filename != null) {
|
||||||
statusLbl.setText(MessageUtils.getLocalizedMessage("export.terms.label.success", filename, "[term],[doc frequency]"));
|
statusLbl.setText(MessageUtils.getLocalizedMessage("export.terms.label.success", filename, "[term]" + selectedDelimiter + "[doc frequency]"));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -272,4 +291,35 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Delimiters that can be selected
|
||||||
|
*/
|
||||||
|
private enum Delimiter {
|
||||||
|
COMMA("Comma", ","), WHITESPACE("Whitespace", " "), TAB("Tab", "\t");
|
||||||
|
|
||||||
|
private final String description;
|
||||||
|
private final String separator;
|
||||||
|
|
||||||
|
private Delimiter(final String description, final String separator) {
|
||||||
|
this.description = description;
|
||||||
|
this.separator = separator;
|
||||||
|
}
|
||||||
|
|
||||||
|
String getDescription() {
|
||||||
|
return this.description;
|
||||||
|
}
|
||||||
|
|
||||||
|
String getSeparator() {
|
||||||
|
return this.separator;
|
||||||
|
}
|
||||||
|
|
||||||
|
static String getSelectedDelimiterValue(String delimiter) {
|
||||||
|
return Arrays.stream(Delimiter.values())
|
||||||
|
.filter(e -> e.description.equals(delimiter))
|
||||||
|
.findFirst()
|
||||||
|
.orElse(COMMA)
|
||||||
|
.getSeparator();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -100,7 +100,8 @@ public interface IndexTools {
|
||||||
* Export terms from given field into a new file on the destination directory
|
* Export terms from given field into a new file on the destination directory
|
||||||
* @param destDir - destination directory
|
* @param destDir - destination directory
|
||||||
* @param field - field name
|
* @param field - field name
|
||||||
|
* @param delimiter - delimiter to separate terms and their frequency
|
||||||
* @return The file containing the export
|
* @return The file containing the export
|
||||||
*/
|
*/
|
||||||
String exportTerms(String destDir, String field);
|
String exportTerms(String destDir, String field, String delimiter);
|
||||||
}
|
}
|
||||||
|
|
|
@ -193,7 +193,7 @@ public final class IndexToolsImpl extends LukeModel implements IndexTools {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public String exportTerms(String destDir, String field) {
|
public String exportTerms(String destDir, String field, String delimiter) {
|
||||||
String filename = "terms_" + field + "_" + System.currentTimeMillis() + ".out";
|
String filename = "terms_" + field + "_" + System.currentTimeMillis() + ".out";
|
||||||
Path path = Paths.get(destDir, filename);
|
Path path = Paths.get(destDir, filename);
|
||||||
try {
|
try {
|
||||||
|
@ -205,7 +205,7 @@ public final class IndexToolsImpl extends LukeModel implements IndexTools {
|
||||||
TermsEnum termsEnum = terms.iterator();
|
TermsEnum termsEnum = terms.iterator();
|
||||||
BytesRef term;
|
BytesRef term;
|
||||||
while (!Thread.currentThread().isInterrupted() && (term = termsEnum.next()) != null) {
|
while (!Thread.currentThread().isInterrupted() && (term = termsEnum.next()) != null) {
|
||||||
writer.write(String.format(Locale.US, "%s,%d\n", term.utf8ToString(), +termsEnum.docFreq()));
|
writer.write(String.format(Locale.US, "%s%s%d\n", term.utf8ToString(), delimiter, +termsEnum.docFreq()));
|
||||||
}
|
}
|
||||||
return path.toString();
|
return path.toString();
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue