mirror of https://github.com/apache/lucene.git
LUCENE-8945: Allow changing the output file delimiter in Luke's "export terms" feature
Signed-off-by: Tomoko Uchida <tomoko@apache.org>
This commit is contained in:
parent
4e20227ee5
commit
91f5d2ff79
|
@ -26,7 +26,7 @@ New Features
|
|||
|
||||
* LUCENE-8936: Add SpanishMinimalStemFilter (vinod kumar via Tomoko Uchida)
|
||||
|
||||
* LUCENE-8764: Add "export all terms" feature to Luke. (Leonardo Menezes via Tomoko Uchida)
|
||||
* LUCENE-8764 LUCENE-8945: Add "export all terms and doc freqs" feature to Luke with delimiters. (Leonardo Menezes, Amish Shah via Tomoko Uchida)
|
||||
|
||||
* LUCENE-8747: Composite Matches from multiple subqueries now allow access to
|
||||
their submatches, and a new NamedMatches API allows marking of subqueries
|
||||
|
|
|
@ -269,7 +269,7 @@ public final class MenuBarProvider {
|
|||
}
|
||||
|
||||
/**
 * Opens the dialog titled "Export terms" through a {@code DialogOpener} wrapping
 * {@code exportTermsDialogFactory}; the factory callback passed to {@code open} has an empty body.
 *
 * NOTE(review): this hunk shows both sides of the change. The two {@code open(...)} lines below
 * are the old and new versions of one statement, and the last numeric argument goes from 400 to
 * 450. The two numbers are presumably the dialog width and height; confirm against DialogOpener.
 */
void showExportTermsDialog(ActionEvent e) {
|
||||
new DialogOpener<>(exportTermsDialogFactory).open("Export terms", 600, 400,
|
||||
new DialogOpener<>(exportTermsDialogFactory).open("Export terms", 600, 450,
|
||||
factory -> {
|
||||
});
|
||||
}
|
||||
|
|
|
@ -38,8 +38,10 @@ import java.awt.event.ActionEvent;
|
|||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.Arrays;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import org.apache.logging.log4j.Logger;
|
||||
import org.apache.lucene.luke.app.IndexHandler;
|
||||
|
@ -76,6 +78,8 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor
|
|||
|
||||
private final JComboBox<String> fieldCombo = new JComboBox<String>();
|
||||
|
||||
private final JComboBox<String> delimiterCombo = new JComboBox<String>();
|
||||
|
||||
private final JTextField destDir = new JTextField();
|
||||
|
||||
private final JLabel statusLbl = new JLabel();
|
||||
|
@ -88,6 +92,8 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor
|
|||
|
||||
private IndexTools toolsModel;
|
||||
|
||||
private String selectedDelimiter;
|
||||
|
||||
public synchronized static ExportTermsDialogFactory getInstance() throws IOException {
|
||||
if (instance == null) {
|
||||
instance = new ExportTermsDialogFactory();
|
||||
|
@ -99,6 +105,8 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor
|
|||
this.prefs = PreferencesFactory.getInstance();
|
||||
this.indexHandler = IndexHandler.getInstance();
|
||||
indexHandler.addObserver(new Observer());
|
||||
Stream.of(Delimiter.values()).forEachOrdered(delimiterVal -> delimiterCombo.addItem(delimiterVal.getDescription()));
|
||||
delimiterCombo.setSelectedItem(Delimiter.COMMA.getDescription());//Set default delimiter
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -120,6 +128,7 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor
|
|||
panel.add(currentOpenIndexPanel());
|
||||
panel.add(fieldComboPanel());
|
||||
panel.add(destinationDirPanel());
|
||||
panel.add(delimiterComboPanel());
|
||||
panel.add(statusPanel());
|
||||
panel.add(actionButtonsPanel());
|
||||
|
||||
|
@ -138,6 +147,14 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor
|
|||
return panel;
|
||||
}
|
||||
|
||||
private JPanel delimiterComboPanel() {
|
||||
JPanel panel = new JPanel(new GridLayout(2, 1));
|
||||
panel.setOpaque(false);
|
||||
panel.add(new JLabel("Select Delimiter: "));
|
||||
panel.add(delimiterCombo);
|
||||
return panel;
|
||||
}
|
||||
|
||||
private JPanel fieldComboPanel() {
|
||||
JPanel panel = new JPanel(new GridLayout(2, 1));
|
||||
panel.setOpaque(false);
|
||||
|
@ -225,9 +242,11 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor
|
|||
statusLbl.setText("Exporting...");
|
||||
indicatorLbl.setVisible(true);
|
||||
String field = (String) fieldCombo.getSelectedItem();
|
||||
selectedDelimiter = Delimiter.getSelectedDelimiterValue((String) delimiterCombo.getSelectedItem());
|
||||
|
||||
String directory = destDir.getText();
|
||||
try {
|
||||
filename = toolsModel.exportTerms(directory, field);
|
||||
filename = toolsModel.exportTerms(directory, field, selectedDelimiter);
|
||||
} catch (LukeException e) {
|
||||
log.error("Error while exporting terms from field " + field, e);
|
||||
statusLbl.setText(MessageUtils.getLocalizedMessage("export.terms.label.error", e.getMessage()));
|
||||
|
@ -245,7 +264,7 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor
|
|||
protected void done() {
|
||||
indicatorLbl.setVisible(false);
|
||||
if (filename != null) {
|
||||
statusLbl.setText(MessageUtils.getLocalizedMessage("export.terms.label.success", filename, "[term],[doc frequency]"));
|
||||
statusLbl.setText(MessageUtils.getLocalizedMessage("export.terms.label.success", filename, "[term]" + selectedDelimiter + "[doc frequency]"));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
@ -272,4 +291,35 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor
|
|||
|
||||
}
|
||||
|
||||
/**
|
||||
* Delimiters that can be selected
|
||||
*/
|
||||
private enum Delimiter {
|
||||
COMMA("Comma", ","), WHITESPACE("Whitespace", " "), TAB("Tab", "\t");
|
||||
|
||||
private final String description;
|
||||
private final String separator;
|
||||
|
||||
private Delimiter(final String description, final String separator) {
|
||||
this.description = description;
|
||||
this.separator = separator;
|
||||
}
|
||||
|
||||
String getDescription() {
|
||||
return this.description;
|
||||
}
|
||||
|
||||
String getSeparator() {
|
||||
return this.separator;
|
||||
}
|
||||
|
||||
static String getSelectedDelimiterValue(String delimiter) {
|
||||
return Arrays.stream(Delimiter.values())
|
||||
.filter(e -> e.description.equals(delimiter))
|
||||
.findFirst()
|
||||
.orElse(COMMA)
|
||||
.getSeparator();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -100,7 +100,8 @@ public interface IndexTools {
|
|||
* Export terms from given field into a new file on the destination directory
|
||||
* @param destDir - destination directory
|
||||
* @param field - field name
|
||||
* @param delimiter - delimiter to separate terms and their frequency
|
||||
* @return The file containing the export
|
||||
*/
|
||||
String exportTerms(String destDir, String field);
|
||||
String exportTerms(String destDir, String field, String delimiter);
|
||||
}
|
||||
|
|
|
@ -193,7 +193,7 @@ public final class IndexToolsImpl extends LukeModel implements IndexTools {
|
|||
}
|
||||
}
|
||||
|
||||
public String exportTerms(String destDir, String field) {
|
||||
public String exportTerms(String destDir, String field, String delimiter) {
|
||||
String filename = "terms_" + field + "_" + System.currentTimeMillis() + ".out";
|
||||
Path path = Paths.get(destDir, filename);
|
||||
try {
|
||||
|
@ -205,7 +205,7 @@ public final class IndexToolsImpl extends LukeModel implements IndexTools {
|
|||
TermsEnum termsEnum = terms.iterator();
|
||||
BytesRef term;
|
||||
while (!Thread.currentThread().isInterrupted() && (term = termsEnum.next()) != null) {
|
||||
writer.write(String.format(Locale.US, "%s,%d\n", term.utf8ToString(), +termsEnum.docFreq()));
|
||||
writer.write(String.format(Locale.US, "%s%s%d\n", term.utf8ToString(), delimiter, +termsEnum.docFreq()));
|
||||
}
|
||||
return path.toString();
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue