diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 0a24caaf2ea..2475154c137 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -90,6 +90,8 @@ Improvements best for WEIGHT_MATCHES mode. Consequently queries produced by ComplexPhraseQueryParser and the surround QueryParser will now highlight correctly. (David Smiley) +* LUCENE-8793: Luke enhanced UI for CustomAnalyzer: show detailed analysis steps. (Jun Ohtani via Tomoko Uchida) + Optimizations * LUCENE-8796: Use exponential search instead of binary search in diff --git a/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/AnalysisPanelProvider.java b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/AnalysisPanelProvider.java index 70c2291bbca..864dfcd57ff 100644 --- a/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/AnalysisPanelProvider.java +++ b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/AnalysisPanelProvider.java @@ -20,14 +20,13 @@ package org.apache.lucene.luke.app.desktop.components; import javax.swing.BorderFactory; import javax.swing.ButtonGroup; import javax.swing.JButton; +import javax.swing.JCheckBox; import javax.swing.JLabel; import javax.swing.JPanel; import javax.swing.JRadioButton; import javax.swing.JScrollPane; import javax.swing.JSplitPane; -import javax.swing.JTable; import javax.swing.JTextArea; -import javax.swing.ListSelectionModel; import java.awt.BorderLayout; import java.awt.Color; import java.awt.FlowLayout; @@ -37,11 +36,9 @@ import java.awt.event.ActionEvent; import java.awt.event.MouseAdapter; import java.awt.event.MouseEvent; import java.io.IOException; -import java.util.List; import java.util.Objects; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; -import java.util.stream.Collectors; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.custom.CustomAnalyzer; @@ -54,13 +51,16 @@ import 
org.apache.lucene.luke.app.desktop.components.fragments.analysis.CustomAn import org.apache.lucene.luke.app.desktop.components.fragments.analysis.CustomAnalyzerPanelProvider; import org.apache.lucene.luke.app.desktop.components.fragments.analysis.PresetAnalyzerPanelOperator; import org.apache.lucene.luke.app.desktop.components.fragments.analysis.PresetAnalyzerPanelProvider; +import org.apache.lucene.luke.app.desktop.components.fragments.analysis.SimpleAnalyzeResultPanelOperator; +import org.apache.lucene.luke.app.desktop.components.fragments.analysis.SimpleAnalyzeResultPanelProvider; +import org.apache.lucene.luke.app.desktop.components.fragments.analysis.StepByStepAnalyzeResultPanelOperator; +import org.apache.lucene.luke.app.desktop.components.fragments.analysis.StepByStepAnalyzeResultPanelProvider; import org.apache.lucene.luke.app.desktop.components.fragments.search.AnalyzerTabOperator; import org.apache.lucene.luke.app.desktop.components.fragments.search.MLTTabOperator; import org.apache.lucene.luke.app.desktop.util.DialogOpener; import org.apache.lucene.luke.app.desktop.util.FontUtils; import org.apache.lucene.luke.app.desktop.util.MessageUtils; import org.apache.lucene.luke.app.desktop.util.StyleConstants; -import org.apache.lucene.luke.app.desktop.util.TableUtils; import org.apache.lucene.luke.models.analysis.Analysis; import org.apache.lucene.luke.models.analysis.AnalysisFactory; import org.apache.lucene.luke.models.analysis.CustomAnalyzerConfig; @@ -97,12 +97,16 @@ public final class AnalysisPanelProvider implements AnalysisTabOperator { private final JTextArea inputArea = new JTextArea(); - private final JTable tokensTable = new JTable(); + private final JPanel lowerPanel = new JPanel(new BorderLayout()); + + private final JPanel simpleResult; + + private final JPanel stepByStepResult; + + private final JCheckBox stepByStepCB = new JCheckBox(); private final ListenerFunctions listeners = new ListenerFunctions(); - private List tokens; - private Analysis 
analysisModel; public AnalysisPanelProvider() throws IOException { @@ -117,11 +121,15 @@ public final class AnalysisPanelProvider implements AnalysisTabOperator { this.analysisModel = new AnalysisFactory().newInstance(); analysisModel.createAnalyzerFromClassName(StandardAnalyzer.class.getName()); + this.simpleResult = new SimpleAnalyzeResultPanelProvider(tokenAttrDialogFactory).get(); + this.stepByStepResult = new StepByStepAnalyzeResultPanelProvider(tokenAttrDialogFactory).get(); + operatorRegistry.register(AnalysisTabOperator.class, this); operatorRegistry.get(PresetAnalyzerPanelOperator.class).ifPresent(operator -> { // Scanning all Analyzer types will take time... - ExecutorService executorService = Executors.newFixedThreadPool(1, new NamedThreadFactory("load-preset-analyzer-types")); + ExecutorService executorService = + Executors.newFixedThreadPool(1, new NamedThreadFactory("load-preset-analyzer-types")); executorService.execute(() -> { operator.setPresetAnalyzers(analysisModel.getPresetAnalyzerTypes()); operator.setSelectedAnalyzer(analysisModel.currentAnalyzer().getClass()); @@ -209,53 +217,39 @@ public final class AnalysisPanelProvider implements AnalysisTabOperator { inputArea.setText(MessageUtils.getLocalizedMessage("analysis.textarea.prompt")); input.add(new JScrollPane(inputArea)); - JButton executeBtn = new JButton(FontUtils.elegantIconHtml("", MessageUtils.getLocalizedMessage("analysis.button.test"))); + JButton executeBtn = new JButton(FontUtils.elegantIconHtml("", + MessageUtils.getLocalizedMessage("analysis.button.test"))); executeBtn.setFont(StyleConstants.FONT_BUTTON_LARGE); executeBtn.setMargin(new Insets(3, 3, 3, 3)); executeBtn.addActionListener(listeners::executeAnalysis); input.add(executeBtn); + stepByStepCB.setText(MessageUtils.getLocalizedMessage("analysis.checkbox.step_by_step")); + stepByStepCB.setSelected(false); + stepByStepCB.setOpaque(false); + stepByStepCB.setVisible(false); + input.add(stepByStepCB); + JButton clearBtn = new 
JButton(MessageUtils.getLocalizedMessage("button.clear")); clearBtn.setFont(StyleConstants.FONT_BUTTON_LARGE); clearBtn.setMargin(new Insets(5, 5, 5, 5)); clearBtn.addActionListener(e -> { inputArea.setText(""); - TableUtils.setupTable(tokensTable, ListSelectionModel.SINGLE_SELECTION, new TokensTableModel(), - null, - TokensTableModel.Column.TERM.getColumnWidth(), - TokensTableModel.Column.ATTR.getColumnWidth()); + operatorRegistry.get(SimpleAnalyzeResultPanelOperator.class).ifPresent( + SimpleAnalyzeResultPanelOperator::clearTable); + operatorRegistry.get(StepByStepAnalyzeResultPanelOperator.class).ifPresent( + StepByStepAnalyzeResultPanelOperator::clearTable); }); input.add(clearBtn); inner1.add(input, BorderLayout.CENTER); - JPanel inner2 = new JPanel(new BorderLayout()); - inner2.setOpaque(false); + lowerPanel.setOpaque(false); + lowerPanel.setBorder(BorderFactory.createEmptyBorder(3, 3, 3, 3)); + lowerPanel.add(inner1, BorderLayout.PAGE_START); + lowerPanel.add(this.simpleResult, BorderLayout.CENTER); - JPanel hint = new JPanel(new FlowLayout(FlowLayout.LEADING)); - hint.setOpaque(false); - hint.add(new JLabel(MessageUtils.getLocalizedMessage("analysis.hint.show_attributes"))); - inner2.add(hint, BorderLayout.PAGE_START); - - - TableUtils.setupTable(tokensTable, ListSelectionModel.SINGLE_SELECTION, new TokensTableModel(), - new MouseAdapter() { - @Override - public void mouseClicked(MouseEvent e) { - listeners.showAttributeValues(e); - } - }, - TokensTableModel.Column.TERM.getColumnWidth(), - TokensTableModel.Column.ATTR.getColumnWidth()); - inner2.add(new JScrollPane(tokensTable), BorderLayout.CENTER); - - JPanel panel = new JPanel(new BorderLayout()); - panel.setOpaque(false); - panel.setBorder(BorderFactory.createEmptyBorder(3, 3, 3, 3)); - panel.add(inner1, BorderLayout.PAGE_START); - panel.add(inner2, BorderLayout.CENTER); - - return panel; + return lowerPanel; } // control methods @@ -269,7 +263,8 @@ public final class AnalysisPanelProvider implements 
AnalysisTabOperator { operator.setPresetAnalyzers(analysisModel.getPresetAnalyzerTypes()); operator.setSelectedAnalyzer(analysisModel.currentAnalyzer().getClass()); }); - + stepByStepCB.setSelected(false); + stepByStepCB.setVisible(false); } else if (command.equalsIgnoreCase(TYPE_CUSTOM)) { mainPanel.remove(preset); mainPanel.add(custom, BorderLayout.CENTER); @@ -278,6 +273,7 @@ public final class AnalysisPanelProvider implements AnalysisTabOperator { operator.setAnalysisModel(analysisModel); operator.resetAnalysisComponents(); }); + stepByStepCB.setVisible(true); } mainPanel.setVisible(false); mainPanel.setVisible(true); @@ -289,11 +285,32 @@ public final class AnalysisPanelProvider implements AnalysisTabOperator { messageBroker.showStatusMessage(MessageUtils.getLocalizedMessage("analysis.message.empry_input")); } - tokens = analysisModel.analyze(text); - tokensTable.setModel(new TokensTableModel(tokens)); - tokensTable.setShowGrid(true); - tokensTable.getColumnModel().getColumn(TokensTableModel.Column.TERM.getIndex()).setPreferredWidth(TokensTableModel.Column.TERM.getColumnWidth()); - tokensTable.getColumnModel().getColumn(TokensTableModel.Column.ATTR.getIndex()).setPreferredWidth(TokensTableModel.Column.ATTR.getColumnWidth()); + lowerPanel.remove(stepByStepResult); + lowerPanel.add(simpleResult, BorderLayout.CENTER); + + operatorRegistry.get(SimpleAnalyzeResultPanelOperator.class).ifPresent(operator -> { + operator.setAnalysisModel(analysisModel); + operator.executeAnalysis(text); + }); + + lowerPanel.setVisible(false); + lowerPanel.setVisible(true); + } + + void executeAnalysisStepByStep() { + String text = inputArea.getText(); + if (Objects.isNull(text) || text.isEmpty()) { + messageBroker.showStatusMessage(MessageUtils.getLocalizedMessage("analysis.message.empry_input")); + } + lowerPanel.remove(simpleResult); + lowerPanel.add(stepByStepResult, BorderLayout.CENTER); + operatorRegistry.get(StepByStepAnalyzeResultPanelOperator.class).ifPresent(operator -> { + 
operator.setAnalysisModel(analysisModel); + operator.executeAnalysisStepByStep(text); + }); + + lowerPanel.setVisible(false); + lowerPanel.setVisible(true); } void showAnalysisChainDialog() { @@ -306,17 +323,6 @@ public final class AnalysisPanelProvider implements AnalysisTabOperator { } } - void showAttributeValues(int selectedIndex) { - String term = tokens.get(selectedIndex).getTerm(); - List attributes = tokens.get(selectedIndex).getAttributes(); - new DialogOpener<>(tokenAttrDialogFactory).open("Token Attributes", 650, 400, - factory -> { - factory.setTerm(term); - factory.setAttributes(attributes); - }); - } - - @Override public void setAnalyzerByType(String analyzerType) { analysisModel.createAnalyzerFromClassName(analyzerType); @@ -359,81 +365,14 @@ public final class AnalysisPanelProvider implements AnalysisTabOperator { } void executeAnalysis(ActionEvent e) { - AnalysisPanelProvider.this.executeAnalysis(); - } - - void showAttributeValues(MouseEvent e) { - if (e.getClickCount() != 2 || e.isConsumed()) { - return; - } - int selectedIndex = tokensTable.rowAtPoint(e.getPoint()); - if (selectedIndex < 0 || selectedIndex >= tokensTable.getRowCount()) { - return; - } - AnalysisPanelProvider.this.showAttributeValues(selectedIndex); - } - - } - - static final class TokensTableModel extends TableModelBase { - - enum Column implements TableColumnInfo { - TERM("Term", 0, String.class, 150), - ATTR("Attributes", 1, String.class, 1000); - - private final String colName; - private final int index; - private final Class type; - private final int width; - - Column(String colName, int index, Class type, int width) { - this.colName = colName; - this.index = index; - this.type = type; - this.width = width; - } - - @Override - public String getColName() { - return colName; - } - - @Override - public int getIndex() { - return index; - } - - @Override - public Class getType() { - return type; - } - - @Override - public int getColumnWidth() { - return width; + if 
(AnalysisPanelProvider.this.stepByStepCB.isSelected()) { + AnalysisPanelProvider.this.executeAnalysisStepByStep(); + } else { + AnalysisPanelProvider.this.executeAnalysis(); } } - TokensTableModel() { - super(); - } - - TokensTableModel(List tokens) { - super(tokens.size()); - for (int i = 0; i < tokens.size(); i++) { - Analysis.Token token = tokens.get(i); - data[i][Column.TERM.getIndex()] = token.getTerm(); - List attValues = token.getAttributes().stream() - .flatMap(att -> att.getAttValues().entrySet().stream() - .map(e -> e.getKey() + "=" + e.getValue())) - .collect(Collectors.toList()); - data[i][Column.ATTR.getIndex()] = String.join(",", attValues); - } - } - - @Override - protected Column[] columnInfos() { - return Column.values(); + void executeAnalysisStepByStep(ActionEvent e) { } } diff --git a/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/SearchPanelProvider.java b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/SearchPanelProvider.java index f94517a813e..395d8359702 100644 --- a/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/SearchPanelProvider.java +++ b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/SearchPanelProvider.java @@ -255,7 +255,7 @@ public final class SearchPanelProvider implements SearchTabOperator { c.insets = new Insets(2, 0, 2, 2); panel.add(termQueryCB, c); - queryStringTA.setRows(4); + queryStringTA.setRows(3); queryStringTA.setLineWrap(true); queryStringTA.setText("*:*"); c.gridx = 0; @@ -273,7 +273,7 @@ public final class SearchPanelProvider implements SearchTabOperator { c.insets = new Insets(8, 0, 2, 2); panel.add(labelPQ, c); - parsedQueryTA.setRows(4); + parsedQueryTA.setRows(3); parsedQueryTA.setLineWrap(true); parsedQueryTA.setEditable(false); c.gridx = 0; diff --git a/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/fragments/analysis/SimpleAnalyzeResultPanelOperator.java 
b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/fragments/analysis/SimpleAnalyzeResultPanelOperator.java new file mode 100644 index 00000000000..5641479cbc3 --- /dev/null +++ b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/fragments/analysis/SimpleAnalyzeResultPanelOperator.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.lucene.luke.app.desktop.components.fragments.analysis; + + +import org.apache.lucene.luke.app.desktop.components.ComponentOperatorRegistry; +import org.apache.lucene.luke.models.analysis.Analysis; + +/** Operator of the simple analyze result panel */ +public interface SimpleAnalyzeResultPanelOperator extends ComponentOperatorRegistry.ComponentOperator { + + void setAnalysisModel(Analysis analysisModel); + + void executeAnalysis(String text); + + void clearTable(); +} diff --git a/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/fragments/analysis/SimpleAnalyzeResultPanelProvider.java b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/fragments/analysis/SimpleAnalyzeResultPanelProvider.java new file mode 100644 index 00000000000..5e0c077dd69 --- /dev/null +++ b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/fragments/analysis/SimpleAnalyzeResultPanelProvider.java @@ -0,0 +1,196 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.lucene.luke.app.desktop.components.fragments.analysis; + +import javax.swing.JLabel; +import javax.swing.JPanel; +import javax.swing.JScrollPane; +import javax.swing.JTable; +import javax.swing.ListSelectionModel; + +import java.awt.BorderLayout; +import java.awt.FlowLayout; +import java.awt.event.MouseAdapter; +import java.awt.event.MouseEvent; +import java.util.List; +import java.util.stream.Collectors; + +import org.apache.lucene.luke.app.desktop.components.ComponentOperatorRegistry; +import org.apache.lucene.luke.app.desktop.components.TableColumnInfo; +import org.apache.lucene.luke.app.desktop.components.TableModelBase; +import org.apache.lucene.luke.app.desktop.components.dialog.analysis.TokenAttributeDialogFactory; +import org.apache.lucene.luke.app.desktop.util.DialogOpener; +import org.apache.lucene.luke.app.desktop.util.MessageUtils; +import org.apache.lucene.luke.app.desktop.util.TableUtils; +import org.apache.lucene.luke.models.analysis.Analysis; + +/** Provider of the simple analyze result panel */ +public class SimpleAnalyzeResultPanelProvider implements SimpleAnalyzeResultPanelOperator { + + private final ComponentOperatorRegistry operatorRegistry; + + private final TokenAttributeDialogFactory tokenAttrDialogFactory; + + private final JTable tokensTable = new JTable(); + + private final ListenerFunctions listeners = new ListenerFunctions(); + + private Analysis analysisModel; + + private List tokens; + + public SimpleAnalyzeResultPanelProvider(TokenAttributeDialogFactory tokenAttrDialogFactory) { + this.operatorRegistry = ComponentOperatorRegistry.getInstance(); + operatorRegistry.register(SimpleAnalyzeResultPanelOperator.class, this); + this.tokenAttrDialogFactory = tokenAttrDialogFactory; + } + + public JPanel get() { + JPanel panel = new JPanel(new BorderLayout()); + panel.setOpaque(false); + + JPanel hint = new JPanel(new FlowLayout(FlowLayout.LEADING)); + hint.setOpaque(false); + hint.add(new 
JLabel(MessageUtils.getLocalizedMessage("analysis.hint.show_attributes"))); + panel.add(hint, BorderLayout.PAGE_START); + + TableUtils.setupTable(tokensTable, ListSelectionModel.SINGLE_SELECTION, new TokensTableModel(), + new MouseAdapter() { + @Override + public void mouseClicked(MouseEvent e) { + listeners.showAttributeValues(e); + } + }, + TokensTableModel.Column.TERM.getColumnWidth(), + TokensTableModel.Column.ATTR.getColumnWidth()); + panel.add(new JScrollPane(tokensTable), BorderLayout.CENTER); + + return panel; + } + + @Override + public void setAnalysisModel(Analysis analysisModel) { + this.analysisModel = analysisModel; + } + + @Override + public void executeAnalysis(String text) { + tokens = analysisModel.analyze(text); + tokensTable.setModel(new TokensTableModel(tokens)); + tokensTable.setShowGrid(true); + tokensTable.getColumnModel().getColumn(TokensTableModel.Column.TERM.getIndex()) + .setPreferredWidth(TokensTableModel.Column.TERM.getColumnWidth()); + tokensTable.getColumnModel().getColumn(TokensTableModel.Column.ATTR.getIndex()) + .setPreferredWidth(TokensTableModel.Column.ATTR.getColumnWidth()); + } + + @Override + public void clearTable() { + TableUtils.setupTable(tokensTable, ListSelectionModel.SINGLE_SELECTION, new TokensTableModel(), + null, + TokensTableModel.Column.TERM.getColumnWidth(), + TokensTableModel.Column.ATTR.getColumnWidth()); + } + + private void showAttributeValues(int selectedIndex) { + String term = tokens.get(selectedIndex).getTerm(); + List attributes = tokens.get(selectedIndex).getAttributes(); + new DialogOpener<>(tokenAttrDialogFactory).open("Token Attributes", 650, 400, + factory -> { + factory.setTerm(term); + factory.setAttributes(attributes); + }); + } + + private class ListenerFunctions { + + void showAttributeValues(MouseEvent e) { + if (e.getClickCount() != 2 || e.isConsumed()) { + return; + } + int selectedIndex = tokensTable.rowAtPoint(e.getPoint()); + if (selectedIndex < 0 || selectedIndex >= 
tokensTable.getRowCount()) { + return; + } + SimpleAnalyzeResultPanelProvider.this.showAttributeValues(selectedIndex); + } + } + + /** Table model for simple result */ + private static class TokensTableModel extends TableModelBase { + + enum Column implements TableColumnInfo { + TERM("Term", 0, String.class, 150), + ATTR("Attributes", 1, String.class, 1000); + + private final String colName; + private final int index; + private final Class type; + private final int width; + + Column(String colName, int index, Class type, int width) { + this.colName = colName; + this.index = index; + this.type = type; + this.width = width; + } + + @Override + public String getColName() { + return colName; + } + + @Override + public int getIndex() { + return index; + } + + @Override + public Class getType() { + return type; + } + + @Override + public int getColumnWidth() { + return width; + } + } + + TokensTableModel() { + super(); + } + + TokensTableModel(List tokens) { + super(tokens.size()); + for (int i = 0; i < tokens.size(); i++) { + Analysis.Token token = tokens.get(i); + data[i][Column.TERM.getIndex()] = token.getTerm(); + List attValues = token.getAttributes().stream() + .flatMap(att -> att.getAttValues().entrySet().stream() + .map(e -> e.getKey() + "=" + e.getValue())) + .collect(Collectors.toList()); + data[i][Column.ATTR.getIndex()] = String.join(",", attValues); + } + } + + @Override + protected Column[] columnInfos() { + return Column.values(); + } + } +} diff --git a/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/fragments/analysis/StepByStepAnalyzeResultPanelOperator.java b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/fragments/analysis/StepByStepAnalyzeResultPanelOperator.java new file mode 100644 index 00000000000..2311e59a0d9 --- /dev/null +++ b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/fragments/analysis/StepByStepAnalyzeResultPanelOperator.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache 
Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.luke.app.desktop.components.fragments.analysis; + +import org.apache.lucene.luke.app.desktop.components.ComponentOperatorRegistry; +import org.apache.lucene.luke.models.analysis.Analysis; + +/** Operator of the Step by step analyze result panel */ +public interface StepByStepAnalyzeResultPanelOperator extends ComponentOperatorRegistry.ComponentOperator { + + void setAnalysisModel(Analysis analysisModel); + + void executeAnalysisStepByStep(String text); + + void clearTable(); +} diff --git a/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/fragments/analysis/StepByStepAnalyzeResultPanelProvider.java b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/fragments/analysis/StepByStepAnalyzeResultPanelProvider.java new file mode 100644 index 00000000000..2ef696b430f --- /dev/null +++ b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/fragments/analysis/StepByStepAnalyzeResultPanelProvider.java @@ -0,0 +1,415 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.luke.app.desktop.components.fragments.analysis; + +import javax.swing.JLabel; +import javax.swing.JPanel; +import javax.swing.JScrollPane; +import javax.swing.JSplitPane; +import javax.swing.JTable; +import javax.swing.ListSelectionModel; +import javax.swing.table.AbstractTableModel; + +import java.awt.BorderLayout; +import java.awt.Dimension; +import java.awt.FlowLayout; +import java.awt.event.MouseAdapter; +import java.awt.event.MouseEvent; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; + +import org.apache.lucene.luke.app.desktop.components.ComponentOperatorRegistry; +import org.apache.lucene.luke.app.desktop.components.TableColumnInfo; +import org.apache.lucene.luke.app.desktop.components.TableModelBase; +import org.apache.lucene.luke.app.desktop.components.dialog.analysis.TokenAttributeDialogFactory; +import org.apache.lucene.luke.app.desktop.util.DialogOpener; +import org.apache.lucene.luke.app.desktop.util.MessageUtils; +import org.apache.lucene.luke.app.desktop.util.TableUtils; +import org.apache.lucene.luke.models.analysis.Analysis; + +/** Provider of the Step by step analyze result panel */ +public class StepByStepAnalyzeResultPanelProvider implements StepByStepAnalyzeResultPanelOperator { + + private final 
ComponentOperatorRegistry operatorRegistry; + + private final TokenAttributeDialogFactory tokenAttrDialogFactory; + + private final JTable charfilterTextsTable = new JTable(); + + private final JTable charfilterTextsRowHeader = new JTable(); + + private final JTable namedTokensTable = new JTable(); + + private final JTable namedTokensRowHeader = new JTable(); + + private final ListenerFunctions listeners = new ListenerFunctions(); + + private Analysis analysisModel; + + private Analysis.StepByStepResult result; + + public StepByStepAnalyzeResultPanelProvider(TokenAttributeDialogFactory tokenAttrDialogFactory) { + this.operatorRegistry = ComponentOperatorRegistry.getInstance(); + operatorRegistry.register(StepByStepAnalyzeResultPanelOperator.class, this); + this.tokenAttrDialogFactory = tokenAttrDialogFactory; + } + + public JPanel get() { + JPanel panel = new JPanel(new BorderLayout()); + panel.setOpaque(false); + + JPanel hint = new JPanel(new FlowLayout(FlowLayout.LEADING)); + hint.setOpaque(false); + hint.add(new JLabel(MessageUtils.getLocalizedMessage("analysis.hint.show_attributes_step_by_step"))); + panel.add(hint, BorderLayout.PAGE_START); + + TableUtils.setupTable(charfilterTextsRowHeader, ListSelectionModel.SINGLE_SELECTION, new RowHeaderTableModel(), + null); + TableUtils.setupTable(charfilterTextsTable, ListSelectionModel.SINGLE_SELECTION, new CharfilterTextTableModel(), + null); + + TableUtils.setupTable(namedTokensRowHeader, ListSelectionModel.SINGLE_SELECTION, new RowHeaderTableModel(), + null); + TableUtils.setupTable(namedTokensTable, ListSelectionModel.SINGLE_SELECTION, new NamedTokensTableModel(), + new MouseAdapter() { + @Override + public void mouseClicked(MouseEvent e) { + listeners.showAttributeValues(e); + } + }); + namedTokensTable.setColumnSelectionAllowed(true); + JSplitPane inner = new JSplitPane(JSplitPane.VERTICAL_SPLIT, initResultScroll(charfilterTextsTable, charfilterTextsRowHeader), initResultScroll(namedTokensTable, 
namedTokensRowHeader)); + inner.setDividerLocation(60); + + panel.add(inner, BorderLayout.CENTER); + return panel; + } + + private JScrollPane initResultScroll(JTable table, JTable header) { + JScrollPane scroll = new JScrollPane(table); + scroll.setRowHeaderView(header); + scroll.setCorner(JScrollPane.UPPER_LEFT_CORNER, header.getTableHeader()); + Dimension tsz = new Dimension(200, header.getPreferredSize().height); + scroll.getRowHeader().setPreferredSize(tsz); + return scroll; + } + + + @Override + public void setAnalysisModel(Analysis analysisModel) { + this.analysisModel = analysisModel; + } + + @Override + public void executeAnalysisStepByStep(String text) { + result = analysisModel.analyzeStepByStep(text); + RowHeaderTableModel charfilterTextsHeaderModel = new RowHeaderTableModel(result.getCharfilteredTexts()); + charfilterTextsRowHeader.setModel(charfilterTextsHeaderModel); + charfilterTextsRowHeader.setShowGrid(true); + + CharfilterTextTableModel charfilterTextTableModel = new CharfilterTextTableModel(result.getCharfilteredTexts()); + charfilterTextsTable.setModel(charfilterTextTableModel); + charfilterTextsTable.setShowGrid(true); + + RowHeaderTableModel namedTokensHeaderModel = new RowHeaderTableModel(result.getNamedTokens()); + namedTokensRowHeader.setModel(namedTokensHeaderModel); + namedTokensRowHeader.setShowGrid(true); + + NamedTokensTableModel tableModel = new NamedTokensTableModel(result.getNamedTokens()); + namedTokensTable.setModel(tableModel); + namedTokensTable.setShowGrid(true); + for (int i = 0; i < tableModel.getColumnCount(); i++) { + namedTokensTable.getColumnModel().getColumn(i).setPreferredWidth(tableModel.getColumnWidth(i)); + } + } + + @Override + public void clearTable() { + TableUtils.setupTable(charfilterTextsRowHeader, ListSelectionModel.SINGLE_SELECTION, new RowHeaderTableModel(), + null); + TableUtils.setupTable(charfilterTextsTable, ListSelectionModel.SINGLE_SELECTION, new CharfilterTextTableModel(), + null); + + 
TableUtils.setupTable(namedTokensRowHeader, ListSelectionModel.SINGLE_SELECTION, new RowHeaderTableModel(), + null); + TableUtils.setupTable(namedTokensTable, ListSelectionModel.SINGLE_SELECTION, new NamedTokensTableModel(), + null); + } + + private void showAttributeValues(int rowIndex, int columnIndex) { + Analysis.NamedTokens namedTokens = + this.result.getNamedTokens().get(rowIndex); + List tokens = namedTokens.getTokens(); + + if (columnIndex < tokens.size()) { /* the clicked column indexes this row's token list, so bound it by the list size before tokens.get(columnIndex) */ + String term = "\"" + tokens.get(columnIndex).getTerm() + "\" BY " + namedTokens.getName(); + List attributes = tokens.get(columnIndex).getAttributes(); + new DialogOpener<>(tokenAttrDialogFactory).open("Token Attributes", 650, 400, + factory -> { + factory.setTerm(term); + factory.setAttributes(attributes); + }); + } + } + + private class ListenerFunctions { + void showAttributeValues(MouseEvent e) { + if (e.getClickCount() != 2 || e.isConsumed()) { + return; + } + int rowIndex = namedTokensTable.rowAtPoint(e.getPoint()); + int columnIndex = namedTokensTable.columnAtPoint(e.getPoint()); + if (rowIndex < 0 || rowIndex >= namedTokensTable.getRowCount()) { + return; + } else if (columnIndex < 0 || columnIndex >= namedTokensTable.getColumnCount()) { + return; + } + StepByStepAnalyzeResultPanelProvider.this.showAttributeValues(rowIndex, columnIndex); + } + } + + /** Table model for row header (display charfilter/tokenizer/filter name) */ + private static class RowHeaderTableModel extends TableModelBase { + + enum Column implements TableColumnInfo { + NAME("Name", 0, String.class, 200); + + private final String colName; + private final int index; + private final Class type; + private final int width; + + Column(String colName, int index, Class type, int width) { + this.colName = colName; + this.index = index; + this.type = type; + this.width = width; + } + + @Override + public String getColName() { + return colName; + } + + @Override + public int getIndex() { + return index; + } + + @Override + public Class
getType() { + return type; + } + + @Override + public int getColumnWidth() { + return width; + } + } + + RowHeaderTableModel() { + super(); + } + + RowHeaderTableModel(List namedObjects) { + super(namedObjects.size()); + for (int i = 0; i < namedObjects.size(); i++) { + data[i][0] = shortenName(namedObjects.get(i).getName()); + } + } + + @Override + protected Column[] columnInfos() { + return Column.values(); + } + } + + /** Table model for charfilter result */ + private static class CharfilterTextTableModel extends TableModelBase { + + enum Column implements TableColumnInfo { + TEXT("Text", 0, String.class, 1000); + + private final String colName; + private final int index; + private final Class type; + private final int width; + + Column(String colName, int index, Class type, int width) { + this.colName = colName; + this.index = index; + this.type = type; + this.width = width; + } + + @Override + public String getColName() { + return colName; + } + + @Override + public int getIndex() { + return index; + } + + @Override + public Class getType() { + return type; + } + + @Override + public int getColumnWidth() { + return width; + } + } + + CharfilterTextTableModel() { + super(); + } + + CharfilterTextTableModel(List charfilteredTexts) { + super(charfilteredTexts.size()); + for (int i = 0; i < charfilteredTexts.size(); i++) { + data[i][Column.TEXT.getIndex()] = charfilteredTexts.get(i).getText(); + } + } + + @Override + protected Column[] columnInfos() { + return Column.values(); + } + } + + /** Table model for tokenizer/filter result */ + private static class NamedTokensTableModel extends AbstractTableModel { + + class Column implements TableColumnInfo { + + private final String colName; + private final int index; + private final Class type; + private final int width; + + Column(String colName, int index, Class type, int width) { + this.colName = colName; + this.index = index; + this.type = type; + this.width = width; + } + + @Override + public String getColName() { 
+ return colName; + } + + @Override + public int getIndex() { + return index; + } + + @Override + public Class getType() { + return type; + } + + @Override + public int getColumnWidth() { + return width; + } + } + + private final Map columnMap = new TreeMap<>(); + + private final Object[][] data; + + + NamedTokensTableModel() { + this.data = new Object[0][0]; + } + + // Currently this only show each tokenizer/filters result independently, + // so the result doesn't show deletion/separation by next filter, + // e.g. "library" by WordDelimiterFilter is different position between other output. + NamedTokensTableModel(List namedTokens) { + int maxColumnSize = 0; + Analysis.NamedTokens namedToken; + for (Analysis.NamedTokens tokens : namedTokens) { + namedToken = tokens; + if (maxColumnSize < namedToken.getTokens().size()) { + maxColumnSize = namedToken.getTokens().size(); + } + } + int rowSize = namedTokens.size(); + this.data = new Object[rowSize][maxColumnSize]; + + for (int i = 0; i < namedTokens.size(); i++) { + namedToken = namedTokens.get(i); + data[i][0] = shortenName(namedToken.getName()); + for (int j = 0; j < namedToken.getTokens().size(); j++) { + Analysis.Token token = namedToken.getTokens().get(j); + data[i][j] = token.getTerm(); + if (maxColumnSize == namedToken.getTokens().size()) { + columnMap.put(j, new Column(String.valueOf(j), j, String.class, 200)); + } + } + } + } + + @Override + public int getRowCount() { + return data.length; + } + + @Override + public int getColumnCount() { + return columnMap.size(); + } + + + @Override + public String getColumnName(int colIndex) { + if (columnMap.containsKey(colIndex)) { + return columnMap.get(colIndex).getColName(); + } + return ""; + } + + @Override + public Class getColumnClass(int colIndex) { + if (columnMap.containsKey(colIndex)) { + return columnMap.get(colIndex).getType(); + } + return Object.class; + } + + @Override + public Object getValueAt(int rowIndex, int columnIndex) { + return 
data[rowIndex][columnIndex]; + } + + public int getColumnWidth(int columnIndex) { + return columnMap.get(columnIndex).getColumnWidth(); + } + } + + private static String shortenName(String name) { + return name.substring(name.lastIndexOf('.') + 1); + } + +} diff --git a/lucene/luke/src/java/org/apache/lucene/luke/models/analysis/Analysis.java b/lucene/luke/src/java/org/apache/lucene/luke/models/analysis/Analysis.java index 8b640ee2dd0..8e299982361 100644 --- a/lucene/luke/src/java/org/apache/lucene/luke/models/analysis/Analysis.java +++ b/lucene/luke/src/java/org/apache/lucene/luke/models/analysis/Analysis.java @@ -88,6 +88,73 @@ public interface Analysis { } } + + /** Base class for named object */ + abstract class NamedObject { + private final String name; + + NamedObject(String name) { + this.name = name; + } + + public String getName() { + return name; + } + } + + /** + * Holder for a pair tokenizer/filter and token list + */ + class NamedTokens extends NamedObject { + private final List tokens; + + NamedTokens(String name, List tokens) { + super(name); + this.tokens = tokens; + } + + public List getTokens() { + return tokens; + } + } + + /** + * Holder for a charfilter name and text that output by the charfilter + */ + class CharfilteredText extends NamedObject { + private final String text; + + public CharfilteredText(String name, String text) { + super(name); + this.text = text; + } + + public String getText() { + return text; + } + } + + /** + * Step-by-step analysis result holder. + */ + class StepByStepResult { + private List charfilteredTexts; + private List namedTokens; + + public StepByStepResult(List charfilteredTexts, List namedTokens) { + this.charfilteredTexts = charfilteredTexts; + this.namedTokens = namedTokens; + } + + public List getCharfilteredTexts() { + return charfilteredTexts; + } + + public List getNamedTokens() { + return namedTokens; + } + } + /** * Returns built-in {@link Analyzer}s. 
*/ @@ -149,4 +216,13 @@ public interface Analysis { */ void addExternalJars(List jarFiles); + + /** + * Analyzes given text with the current Analyzer. + * + * @param text - text string to analyze + * @return the list of text by charfilter and the list of pair of Tokenizer/TokenFilter name and tokens + */ + StepByStepResult analyzeStepByStep(String text); + } diff --git a/lucene/luke/src/java/org/apache/lucene/luke/models/analysis/AnalysisImpl.java b/lucene/luke/src/java/org/apache/lucene/luke/models/analysis/AnalysisImpl.java index 7d76b8f32a8..be821a82077 100644 --- a/lucene/luke/src/java/org/apache/lucene/luke/models/analysis/AnalysisImpl.java +++ b/lucene/luke/src/java/org/apache/lucene/luke/models/analysis/AnalysisImpl.java @@ -18,6 +18,8 @@ package org.apache.lucene.luke.models.analysis; import java.io.IOException; +import java.io.Reader; +import java.io.StringReader; import java.lang.reflect.Modifier; import java.net.URL; import java.net.URLClassLoader; @@ -39,6 +41,7 @@ import java.util.stream.Collectors; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.custom.CustomAnalyzer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.util.CharFilterFactory; @@ -47,6 +50,8 @@ import org.apache.lucene.analysis.util.TokenizerFactory; import org.apache.lucene.luke.models.LukeException; import org.apache.lucene.luke.util.reflection.ClassScanner; import org.apache.lucene.util.AttributeImpl; +import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.IOUtils; /** Default implementation of {@link AnalysisImpl} */ public final class AnalysisImpl implements Analysis { @@ -132,7 +137,6 @@ public final class AnalysisImpl implements Analysis { try { List result = new ArrayList<>(); - TokenStream stream = analyzer.tokenStream("", text); stream.reset(); @@ -140,19 +144,7 @@ public final 
class AnalysisImpl implements Analysis { // iterate tokens while (stream.incrementToken()) { - List attributes = new ArrayList<>(); - Iterator itr = stream.getAttributeImplsIterator(); - - while (itr.hasNext()) { - AttributeImpl att = itr.next(); - Map attValues = new LinkedHashMap<>(); - att.reflectWith((attClass, key, value) -> { - if (value != null) - attValues.put(key, value.toString()); - }); - attributes.add(new TokenAttribute(att.getClass().getSimpleName(), attValues)); - } - + List attributes = copyAttributes(stream, charAtt); result.add(new Token(charAtt.toString(), attributes)); } stream.close(); @@ -163,6 +155,21 @@ public final class AnalysisImpl implements Analysis { } } + private List copyAttributes(TokenStream tokenStream, CharTermAttribute charAtt) { + List attributes = new ArrayList<>(); + Iterator itr = tokenStream.getAttributeImplsIterator(); + while(itr.hasNext()) { + AttributeImpl att = itr.next(); + Map attValues = new LinkedHashMap<>(); + att.reflectWith((attClass, key, value) -> { + if (value != null) + attValues.put(key, value.toString()); + }); + attributes.add(new TokenAttribute(att.getClass().getSimpleName(), attValues)); + } + return attributes; + } + @Override public Analyzer createAnalyzerFromClassName(String analyzerType) { Objects.requireNonNull(analyzerType); @@ -214,4 +221,160 @@ public final class AnalysisImpl implements Analysis { return analyzer; } + @Override + public StepByStepResult analyzeStepByStep(String text){ + Objects.requireNonNull(text); + if (analyzer == null) { + throw new LukeException("Analyzer is not set."); + } + + if (!(analyzer instanceof CustomAnalyzer)) { + throw new LukeException("Analyzer is not CustomAnalyzer."); + } + + List namedTokens = new ArrayList<>(); + List charfilteredTexts = new ArrayList<>(); + + try { + CustomAnalyzer customAnalyzer = (CustomAnalyzer)analyzer; + final List charFilterFactories = customAnalyzer.getCharFilterFactories(); + Reader reader = new StringReader(text); + String 
charFilteredSource = text; + if (charFilterFactories.size() > 0) { + Reader cs = reader; + for (CharFilterFactory charFilterFactory : charFilterFactories) { + cs = charFilterFactory.create(reader); + Reader readerForWriteOut = new StringReader(charFilteredSource); + readerForWriteOut = charFilterFactory.create(readerForWriteOut); + charFilteredSource = writeCharStream(readerForWriteOut); + charfilteredTexts.add(new CharfilteredText(readerForWriteOut.getClass().getName(), charFilteredSource)); + } + reader = cs; + } + + final TokenizerFactory tokenizerFactory = customAnalyzer.getTokenizerFactory(); + final List tokenFilterFactories = customAnalyzer.getTokenFilterFactories(); + + TokenStream tokenStream = tokenizerFactory.create(); + ((Tokenizer)tokenStream).setReader(reader); + List tokens = new ArrayList<>(); + List attributeSources = analyzeTokenStream(tokenStream, tokens); + namedTokens.add(new NamedTokens(tokenStream.getClass().getName(), tokens)); + ListBasedTokenStream listBasedTokenStream = new ListBasedTokenStream(tokenStream, attributeSources); + for (TokenFilterFactory tokenFilterFactory : tokenFilterFactories) { + tokenStream = tokenFilterFactory.create(listBasedTokenStream); + tokens = new ArrayList<>(); + attributeSources = analyzeTokenStream(tokenStream, tokens); + namedTokens.add(new NamedTokens(tokenStream.getClass().getName(), tokens)); + try { + listBasedTokenStream.close(); + } catch (IOException e) { + // do nothing; + } + listBasedTokenStream = new ListBasedTokenStream(listBasedTokenStream, attributeSources); + } + try { + listBasedTokenStream.close(); + } catch (IOException e) { + // do nothing. + } finally { + reader.close(); + } + return new StepByStepResult(charfilteredTexts, namedTokens); + } catch (Exception e) { + throw new LukeException(e.getMessage(), e); + } + } + + /** + * Analyzes the given TokenStream, collecting the Tokens it produces. 
+ * + * @param tokenStream TokenStream to analyze + * + * @return List of tokens produced from the TokenStream + */ + private List analyzeTokenStream(TokenStream tokenStream, List result) { + final List tokens = new ArrayList<>(); + try { + tokenStream.reset(); + CharTermAttribute charAtt = tokenStream.getAttribute(CharTermAttribute.class); + while (tokenStream.incrementToken()) { + tokens.add(tokenStream.cloneAttributes()); + List attributes = copyAttributes(tokenStream, charAtt); + result.add(new Token(charAtt.toString(), attributes)); + } + tokenStream.end(); + } catch (IOException ioe) { + throw new RuntimeException("Error occurred while iterating over TokenStream", ioe); + } finally { + IOUtils.closeWhileHandlingException(tokenStream); + } + return tokens; + } + + /** + * TokenStream that iterates over a list of pre-existing Tokens + * see org.apache.solr.handler.AnalysisRequestHandlerBase#ListBasedTokenStream + */ + protected final static class ListBasedTokenStream extends TokenStream { + private final List tokens; + private Iterator tokenIterator; + + /** + * Creates a new ListBasedTokenStream which uses the given tokens as its token source. 
+ * + * @param attributeSource source of the attribute factory and attribute impls + * @param tokens Source of tokens to be used + */ + ListBasedTokenStream(AttributeSource attributeSource, List tokens) { + super(attributeSource.getAttributeFactory()); + this.tokens = tokens; + // Make sure all the attributes of the source are here too + addAttributes(attributeSource); + } + + @Override + public void reset() throws IOException { + super.reset(); + tokenIterator = tokens.iterator(); + } + + @Override + public boolean incrementToken() { + if (tokenIterator.hasNext()) { + clearAttributes(); + AttributeSource next = tokenIterator.next(); + addAttributes(next); + next.copyTo(this); + return true; + } else { + return false; + } + } + + void addAttributes(AttributeSource attributeSource) { + Iterator atts = attributeSource.getAttributeImplsIterator(); + while (atts.hasNext()) { + addAttributeImpl(atts.next()); // adds both impl & interfaces + } + } + } + + private static String writeCharStream(Reader input ){ + final int BUFFER_SIZE = 1024; + char[] buf = new char[BUFFER_SIZE]; + int len = 0; + StringBuilder sb = new StringBuilder(); + do { + try { + len = input.read( buf, 0, BUFFER_SIZE ); + } catch (IOException e) { + throw new RuntimeException("Error occurred while iterating over charfiltering", e); + } + if( len > 0 ) + sb.append(buf, 0, len); + } while( len == BUFFER_SIZE ); + return sb.toString(); + } + } diff --git a/lucene/luke/src/resources/org/apache/lucene/luke/app/desktop/messages/messages.properties b/lucene/luke/src/resources/org/apache/lucene/luke/app/desktop/messages/messages.properties index 94fe4063140..e6fed08d49e 100644 --- a/lucene/luke/src/resources/org/apache/lucene/luke/app/desktop/messages/messages.properties +++ b/lucene/luke/src/resources/org/apache/lucene/luke/app/desktop/messages/messages.properties @@ -172,6 +172,7 @@ analysis.radio.custom=Custom analysis.button.browse=Browse analysis.button.build_analyzser=Build Analyzer 
analysis.button.test=Test Analyzer +analysis.checkbox.step_by_step=Step By Step analysis.hyperlink.load_jars=Load external jars analysis.textarea.prompt=Apache Lucene is a high-performance, full-featured text search engine library. analysis.dialog.title.char_filter_params=CharFilter parameters @@ -186,6 +187,7 @@ analysis.dialog.chain.label.tokenfilters=Token Filters: analysis.message.build_success=Custom analyzer built successfully. analysis.message.empry_input=Please input text to analyze. analysis.hint.show_attributes=Hint: Double click the row to show all token attributes. +analysis.hint.show_attributes_step_by_step=Hint: Double click the cell to show all token attributes. analysis_preset.label.preset=Preset analyzers: analysis_custom.label.charfilters=Char Filters analysis_custom.label.tokenizer=Tokenizer diff --git a/lucene/luke/src/test/org/apache/lucene/luke/models/analysis/AnalysisImplTest.java b/lucene/luke/src/test/org/apache/lucene/luke/models/analysis/AnalysisImplTest.java index 39e8eca1e78..b0a2c6868dd 100644 --- a/lucene/luke/src/test/org/apache/lucene/luke/models/analysis/AnalysisImplTest.java +++ b/lucene/luke/src/test/org/apache/lucene/luke/models/analysis/AnalysisImplTest.java @@ -132,5 +132,45 @@ public class AnalysisImplTest extends LuceneTestCase { analysis.analyze(text); } + @Test(expected = LukeException.class) + public void testAnalyzeStepByStep_preset() { + AnalysisImpl analysis = new AnalysisImpl(); + String analyzerType = "org.apache.lucene.analysis.standard.StandardAnalyzer"; + Analyzer analyzer = analysis.createAnalyzerFromClassName(analyzerType); + assertEquals(analyzerType, analyzer.getClass().getName()); + String text = "This test must fail."; + analysis.analyzeStepByStep(text); + } + + @Test + public void testAnalyzeStepByStep_custom() { + AnalysisImpl analysis = new AnalysisImpl(); + Map tkParams = new HashMap<>(); + tkParams.put("maxTokenLen", "128"); + CustomAnalyzerConfig.Builder builder = new 
CustomAnalyzerConfig.Builder("keyword", tkParams) + .addTokenFilterConfig("lowercase", Collections.emptyMap()) + .addCharFilterConfig("htmlstrip", Collections.emptyMap()); + CustomAnalyzer analyzer = (CustomAnalyzer) analysis.buildCustomAnalyzer(builder.build()); + assertEquals("org.apache.lucene.analysis.custom.CustomAnalyzer", analyzer.getClass().getName()); + assertEquals("org.apache.lucene.analysis.charfilter.HTMLStripCharFilterFactory", + analyzer.getCharFilterFactories().get(0).getClass().getName()); + assertEquals("org.apache.lucene.analysis.core.KeywordTokenizerFactory", + analyzer.getTokenizerFactory().getClass().getName()); + assertEquals("org.apache.lucene.analysis.core.LowerCaseFilterFactory", + analyzer.getTokenFilterFactories().get(0).getClass().getName()); + + String text = "Apache Lucene"; + Analysis.StepByStepResult result = analysis.analyzeStepByStep(text); + assertNotNull(result); + assertNotNull(result.getCharfilteredTexts()); + assertEquals(1,result.getCharfilteredTexts().size()); + assertEquals("org.apache.lucene.analysis.charfilter.HTMLStripCharFilter", result.getCharfilteredTexts().get(0).getName()); + + assertNotNull(result.getNamedTokens()); + assertEquals(2, result.getNamedTokens().size()); + //FIXME check each namedTokensList + assertEquals("org.apache.lucene.analysis.core.KeywordTokenizer", result.getNamedTokens().get(0).getName()); + assertEquals("org.apache.lucene.analysis.core.LowerCaseFilter", result.getNamedTokens().get(1).getName()); + } }