mirror of https://github.com/apache/lucene.git
117 lines
3.9 KiB
Perl
117 lines
3.9 KiB
Perl
#!/usr/bin/perl
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# ------------------------------------------
# compare.shingle.benchmark.jira.tables.pl
#
# Takes as cmdline parameters two JIRA-formatted benchmark results, as produced
# by shingle.bm2jira.pl (located in the same directory as this script), and
# outputs a third JIRA-formatted comparison table.
#
# The difference is calculated as a percentage:
#
#   100 * (unpatched-elapsed - patched-elapsed) / patched-elapsed
#
# where (un)patched-elapsed values have had the no-shingle-filter
# (StandardAnalyzer) elapsed time subtracted from them.
#
#
# Example shingle.bm2jira.pl output:
# ----------------------------------
# JAVA:
# java version "1.5.0_15"
# Java(TM) 2 Runtime Environment, Standard Edition (build 1.5.0_15-b04)
# Java HotSpot(TM) 64-Bit Server VM (build 1.5.0_15-b04, mixed mode)
#
# OS:
# cygwin
# WinVistaService Pack 2
# Service Pack 26060022202561
#
# ||Max Shingle Size||Unigrams?||Elapsed||
# |1 (Unigrams)|yes|2.19s|
# |2|no|4.74s|
# |2|yes|4.90s|
# |4|no|5.82s|
# |4|yes|5.97s|

use strict;
use warnings;

# Exactly two arguments are required, and both must name existing plain
# files: the unpatched benchmark results first, the patched results second.
# Anything else prints the usage line and exits with an error.
my $usage = "Usage: $0 <unpatched-file> <patched-file>\n";

die $usage unless (@ARGV == 2 && -f $ARGV[0] && -f $ARGV[1]);

# Read the unpatched results.  Every line that precedes the table header row
# is echoed to STDOUT unchanged; every table row after the header is parsed
# for its elapsed time.
#
# Note: "or" (not "||") is required here.  "||" binds more tightly than the
# argument list of open(), so with "||" the die could never be reached and a
# failed open would go unnoticed.
open(my $unpatched_fh, '<', $ARGV[0])
  or die "ERROR opening '$ARGV[0]': $!";
my $table_encountered = 0;          # true once the header row has been seen
my $standard_analyzer_elapsed = 0;  # elapsed time of the "(Unigrams)" row
my %unpatched_stats = ();           # {max shingle size}{unigrams?} => elapsed
my %patched_stats = ();             # same layout, for the patched results
while (my $line = <$unpatched_fh>) {
  unless ($table_encountered) {
    if ($line =~ /\Q||Max Shingle Size||Unigrams?||Elapsed||\E/) {
      $table_encountered = 1;
    } else {
      print $line;
    }
  } elsif ($line =~ /\|([^|]+)\|([^|]+)\|([\d.]+)s\|/) {
    my ($max_shingle_size, $output_unigrams, $elapsed) = ($1, $2, $3);
    if ($max_shingle_size =~ /Unigrams/) {
      # The row whose first column mentions "Unigrams" is the baseline run
      # rather than a shingle configuration.
      $standard_analyzer_elapsed = $elapsed;
    } else {
      $unpatched_stats{$max_shingle_size}{$output_unigrams} = $elapsed;
    }
  }
}
close $unpatched_fh;

# Read the patched results.  Every line that looks like a table row is
# parsed; all other lines are ignored.
#
# Note: "or" (not "||") is required here.  "||" binds more tightly than the
# argument list of open(), so with "||" the die could never be reached and a
# failed open would go unnoticed.
open(my $patched_fh, '<', $ARGV[1])
  or die "ERROR opening '$ARGV[1]': $!";
while (my $line = <$patched_fh>) {
  if ($line =~ /\|([^|]+)\|([^|]+)\|([\d.]+)s\|/) {
    my ($max_shingle_size, $output_unigrams, $elapsed) = ($1, $2, $3);
    if ($max_shingle_size =~ /Unigrams/) {
      # Only replace the baseline already recorded if this one is smaller,
      # i.e. keep the lower of the two baseline times.
      $standard_analyzer_elapsed = $elapsed
        if ($elapsed < $standard_analyzer_elapsed);
    } else {
      $patched_stats{$max_shingle_size}{$output_unigrams} = $elapsed;
    }
  }
}
close $patched_fh;

# Emit the JIRA-formatted comparison table: one row for each
# (max shingle size, unigrams?) pair found in the unpatched results, ordered
# numerically by shingle size and then by the unigrams column.
print "||Max Shingle Size||Unigrams?||Unpatched||Patched||StandardAnalyzer||Improvement||\n";
for my $max_shingle_size (sort { $a <=> $b } keys %unpatched_stats) {
  for my $output_unigrams (sort keys %{$unpatched_stats{$max_shingle_size}}) {
    my $unpatched = $unpatched_stats{$max_shingle_size}{$output_unigrams};
    my $patched = $patched_stats{$max_shingle_size}{$output_unigrams};

    # A configuration present only in the unpatched file cannot be compared.
    unless (defined $patched) {
      warn "WARNING: no patched result for max shingle size"
         . " '$max_shingle_size', unigrams '$output_unigrams'; row skipped\n";
      next;
    }

    # The improvement is relative to the patched time with the baseline
    # subtracted; guard against a zero denominator instead of dying with
    # "Illegal division by zero" halfway through the table.
    my $denominator = $patched - $standard_analyzer_elapsed;
    if ($denominator == 0) {
      warn "WARNING: patched elapsed time equals the StandardAnalyzer time"
         . " for max shingle size '$max_shingle_size', unigrams"
         . " '$output_unigrams'; row skipped\n";
      next;
    }

    my $improvement = ($unpatched - $patched) / $denominator;

    # Convert to a percentage rounded to one decimal place.  int() truncates
    # toward zero, so negative values are rounded on their magnitude to get
    # symmetric half-away-from-zero rounding.
    my $scaled = $improvement * 1000;
    $improvement
      = ($scaled < 0 ? -int(-$scaled + .5) : int($scaled + .5)) / 10;

    # Pass the data as printf arguments, never inside the format string, so
    # a stray '%' in the input cannot be taken for a conversion.
    printf "|%s|%s|%ss|%ss|%ss|%2.1f%%|\n",
      $max_shingle_size, $output_unigrams,
      $unpatched, $patched, $standard_analyzer_elapsed, $improvement;
  }
}