mirror of
https://github.com/apache/druid.git
synced 2025-03-08 18:39:45 +00:00
254 lines
5.9 KiB
R
254 lines
5.9 KiB
R
library(stringr)
|
|
library(xtable)
|
|
library(plyr)
|
|
library(ggplot2)
|
|
|
|
stringToDF <- function(x, ncol){
|
|
m <- matrix(unlist(str_split(x, "\t|\n")), ncol = ncol, byrow = TRUE)
|
|
df <- data.frame(m[-1, ], stringsAsFactors = FALSE)
|
|
names(df) <- m[1, ]
|
|
df
|
|
}
|
|
|
|
##print(xtable(stringToDF(x, 3)), include.rownames = FALSE)
|
|
|
|
stringToDF2 <- function(x, query){
|
|
m <- matrix(unlist(str_split(x, "\t|\n")), ncol = 3, byrow = TRUE)
|
|
df <- data.frame(m[-1, ], stringsAsFactors = FALSE)
|
|
names(df) <- m[1, ]
|
|
df[, 2] <- as.numeric(str_replace_all(df[, 2], ",", ""))
|
|
df[, 3] <- as.numeric(str_replace_all(df[, 3], ",", ""))
|
|
df2 <- ldply(1:nrow(df), function(i) gsub("^\\s+|\\s+$", "", str_split(df[i, 1], ",")[[1]]))
|
|
names(df2) <- c("cores", "nodes", "configuration")
|
|
df2$query <- query
|
|
cbind(df, df2)
|
|
}
|
|
|
|
x2 <- "Cluster
|
|
Cluster scan rate (rows/sec)
|
|
Core scan rate
|
|
15-core, 100 nodes, in-memory
|
|
26,610,386,635
|
|
17,740,258
|
|
15-core, 75 nodes, mmap
|
|
25,224,873,928
|
|
22,422,110
|
|
15-core, 50 nodes, mmap
|
|
20,387,152,160
|
|
27,182,870
|
|
15-core, 25 nodes, mmap
|
|
11,910,388,894
|
|
31,761,037
|
|
4-core, 131 nodes, in-memory
|
|
10,008,730,163
|
|
19,100,630
|
|
4-core, 131 nodes, mmap
|
|
10,129,695,120
|
|
19,331,479
|
|
4-core, 50 nodes, mmap
|
|
6,626,570,688
|
|
33,132,853"
|
|
|
|
|
|
x3 <- "Cluster
|
|
Cluster scan rate (rows/sec)
|
|
Core scan rate
|
|
15-core, 100 nodes, in-memory
|
|
16,223,081,703
|
|
10,815,388
|
|
15-core, 75 nodes, mmap
|
|
9,860,968,285
|
|
8,765,305
|
|
15-core, 50 nodes, mmap
|
|
8,093,611,909
|
|
10,791,483
|
|
15-core, 25 nodes, mmap
|
|
4,126,502,352
|
|
11,004,006
|
|
4-core, 131 nodes, in-memory
|
|
5,755,274,389
|
|
10,983,348
|
|
4-core, 131 nodes, mmap
|
|
5,032,185,657
|
|
9,603,408
|
|
4-core, 50 nodes, mmap
|
|
1,720,238,609
|
|
8,601,193"
|
|
|
|
x4 <- "Cluster
|
|
Cluster scan rate (rows/sec)
|
|
Core scan rate
|
|
15-core, 100 nodes, in-memory
|
|
7,591,604,822
|
|
5,061,070
|
|
15-core, 75 nodes, mmap
|
|
4,319,179,995
|
|
3,839,271
|
|
15-core, 50 nodes, mmap
|
|
3,406,554,102
|
|
4,542,072
|
|
15-core, 25 nodes, mmap
|
|
1,826,451,888
|
|
4,870,538
|
|
4-core, 131 nodes, in-memory
|
|
1,936,648,601
|
|
3,695,894
|
|
4-core, 131 nodes, mmap
|
|
2,210,367,152
|
|
4,218,258
|
|
4-core, 50 nodes, mmap
|
|
1,002,291,562
|
|
5,011,458"
|
|
|
|
x5 <- "Cluster
|
|
Cluster scan rate (rows/sec)
|
|
Core scan rate
|
|
15-core, 100 nodes, in-memory
|
|
10,241,183,745
|
|
6,827,456
|
|
15-core, 75 nodes, mmap
|
|
4,891,097,559
|
|
4,347,642
|
|
15-core, 50 nodes, mmap
|
|
3,616,707,511
|
|
4,822,277
|
|
15-core, 25 nodes, mmap
|
|
1,665,053,263
|
|
4,440,142
|
|
4-core, 131 nodes, in-memory
|
|
4,388,159,569
|
|
8,374,350
|
|
4-core, 131 nodes, mmap
|
|
2,444,344,232
|
|
4,664,779
|
|
4-core, 50 nodes, mmap
|
|
1,215,737,558
|
|
6,078,688"
|
|
|
|
x6 <- "Cluster
|
|
Cluster scan rate (rows/sec)
|
|
Core scan rate
|
|
15-core, 100 nodes, in-memory
|
|
7,309,984,688
|
|
4,873,323
|
|
15-core, 75 nodes, mmap
|
|
3,333,628,777
|
|
2,963,226
|
|
15-core, 50 nodes, mmap
|
|
2,555,300,237
|
|
3,407,067
|
|
15-core, 25 nodes, mmap
|
|
1,384,674,717
|
|
3,692,466
|
|
4-core, 131 nodes, in-memory
|
|
3,237,907,984
|
|
6,179,214
|
|
4-core, 131 nodes, mmap
|
|
1,740,481,380
|
|
3,321,529
|
|
4-core, 50 nodes, mmap
|
|
863,170,420
|
|
4,315,852"
|
|
|
|
x7 <- "Cluster
|
|
Cluster scan rate (rows/sec)
|
|
Core scan rate
|
|
15-core, 100 nodes, in-memory
|
|
4,064,424,274
|
|
2,709,616
|
|
15-core, 75 nodes, mmap
|
|
2,014,067,386
|
|
1,790,282
|
|
15-core, 50 nodes, mmap
|
|
1,499,452,617
|
|
1,999,270
|
|
15-core, 25 nodes, mmap
|
|
810,143,518
|
|
2,160,383
|
|
4-core, 131 nodes, in-memory
|
|
1,670,214,695
|
|
3,187,433
|
|
4-core, 131 nodes, mmap
|
|
1,116,635,690
|
|
2,130,984
|
|
4-core, 50 nodes, mmap
|
|
531,389,163
|
|
2,656,946"
|
|
|
|
dat <- ldply(2:7, function(i){
|
|
df <- eval(parse(text = paste("x", i, sep = "")))
|
|
stringToDF2(df, paste("Query", i - 1, sep = " "))
|
|
})
|
|
|
|
ggplot(data = dat, aes(
|
|
x = as.numeric(str_extract(nodes, "[0-9]+")),
|
|
y = `Cluster scan rate (rows/sec)` / 1e9,
|
|
shape = configuration,
|
|
color = query,
|
|
size = factor(cores, levels = c("4-core", "15-core"))
|
|
)) +
|
|
scale_size_manual(values = c(3, 6), guide = guide_legend(title = "Cores Per Node")) +
|
|
scale_color_discrete(guide = guide_legend(title = "Query")) +
|
|
scale_shape_discrete(guide = guide_legend(title = "Configuration")) +
|
|
geom_point() +
|
|
scale_y_log10(breaks = 2^(-1:5)) +
|
|
facet_grid(configuration~.) +
|
|
xlab("Number of Nodes") +
|
|
ylab("Cluster Scan Rate (billion rows/sec.)")
|
|
|
|
dat2 <- subset(dat, cores == "15-core" & configuration == "mmap")
|
|
dat2$x <- as.numeric(str_extract(dat2$nodes, "[0-9]+"))
|
|
baseRate <- list()
|
|
d_ply(subset(dat2, x == 25), .(query), function(df) baseRate[df$query] <<- df$`Cluster scan rate (rows/sec)`)
|
|
dat2 <- ddply(dat2, .(query, x), function(df){
|
|
df$projected <- df$x / 25 * unlist(baseRate[df$query])
|
|
df
|
|
})
|
|
|
|
ggplot(data = dat2, aes(
|
|
x = as.numeric(str_extract(nodes, "[0-9]+")),
|
|
y = `Cluster scan rate (rows/sec)` / 1e9,
|
|
color = query,
|
|
shape = query
|
|
)) +
|
|
# scale_y_log10(breaks = 2^(-1:5)) +
|
|
# scale_color_discrete(guide = guide_legend(title = "Query")) +
|
|
geom_point(size = 3) +
|
|
# geom_path(aes(y = projected)) +
|
|
geom_line(aes(y = projected / 1e9)) +
|
|
# scale_y_log10() +
|
|
xlab("Number of Nodes") +
|
|
ylab("Cluster Scan Rate (billion rows/sec.)")
|
|
ggsave("../figures/cluster_scan_rate.pdf", width = 4, height = 3)
|
|
|
|
|
|
|
|
## ggplot(data = dat, aes(
|
|
## x = as.numeric(str_extract(nodes, "[0-9]+")),
|
|
## y = `Core scan rate` / 1e6,
|
|
## shape = configuration,
|
|
## color = query,
|
|
## size = factor(cores, levels = c("4-core", "15-core"))
|
|
## )) +
|
|
## scale_size_manual(values = c(3, 6), guide = guide_legend(title = "Cores Per Node")) +
|
|
## scale_color_discrete(guide = guide_legend(title = "Query")) +
|
|
## scale_shape_discrete(guide = guide_legend(title = "Configuration")) +
|
|
## geom_point() +
|
|
## scale_y_log10(breaks = 2^(-1:5)) +
|
|
## xlab("Number of Nodes") +
|
|
## ylab("Core Scan Rate (million rows/sec.)")
|
|
|
|
ggplot(data = dat2, aes(
|
|
x = as.numeric(str_extract(nodes, "[0-9]+")),
|
|
y = `Core scan rate` / 1e6,
|
|
color = query,
|
|
shape = query
|
|
)) +
|
|
|
|
geom_point(size = 3) +
|
|
xlab("Number of Nodes") +
|
|
ylab("Core Scan Rate (million rows/sec.)")
|
|
ggsave("../figures/core_scan_rate.pdf", width = 4, height = 3)
|
|
|
|
|