---
title: "BLAS libraries benchmarks"
author: "Andrzej Wójtowicz"
output:
  html_document:
    keep_md: yes
---

Document generation date: `r Sys.time()`

```{r checkpoint, echo=FALSE, warning=FALSE, message=FALSE}
# Pin package versions to a fixed snapshot date before loading anything else,
# so that the report is rendered with the same package set every time.
library(checkpoint)
checkpoint("2016-04-01", scanForPackages=FALSE, verbose=FALSE)

library(reshape2)
library(dplyr)
library(ggplot2)
library(RColorBrewer)
```

## Table of Contents

1. [Configuration](#configuration)
2. [Results per host](#results-per-host)
    * [Intel Core i7-4790K + MSI GeForce GTX 980 Ti Lightning](#intel-core-i7-4790k--msi-geforce-gtx-980-ti-lightning)
    * [Intel Core i5-4590 + NVIDIA GeForce GT 430](#intel-core-i5-4590--nvidia-geforce-gt-430)
    * [Intel Core i5-4590 + NVIDIA GeForce GTX 750 Ti](#intel-core-i5-4590--nvidia-geforce-gtx-750-ti)
    * [Intel Core i5-3570](#intel-core-i5-3570)
    * [Intel Core i3-2120](#intel-core-i3-2120)
    * [Intel Core i3-3120M](#intel-core-i3-3120m)
    * [Intel Core i5-3317U + NVIDIA GeForce GT 630M](#intel-core-i5-3317u--nvidia-geforce-gt-630m)
    * [Intel Pentium Dual-Core E5300](#intel-pentium-dual-core-e5300)
3. [Results per library](#results-per-library)
    * [Netlib](#netlib)
    * [ATLAS (st)](#atlas-st)
    * [OpenBLAS](#openblas)
    * [ATLAS (mt)](#atlas-mt)
    * [GotoBLAS2](#gotoblas2)
    * [MKL](#mkl)
    * [BLIS](#blis)
    * [cuBLAS](#cublas)

***

## Configuration

**OS**: Debian Jessie, kernel 4.4

**R software**: [Microsoft R Open](https://mran.microsoft.com/open/) (3.2.4)

**Libraries**:

|CPU (single-threaded)|CPU (multi-threaded)|GPU|
|---|---|---|
|[Netlib](http://www.netlib.org/) (debian package, blas 1.2.20110419, lapack 3.5.0)|[OpenBLAS](http://www.openblas.net/) (debian package, 0.2.12)|[NVIDIA cuBLAS](https://developer.nvidia.com/cublas) (NVBLAS 6.5 + Intel MKL)|
|[ATLAS](http://math-atlas.sourceforge.net/) (debian package, 3.10.2)|[ATLAS](http://math-atlas.sourceforge.net/) (dev branch, 3.11.38)| |
| |[GotoBLAS2](https://prs.ism.ac.jp/~nakama/SurviveGotoBLAS2/) (Survive fork, 3.141)| |
| |[Intel MKL](https://mran.microsoft.com/download/) (part of RevoMath package, 3.2.4)| |
| |[BLIS](https://github.com/flame/blis) (dev branch, 0.2.0+/17.05.2016)| |

**Hosts**:

|No.|CPU|GPU|
|---|---|---|
|1.|[Intel Core i7-4790K](http://ark.intel.com/products/80807/Intel-Core-i7-4790K-Processor-8M-Cache-up-to-4_40-GHz) (OC 4.5 GHz)|[MSI GeForce GTX 980 Ti Lightning](https://us.msi.com/Graphics-card/GTX-980-Ti-LIGHTNING.html#hero-specification)|
|2.|[Intel Core i5-4590](http://ark.intel.com/products/80815/Intel-Core-i5-4590-Processor-6M-Cache-up-to-3_70-GHz)|[NVIDIA GeForce GT 430](http://www.geforce.com/hardware/desktop-gpus/geforce-gt-430/specifications)|
|3.|[Intel Core i5-4590](http://ark.intel.com/products/80815/Intel-Core-i5-4590-Processor-6M-Cache-up-to-3_70-GHz)|[NVIDIA GeForce GTX 750 Ti](http://www.geforce.com/hardware/desktop-gpus/geforce-gtx-750-ti/specifications)|
|4.|[Intel Core i5-3570](http://ark.intel.com/products/65702/Intel-Core-i5-3570-Processor-6M-Cache-up-to-3_80-GHz)| - |
|5.|[Intel Core i3-2120](http://ark.intel.com/products/53426/Intel-Core-i3-2120-Processor-3M-Cache-3_30-GHz)| - |
|6.|[Intel Core i3-3120M](http://ark.intel.com/products/71465/Intel-Core-i3-3120M-Processor-3M-Cache-2_50-GHz)| - |
|7.|[Intel Core i5-3317U](http://ark.intel.com/products/65707/Intel-Core-i5-3317U-Processor-3M-Cache-up-to-2_60-GHz)|[NVIDIA GeForce GT 630M](http://www.geforce.com/hardware/notebook-gpus/geforce-gt-630m/specifications)|
|8.|[Intel Pentium Dual-Core E5300](http://ark.intel.com/products/35300/Intel-Pentium-Processor-E5300-2M-Cache-2_60-GHz-800-MHz-FSB)| - |

**Benchmarks**: [R-benchmark-25](http://r.research.att.com/benchmarks/R-benchmark-25.R), [Revolution](https://gist.github.com/andrie/24c9672f1ea39af89c66#file-rro-mkl-benchmark-r), [Gcbd](https://cran.r-project.org/web/packages/gcbd/vignettes/gcbd.pdf).

```{r config, echo=FALSE}
# Global settings shared by all chunks below.

HOSTNAMES    = suppressWarnings(readLines("hosts-list.txt"))
BENCHMARKS   = c("urbanek", "revolution", "gcbd")
LIBRARIES    = c("netlib", "atlas_st", "openblas", "atlas_mt", "gotoblas2", "mkl", "blis", "cublas")

# Baselines used for the "performance gain" ratios: the reference library in
# the per-host plots and the reference CPU / GPU in the per-library plots.
LIBRARY.REF  = "netlib"
HOST.REF     = "Intel Core i3-3120M"
HOST.REF.GPU = "NVIDIA GeForce GT 430"

# File name templates; the <...> tokens are substituted literally when the
# result files are looked up.
# NOTE(review): the placeholder tokens had been lost (the substitutions used an
# empty pattern, which cannot produce a usable file name). The order
# benchmark-library-host is assumed from the substitution order - confirm it
# against the actual file names in RESULTS.DIR.
HOST.INFO.PATTERN = "host-info-<HOST>.log"
BENCHMARK.PATTERN = "test-<BENCHMARK>-<LIBRARY>-<HOST>.rds"
BENCHMKAR.PATTERN = BENCHMARK.PATTERN # misspelled legacy name, kept as an alias

RESULTS.DIR = "results"
GEN.DIR     = "gen"
DATA.DIR    = file.path(GEN.DIR, "data")
IMAGES.DIR  = file.path(GEN.DIR, "img")
```

```{r read-write-data, echo=FALSE}
# Collect host descriptions and benchmark timings from RESULTS.DIR into two
# data frames (hosts.info, benchmark.results) and cache them in DATA.DIR.

for (d in c(GEN.DIR, DATA.DIR, IMAGES.DIR))
    if (!dir.exists(d))
        dir.create(d)

# --- hosts: line 1 of the host info file is used as the CPU; when the file has
# --- exactly 3 lines, line 3 is used as the GPU
hosts.info = data.frame(Host=character(), CPU=character(), GPU=character())

for (hostname in HOSTNAMES) {

    fp = file.path(RESULTS.DIR, gsub("<HOST>", hostname, HOST.INFO.PATTERN, fixed=TRUE))

    if (file.exists(fp)) {

        dr = trimws(readLines(fp))

        hosts.info = rbind(hosts.info,
                           data.frame(Host = hostname,
                                      # drop parenthesised parts
                                      CPU  = gsub("\\(.*?\\)", "", dr[1]),
                                      # keep the vendor and the bracketed name
                                      GPU  = if (length(dr)==3)
                                                 gsub("(NVIDIA|MSI).*\\[(.*)\\]", "\\1 \\2", dr[3])
                                             else
                                                 NA))
    } else {
        warning(paste("File", fp, "does not exist"))
    }
}

saveRDS(hosts.info, file.path(DATA.DIR, "hosts.info.rds"))

# --- benchmark results: one row per host / benchmark / library / test (/ size)
benchmark.results = data.frame(Host=character(), Benchmark=character(), Library=character(),
                               Test=character(), Runs=integer(), Size=integer(), Time=numeric())

for (host in hosts.info$Host) {
    for (benchmark in BENCHMARKS) {
        for (lib in LIBRARIES) {

            # cuBLAS results exist only for hosts with a GPU
            if (lib=="cublas" && is.na(hosts.info[which(hosts.info$Host==host), "GPU"]))
                next

            fn = BENCHMARK.PATTERN
            fn = gsub("<BENCHMARK>", benchmark, fn, fixed=TRUE)
            fn = gsub("<LIBRARY>",   lib,       fn, fixed=TRUE)
            fn = gsub("<HOST>",      host,      fn, fixed=TRUE)
            fp = file.path(RESULTS.DIR, fn)

            if (file.exists(fp)) {

                db = readRDS(fp)

                runs = NA
                if (length(attr(db, "runs"))==1) {
                    runs = attr(db, "runs")
                } else { # gcbd
                    # several runs/size values are stored as attributes:
                    # reshape to long format with one row per test and size
                    db = melt(cbind(db, runs=attr(db, "runs"), size=attr(db, "size")),
                              id.vars=c("runs", "size"), variable.name="name", value.name="time")
                    runs = db$runs
                }

                size = NA
                if ("size" %in% colnames(db)) # gcbd
                    size = db$size

                benchmark.results = rbind(benchmark.results,
                                          data.frame(Host=host, Benchmark=benchmark, Library=lib,
                                                     Test=db$name, Runs=runs, Size=size, Time=db$time))
            } else {
                warning(paste("File", fp, "does not exist"))
            }
        }
    }
}

saveRDS(benchmark.results, file.path(DATA.DIR, "benchmark.results.rds"))
```

```{r per-host, echo=FALSE, results='asis'}
# For every host: one section per benchmark and test, comparing all libraries.
# Markdown is written with cat() and the plots are saved as PNG files in
# IMAGES.DIR and referenced from the generated markdown.

cat("# Results per host\n\n")

for (host in hosts.info$Host) {

    host.cpu = hosts.info[which(hosts.info$Host==host), "CPU"]
    host.gpu = hosts.info[which(hosts.info$Host==host), "GPU"]

    cat(paste("##", host.cpu))
    if (!is.na(host.gpu))
        cat(paste(" +", host.gpu))
    cat("\n\n\n\n")

    for (benchmark in BENCHMARKS) {

        cat(paste0("### ", ifelse(benchmark=="urbanek", "R-benchmark-25", # fix to common name
                                  paste(toupper(substring(benchmark, 1,1)), substring(benchmark, 2), " benchmark", sep="", collapse=" ")), "\n\n"))

        testnames = benchmark.results %>% filter(Benchmark == benchmark) %>% select(Test) %>% unique

        if (benchmark == "urbanek") # remove tests with almost the same results
            testnames = testnames %>% filter(row_number() %in% c(4, 5, 7, 8, 9, 10, 15))

        # seq_len() so that an empty test list yields no iterations
        for (i in seq_len(nrow(testnames))) {

            test = as.character(testnames[i, "Test"])

            data.to.plot = benchmark.results %>% filter(Host == host & Benchmark == benchmark & Test == test)

            # performance gain = time of the reference library / time of the library
            if (benchmark != "gcbd")
                data.to.plot = data.to.plot %>% mutate(PerfGain=data.to.plot[which(data.to.plot$Library==LIBRARY.REF), "Time"]/Time)

            # display names for the libraries (new level = old level)
            renamedLibs = list(
                "cuBLAS"="cublas",
                "BLIS"="blis",
                "MKL"="mkl",
                "ATLAS (mt)"="atlas_mt",
                "OpenBLAS"="openblas",
                "GotoBLAS2"="gotoblas2",
                "ATLAS (st)"="atlas_st",
                "Netlib"="netlib")

            if (benchmark != "gcbd")
                levels(data.to.plot$Library) = renamedLibs
            else
                levels(data.to.plot$Library) = rev(renamedLibs)

            # colour class of each bar
            data.to.plot = data.to.plot %>% mutate(Color=sapply(Library, function(x){
                if(x=="Netlib") "A" else if(x=="ATLAS (st)") "B" else if(x=="cuBLAS") "D" else "C"}))

            cat(paste0("#### ", # fix urbanek errors in names
                       gsub(" \\(.*\\)", "",
                            gsub("Linear regr. over a 3000x3000 matrix", "Linear regr. over a 2000x2000 matrix",
                                 gsub("Eigenvalues of a 640x640", "Eigenvalues of a 600x600", test)) ) ), "\n\n")

            # an infinite time marks a run that did not finish; report it and
            # leave it out of the plot
            if (any(data.to.plot$Time == Inf)) {
                cat(paste(as.character(data.to.plot[which(data.to.plot$Time == Inf), "Library"])), "hangs in this test\n\n")
            }
            data.to.plot = data.to.plot[data.to.plot$Time != Inf, ]

            image.path = file.path(IMAGES.DIR, paste0("img_ph",
                                                      "_h", which(host == hosts.info$Host),
                                                      "_b", which(benchmark == BENCHMARKS),
                                                      "_t", i,
                                                      ".png"))

            cat(paste0("Time in seconds ",
                       ifelse(benchmark=="gcbd", "regarding matrix size - right panel on log scale", ""),
                       " - ",
                       ifelse(benchmark!="gcbd", data.to.plot$Runs[1],
                              paste(" from ", max(data.to.plot$Runs), " to ", min(data.to.plot$Runs))),
                       " runs - lower is better\n\n"))

            myColors = c("#E6191A", "#F25F1E", "#636363", "#5AAC45")
            names(myColors) = levels(data.to.plot$Color)
            colScale = scale_fill_manual(name = "Color", values = myColors)

            if (benchmark != "gcbd") {

                # horizontal bar chart: time per library + performance gain column
                png(image.path, width=600, height=35*nrow(data.to.plot))
                print(ggplot(data.to.plot, aes(x=reorder(Library, -Time), y=Time, fill=Color)) +
                          theme_classic() +
                          theme(
                              panel.border = element_blank(),
                              axis.title.x=element_blank(),
                              axis.title.y=element_blank(),
                              axis.text.x = element_text(colour="grey60"),
                              axis.ticks.x=element_line(color="grey60"),
                              axis.ticks.y=element_line(color="grey60"),
                              legend.position="none"
                          ) +
                          scale_y_continuous(expand = c(0, 0), limits = c(0, 1.20*max(data.to.plot$Time))) +
                          geom_bar(stat="identity", width=.7) +
                          coord_flip() +
                          geom_text(aes(y=Time, x=Library, label=format(Time, digits=2, nsmall=2)), hjust=-0.25, size=3, colour="grey45") +
                          geom_text(aes(y=1.2*max(data.to.plot$Time), x=Library, label=paste0("x", trimws(format(PerfGain, digits=1, nsmall=1)))), hjust=1, size=4) +
                          geom_hline(yintercept = 0, color="grey") +
                          geom_vline(xintercept = 0.4, color="grey") +
                          annotate("text", x=nrow(data.to.plot)+0.55, y=1.2*max(data.to.plot$Time), label="Performance gain", hjust=1) +
                          colScale
                )
                dev.off()
                cat(paste0("![](", image.path, ")\n\n\n\n"))

            } else {

                rbColors = c("#F40000","#FF8000","#0094FF","#E900FF","#B35807","#7F00FF","#FFBC70","#00DD0E")

                # left panel: linear scale
                png(image.path, width=400, height=350, type="cairo")
                print(ggplot(data.to.plot, aes(x=Size, y=Time, group=Library, color=Library, shape=Library, fill=Library)) +
                          theme_classic() +
                          theme(
                              panel.border = element_blank(),
                              axis.title.x=element_blank(),
                              axis.title.y=element_blank(),
                              axis.text.x = element_text(colour="grey60"),
                              axis.text.y = element_text(colour="grey60"),
                              axis.ticks.x=element_line(color="grey60"),
                              axis.ticks.y=element_line(color="grey60"),
                              legend.margin=unit(c(-13,8), "mm"),
                              legend.title=element_blank()
                          ) +
                          geom_line(size=1) +
                          geom_point(size=2, color="grey30") +
                          scale_x_continuous(expand = c(0, 0), limits = c(0, 5000)) +
                          scale_y_continuous(expand = c(0, 0)) +
                          coord_cartesian(ylim=c(0, max(20, max(data.to.plot$Time)/3))) +
                          geom_hline(yintercept = 0, color="grey") +
                          geom_vline(xintercept = 0.4, color="grey") +
                          scale_shape_manual(values=c(21,24,22,25,23,21,22,23)) +
                          scale_color_manual(values=rbColors) +
                          scale_fill_manual(values=rbColors)
                )
                dev.off()
                cat(paste0("![](", image.path, ")"))

                # right panel: log-log scale, stored next to the first image
                # with a "b" suffix (literal match on the extension)
                image.path.log = sub(".png", "b.png", image.path, fixed=TRUE)

                png(image.path.log, width=350, height=350, type="cairo")
                print(ggplot(data.to.plot, aes(x=Size, y=Time, group=Library, color=Library, shape=Library, fill=Library)) +
                          theme_classic() +
                          theme(
                              panel.border = element_blank(),
                              axis.title.x=element_blank(),
                              axis.title.y=element_blank(),
                              axis.text.x = element_text(colour="grey60"),
                              axis.text.y = element_text(colour="grey60"),
                              axis.ticks.x=element_line(color="grey60"),
                              axis.ticks.y=element_line(color="grey60"),
                              legend.position="none"
                          ) +
                          geom_line(size=1) +
                          geom_point(size=2, color="grey30") +
                          scale_x_log10(expand=c(0,0), breaks=c(100, 200, 400, 800, 1600, 3200, 5000), limits=c(100, 5500)) +
                          scale_y_log10() +
                          scale_shape_manual(values=c(21,24,22,25,23,21,22,23)) +
                          geom_hline(yintercept = 0, color="grey") +
                          geom_vline(xintercept = min(data.to.plot$Size), color="grey") +
                          scale_color_manual(values=rbColors) +
                          scale_fill_manual(values=rbColors)
                )
                dev.off()
                cat(paste0("![](", image.path.log, ")\n\n\n\n"))
            }
        }
    }
}
```

```{r per-library, echo=FALSE, results='asis'}
# For every library: one section per benchmark and test, comparing all hosts
# (CPUs, or GPUs in the case of cuBLAS).

cat("# Results per library\n\n")

# display names for the libraries (internal name -> heading)
renamedLibs = list(
    "cublas"="cuBLAS",
    "blis"="BLIS",
    "mkl"="MKL",
    "atlas_mt"="ATLAS (mt)",
    "openblas"="OpenBLAS",
    "gotoblas2"="GotoBLAS2",
    "atlas_st"="ATLAS (st)",
    "netlib"="Netlib")

for (lib in LIBRARIES) {

    cat(paste("##", renamedLibs[[lib]]))
    cat("\n\n\n\n")

    for (benchmark in BENCHMARKS) {

        cat(paste0("### ", ifelse(benchmark=="urbanek", "R-benchmark-25", # fix to common name
                                  paste(toupper(substring(benchmark, 1,1)), substring(benchmark, 2), " benchmark", sep="", collapse=" ")), "\n\n"))

        testnames = benchmark.results %>% filter(Benchmark == benchmark) %>% select(Test) %>% unique

        if (benchmark == "urbanek") # remove tests with almost the same results
            testnames = testnames %>% filter(row_number() %in% c(4, 5, 7, 8, 9, 10, 15))

        # seq_len() so that an empty test list yields no iterations
        for (i in seq_len(nrow(testnames))) {

            test = as.character(testnames[i, "Test"])

            data.to.plot = benchmark.results %>% filter(Library == lib & Benchmark == benchmark & Test == test)

            if (benchmark != "gcbd") {
                # performance gain = time on the reference host / time on the host.
                # The reference row is located by host name: a row number taken
                # from hosts.info is not a valid row number of data.to.plot when
                # some hosts have no result for this library (e.g. cuBLAS).
                ref.rows  = if (lib!="cublas") which(hosts.info$CPU==HOST.REF) else which(hosts.info$GPU==HOST.REF.GPU)
                ref.hosts = as.character(hosts.info$Host[ref.rows])
                # NA when the reference host has no result for this test
                ref.time  = data.to.plot$Time[as.character(data.to.plot$Host) %in% ref.hosts][1]

                data.to.plot = data.to.plot %>% mutate(PerfGain=ref.time/Time)
            }

            # label of each host: its CPU, or its GPU for cuBLAS
            data.to.plot = data.to.plot %>% rowwise() %>% mutate(CPUGPU=hosts.info[which(hosts.info$Host==Host), ifelse(lib!="cublas", "CPU", "GPU")])

            cat(paste0("#### ", # fix urbanek errors in names
                       gsub(" \\(.*\\)", "",
                            gsub("Linear regr. over a 3000x3000 matrix", "Linear regr. over a 2000x2000 matrix",
                                 gsub("Eigenvalues of a 640x640", "Eigenvalues of a 600x600", test)) ) ), "\n\n")

            # an infinite time marks a run that did not finish; report it and
            # leave it out of the plot
            if (any(data.to.plot$Time == Inf)) {
                cat(paste(as.character(as.data.frame(data.to.plot)[which(data.to.plot$Time == Inf), "CPUGPU"])), "hangs in this test\n\n")
            }
            data.to.plot = data.to.plot[data.to.plot$Time != Inf, ]

            image.path = file.path(IMAGES.DIR, paste0("img_pl",
                                                      "_l", which(lib == LIBRARIES),
                                                      "_b", which(benchmark == BENCHMARKS),
                                                      "_t", i,
                                                      ".png"))

            cat(paste0("Time in seconds ",
                       ifelse(benchmark=="gcbd", "regarding matrix size - right panel on log scale", ""),
                       " - ",
                       ifelse(benchmark!="gcbd", data.to.plot$Runs[1],
                              paste(" from ", max(data.to.plot$Runs), " to ", min(data.to.plot$Runs))),
                       " runs - lower is better\n\n"))

            myColors = c("#5AAC45", "#636363", "#636363", "#636363", "#E6191A", "#636363", "#636363")
            if (lib=="cublas")
                myColors = c("#5AAC45", "#E6191A", "#636363", "#636363")
            colScale = scale_fill_manual(name = "Color", values = myColors)

            # hosts sharing the same CPU/GPU label collapse when rows are identical
            data.to.plot = unique(select(data.to.plot, -Host))

            if (benchmark != "gcbd") {

                # horizontal bar chart: time per host + performance gain column
                png(image.path, width=600, height=50*length(unique(data.to.plot$CPUGPU))+ifelse(nrow(data.to.plot)>1, 0, 20))
                print(ggplot(data.to.plot, aes(x=reorder(CPUGPU, -Time), y=Time, fill=CPUGPU)) +
                          theme_classic() +
                          theme(
                              panel.border = element_blank(),
                              axis.title.x=element_blank(),
                              axis.title.y=element_blank(),
                              axis.text.x = element_text(colour="grey60"),
                              axis.ticks.x=element_line(color="grey60"),
                              axis.ticks.y=element_line(color="grey60"),
                              legend.position="none"
                          ) +
                          scale_y_continuous(expand = c(0, 0), limits = c(0, 1.20*max(data.to.plot$Time))) +
                          geom_bar(stat="identity", width=.7) +
                          coord_flip() +
                          geom_text(aes(y=Time, x=CPUGPU, label=format(Time, digits=2, nsmall=2)), hjust=-0.25, size=3, colour="grey45") +
                          geom_text(aes(y=1.2*max(data.to.plot$Time), x=CPUGPU, label=paste0("x", trimws(format(PerfGain, digits=1, nsmall=1)))), hjust=1, size=4) +
                          geom_hline(yintercept = 0, color="grey") +
                          geom_vline(xintercept = 0.4, color="grey") +
                          annotate("text", x=length(unique(data.to.plot$CPUGPU))+ifelse(nrow(data.to.plot)>1, 0.55, 0.45), y=1.2*max(data.to.plot$Time), label="Performance gain", hjust=1) +
                          colScale
                )
                dev.off()
                cat(paste0("![](", image.path, ")\n\n\n\n"))

            } else {

                rbColors = c("#F40000","#FF8000","#0094FF","#7F00FF","#E900FF","#00DD0E","#B35807")

                # left panel: linear scale
                png(image.path, width=400, height=350, type="cairo")
                print(ggplot(data.to.plot, aes(x=Size, y=Time, group=CPUGPU, color=CPUGPU, shape=CPUGPU, fill=CPUGPU)) +
                          theme_classic() +
                          theme(
                              panel.border = element_blank(),
                              axis.title.x=element_blank(),
                              axis.title.y=element_blank(),
                              axis.text.x = element_text(colour="grey60"),
                              axis.text.y = element_text(colour="grey60"),
                              axis.ticks.x=element_line(color="grey60"),
                              axis.ticks.y=element_line(color="grey60"),
                              legend.margin=unit(c(0,-60,0,40), "mm"),
                              legend.title=element_blank()
                          ) +
                          geom_line(size=1) +
                          geom_point(size=2, color="grey30") +
                          scale_x_continuous(expand = c(0, 0), limits = c(0, 5000)) +
                          scale_y_continuous(expand = c(0, 0)) +
                          geom_hline(yintercept = 0, color="grey") +
                          geom_vline(xintercept = 0.4, color="grey") +
                          scale_shape_manual(values=c(25,24,22,21,23,25,24)) +
                          scale_color_manual(values=rbColors) +
                          scale_fill_manual(values=rbColors)
                )
                dev.off()
                cat(paste0("![](", image.path, ")"))

                # right panel: log-log scale, stored next to the first image
                # with a "b" suffix (literal match on the extension)
                image.path.log = sub(".png", "b.png", image.path, fixed=TRUE)

                png(image.path.log, width=350, height=350, type="cairo")
                print(ggplot(data.to.plot, aes(x=Size, y=Time, group=CPUGPU, color=CPUGPU, shape=CPUGPU, fill=CPUGPU)) +
                          theme_classic() +
                          theme(
                              panel.border = element_blank(),
                              axis.title.x=element_blank(),
                              axis.title.y=element_blank(),
                              axis.text.x = element_text(colour="grey60"),
                              axis.text.y = element_text(colour="grey60"),
                              axis.ticks.x=element_line(color="grey60"),
                              axis.ticks.y=element_line(color="grey60"),
                              legend.position="none"
                          ) +
                          geom_line(size=1) +
                          geom_point(size=2, color="grey30") +
                          scale_x_log10(expand=c(0,0), breaks=c(100, 200, 400, 800, 1600, 3200, 5000), limits=c(100, 5500)) +
                          scale_y_log10() +
                          scale_shape_manual(values=c(25,24,22,21,23,25,24)) +
                          geom_hline(yintercept = 0, color="grey") +
                          geom_vline(xintercept = min(data.to.plot$Size), color="grey") +
                          scale_color_manual(values=rbColors) +
                          scale_fill_manual(values=rbColors)
                )
                dev.off()
                cat(paste0("![](", image.path.log, ")\n\n\n\n"))
            }
        }
    }
}
```