install.packages("ggplot2") install.packages("treemap") install.packages("hrbrthemes") library(hrbrthemes) library(treemap) library(ggplot2) library(dplyr) # Dataset loading #### imdb_raw <- read.csv("D:\\Studia\\JupyterProjects\\WIZ\\imdb.csv", header=TRUE, sep=",") imdb_raw %>% select(Series_Title, Released_Year, Runtime, Genre, IMDB_Rating, Meta_score, Director, Star1, Star2, Star3, Star4, No_of_Votes, Gross) -> imdb # Plot 1 #### # Plot 2 #### imdb %>% select(Genre, Gross) -> imdb_2 imdb_2 %>% group_by(Genre) %>% summarise(Income = sum(Gross)) %>% arrange(desc(Income)) %>% top_n(25) %>% treemap(imdb_2, # data index="Genre", vSize="Income", type="index", # Main title="", palette="Dark2", # Borders: border.col=c("black"), border.lwds=1, # Labels fontsize.labels=0.7, fontcolor.labels="white", fontface.labels=1, bg.labels=c("transparent"), align.labels=c("left", "top"), overlap.labels=0.5, inflate.labels=T ) imdb %>% count(Genre) -> imdb_2b # Plot 2b #### imdb_2b %>% arrange(desc(Genre)) %>% top_n(25) %>% treemap(imdb_2b, # data index="Genre", vSize="n", type="index", # Main title="", palette="Dark2", # Borders: border.col=c("black"), border.lwds=1, # Labels fontsize.labels=0.7, fontcolor.labels="white", fontface.labels=1, bg.labels=c("transparent"), align.labels=c("left", "top"), overlap.labels=0.5, inflate.labels=T ) # Plot 3 #### # Plot 4 #### # Prepare data imdb_3_d <- imdb %>% select(Director) %>% count(Director) %>% arrange(desc(n)) %>% top_n(15) %>% cbind(Role='Director') names(imdb_3_d)[names(imdb_3_d) == "n"] <- "Movies" names(imdb_3_d)[names(imdb_3_d) == "Director"] <- "Name" imdb_3_s1 <- imdb %>% select(Star1) %>% count(Star1) names(imdb_3_s1)[names(imdb_3_s1) == 'Star1'] <- 'Star' imdb_3_s2 <- imdb %>% select(Star2) %>% count(Star2) names(imdb_3_s2)[names(imdb_3_s2) == 'Star2'] <- 'Star' imdb_3_s3 <- imdb %>% select(Star3) %>% count(Star3) names(imdb_3_s3)[names(imdb_3_s3) == 'Star3'] <- 'Star' imdb_3_s4 <- imdb %>% select(Star4) %>% count(Star4) names(imdb_3_s4)[names(imdb_3_s4) == 'Star4'] <- 'Star' imdb_3_s <- bind_rows(imdb_3_s1, imdb_3_s2, imdb_3_s3, imdb_3_s4) %>% group_by(Star) %>% summarise_all(funs(sum(., na.rm = TRUE))) %>% arrange(desc(n)) %>% top_n(15) %>% cbind(Role='Star') names(imdb_3_s)[names(imdb_3_s) == "n"] <- "Movies" names(imdb_3_s)[names(imdb_3_s) == "Star"] <- "Name" imdb_3 <- bind_rows(imdb_3_d, imdb_3_s) %>% #group_by(Role) %>% #arrange(desc(Movies)) arrange(Role) # Set a number of 'empty bar' to add at the end of each group imdb_3 = rbind(imdb_3[1:15,],NA, NA, NA, imdb_3[16:30,], NA, NA, NA) #imdb_3 <- imdb_3 %>% arrange(Role) imdb_3$id <- seq(1, nrow(imdb_3)) # Get the name and the y position of each label label_imdb_3 <- imdb_3 number_of_bar <- nrow(label_imdb_3) angle <- 90 - 360 * (label_imdb_3$id-0.5) /number_of_bar # I substract 0.5 because the letter must have the angle of the center of the bars. Not extreme right(1) or extreme left (0) label_imdb_3$hjust <- ifelse( angle < -90, 1, 0) label_imdb_3$angle <- ifelse(angle < -90, angle+180, angle) # Make the plot p <- ggplot(imdb_3, aes(x=as.factor(id), y=Movies, fill=Role)) + # Note that id is a factor. If x is numeric, there is some space between the first bar geom_bar(aes(x=as.factor(id), y=Movies, fill=Role), stat="identity", alpha=0.5) + # Add a val=100/75/50/25 lines. I do it at the beginning to make sur barplots are OVER it. # Add text showing the value of each 100/75/50/25 lines annotate("text", x = rep(max(imdb_3$id)-1,4), y = c(5, 10, 15, 20), label = c("5", "10", "15", "20") , color="grey", size=3 , angle=0, fontface="bold", hjust=1) + annotate("text", x = rep(17,4), y = c(5, 10, 15, 20), label = c("5", "10", "15", "20") , color="grey", size=3 , angle=0, fontface="bold", hjust=1) + geom_bar(aes(x=as.factor(id), y=Movies, fill=Role), stat="identity", alpha=0.5) + ylim(-20,20) + theme_minimal() + theme( legend.position = "none", axis.text = element_blank(), axis.title = element_blank(), panel.grid = element_blank(), plot.margin = unit(rep(1,4), "cm") ) + coord_polar() + geom_text(data=label_imdb_3, aes(x=id, y=Movies+5, label=Name, hjust=hjust), color="black", fontface="bold",alpha=0.6, size=2.5, angle= label_imdb_3$angle, inherit.aes = FALSE ) p # Plot 5 #### imdb_4 <- imdb %>% select(Runtime, IMDB_Rating, Meta_score, No_of_Votes) #imdb4_year_rating <- imdb_4 %>% group_by(Runtime) %>% summarise(me = mean(IMDB_Rating)) imdb_4 %>% group_by(Runtime) %>% summarise(Avg_IMDB_Rating = mean(IMDB_Rating)) %>% ggplot(aes(x = Runtime, y = Avg_IMDB_Rating)) + geom_bar(stat = "identity") #png(filename="C:/Users/pkopy/OneDrive/Pulpit/WIZ_plot5_2.png", width=1980, height=1080) imdb_4 %>% group_by(Runtime) %>% summarise(Avg_IMDB_Rating = mean(IMDB_Rating)) %>% arrange(Avg_IMDB_Rating) %>% ggplot( aes(x=Runtime, y=Avg_IMDB_Rating) ) + geom_segment( aes(x=Runtime ,xend=Runtime, y=0, yend=Avg_IMDB_Rating), color="grey") + geom_point(size=3, color="#69b3a2") + #coord_flip() + theme_ipsum() + theme( axis.text.x = element_text(angle = 90, hjust = 1), panel.grid.major.x = element_blank(), panel.border = element_blank(), axis.ticks.x = element_blank(), legend.position="none", ) + xlab("Runtime") + ylab("Average IMDB Rating") #dev.off()