library(hrbrthemes)
library(ggridges)
library(plotly)
library(tidyverse)
library(worldfootballR)
# Download Erling Haaland's per-match "summary" logs from FBref for the
# seasons ending in 2022 and 2023, then stack both seasons into one tibble.
haaland_summary_2022 <- fb_player_match_logs(
  "https://fbref.com/en/players/1f44ac21/Erling-Haaland",
  season_end_year = 2022,
  stat_type = "summary"
)
haaland_summary_2023 <- fb_player_match_logs(
  "https://fbref.com/en/players/1f44ac21/Erling-Haaland",
  season_end_year = 2023,
  stat_type = "summary"
)
haaland_game_logs <- as_tibble(
  rbind(haaland_summary_2022, haaland_summary_2023)
)
# One placeholder row for the 2022-10-29 Premier League fixture (Matchweek 14,
# Leicester City) with goals and xG left missing. It is appended to the match
# logs further down so that this date still occupies a row.
# NOTE(review): the name suggests a match missed through injury -- confirm.
injury_days <- tibble(
  Date = as.Date("2022-10-29"),
  Comp = "Premier League",
  Round = "Matchweek 14",
  Opponent = "Leicester City",
  Gls = NA,
  xG_Expected = NA
)
# Reduce the raw logs to league matches (Bundesliga and Premier League), keep
# only the columns used downstream, append the placeholder row from
# `injury_days`, and order everything chronologically.
#
# Added columns:
#   rownum          - 1-based position of the row after sorting by Date
#   running_Gls_avg - cumulative mean of goals, counting a missing `Gls` as 0
haaland_prepped <- haaland_game_logs %>%
  filter(Comp %in% c("Bundesliga", "Premier League")) %>%
  # select() renames `Gls_Performance` to `Gls` while picking the columns.
  select(Date, Comp, Round, Opponent, Gls = Gls_Performance, xG_Expected) %>%
  mutate(Date = as.Date(Date)) %>%
  rbind(injury_days) %>%
  arrange(Date) %>%
  mutate(
    rownum = row_number(),
    running_Gls_avg = cummean(ifelse(is.na(Gls), 0, Gls))
  )
# For every row i of `haaland_prepped`, collect the goals scored in all rows
# up to and including i, skipping rows where `Gls` is missing.
#
# Result: a tibble with one row per `rownum` and a list-column `gls_vector`
# whose i-th element is that numeric vector of per-game goals.
#
# The vectors are built by plain subsetting of the `Gls` column, so the result
# does not depend on what `as.vector()` returns for a data frame (which differs
# between R versions), and nothing is grown with rbind() inside a loop. An
# empty `haaland_prepped` yields an empty tibble instead of an error.
haaland_gls_vctrs <- tibble(rownum = haaland_prepped$rownum) %>%
  mutate(
    gls_vector = map(
      rownum,
      function(i) {
        played_so_far <- haaland_prepped$rownum <= i &
          !is.na(haaland_prepped$Gls)
        haaland_prepped$Gls[played_so_far]
      }
    )
  )
# One row per Premier League match with:
#   data       - the remaining per-match columns, nested
#   gls_vector - the goal history up to that match (from `haaland_gls_vctrs`)
#   total_gls  - running Premier League goal total, counting a missing `Gls`
#                as 0
haaland_prepped_final <- haaland_prepped %>%
  group_by(Comp, rownum, Gls) %>%
  nest() %>%
  # Join on an explicit key so the match cannot silently change if either
  # table ever gains another column with a shared name.
  inner_join(haaland_gls_vctrs, by = "rownum") %>%
  filter(Comp == "Premier League") %>%
  ungroup() %>%
  mutate(total_gls = cumsum(ifelse(is.na(Gls), 0, Gls)))
#' Bootstrap simulation of a final goal total.
#'
#' Each simulated season draws `n_games` per-game goal counts, with
#' replacement, from the observed `goal_vector`, sums them, and adds the goals
#' already scored.
#'
#' @param goal_vector Numeric vector of goals scored in past games.
#' @param n_games Number of remaining games to simulate (single value, >= 0).
#' @param goals_scored Goals already scored; added to every simulated total.
#' @param n_sims Number of simulated seasons. Defaults to 10000.
#' @return Numeric vector of length `n_sims` with the simulated totals.
mc_sim <- function(goal_vector, n_games, goals_scored, n_sims = 10000) {
  stopifnot(
    length(n_games) == 1, n_games >= 0,
    length(n_sims) == 1, n_sims >= 0
  )

  # Nothing left to play: every simulated total is just the current total.
  if (n_games == 0) {
    return(rep(goals_scored, n_sims))
  }
  if (length(goal_vector) == 0) {
    stop("`goal_vector` must contain at least one game to resample from.",
         call. = FALSE)
  }

  # Sample positions rather than values: sample(x, ...) treats a length-one
  # numeric x >= 1 as the range 1:x, so a single observed game of 2 goals
  # would be resampled as "1 or 2 goals" instead of always 2.
  picks <- sample.int(length(goal_vector), n_games * n_sims, replace = TRUE)

  # One column per simulated season, one row per remaining game.
  draws <- matrix(goal_vector[picks], nrow = n_games, ncol = n_sims)
  colSums(draws) + goals_scored
}
#' Poisson simulation of a final goal total.
#'
#' Each simulated season draws `n_games` independent Poisson goal counts whose
#' rate is the mean of `goal_vector`, sums them, and adds the goals already
#' scored.
#'
#' @param goal_vector Numeric vector of goals scored in past games; its mean
#'   is used as the per-game Poisson rate.
#' @param n_games Number of remaining games to simulate (single value, >= 0).
#' @param goals_scored Goals already scored; added to every simulated total.
#' @param n_sims Number of simulated seasons. Defaults to 10000.
#' @return Numeric vector of length `n_sims` with the simulated totals.
poisson_sim <- function(goal_vector, n_games, goals_scored, n_sims = 10000) {
  stopifnot(
    length(n_games) == 1, n_games >= 0,
    length(n_sims) == 1, n_sims >= 0
  )

  # The rate does not change between simulations, so compute it once.
  goals_per_game <- mean(goal_vector)

  # One column per simulated season, one row per remaining game. With
  # n_games == 0 this is a 0-row matrix whose column sums are all 0.
  draws <- matrix(
    rpois(n_games * n_sims, goals_per_game),
    nrow = n_games,
    ncol = n_sims
  )
  colSums(draws) + goals_scored
}
# Run both simulations for every Premier League row and summarise the
# bootstrap (mc_sim) draws.
#
#   games_left    - 38 minus the row's position in the table
#   simmed_data   - list-column of mc_sim() draws
#   p_simmed_data - list-column of poisson_sim() draws
#   median_sim and the lower_* / upper_* columns - median and the
#                   2.5 / 5 / 95 / 97.5 % quantiles of `simmed_data`
#   Matchweek     - `Round` without the "Matchweek " prefix, as a factor
#                   ordered by descending `rownum`
haaland_simmed <- haaland_prepped_final %>%
  mutate(
    games_left = 38 - row_number(),
    simmed_data = pmap(list(gls_vector, games_left, total_gls), mc_sim),
    p_simmed_data = pmap(list(gls_vector, games_left, total_gls), poisson_sim)
  ) %>%
  unnest(data) %>%
  # Row-wise so each summary sees that row's own vector of simulated totals.
  rowwise() %>%
  mutate(
    median_sim = median(simmed_data),
    lower_025 = quantile(simmed_data, .025),
    lower_05 = quantile(simmed_data, .05),
    upper_95 = quantile(simmed_data, .95),
    upper_975 = quantile(simmed_data, .975)
  ) %>%
  ungroup() %>%
  mutate(
    Matchweek = gsub("Matchweek ", "", Round),
    Matchweek = fct_reorder(Matchweek, -rownum)
  )
# Trend chart of the projected final goal total by match date: the median of
# the simulated totals as a line, with the 95% (lighter) and 90% (darker)
# simulation ranges as ribbons, and a dashed reference line at 32 goals.
# The `text` aesthetic holds the HTML tooltip shown by ggplotly().
haaland_sim_trend_chart <- haaland_simmed %>%
  ggplot(aes(
    x = Date,
    y = median_sim,
    text = paste(
      "Date:", Date,
      "<br>Goals:", Gls,
      "<br>Goals to Date:", total_gls,
      "<br>Forecasted Total:", median_sim,
      "<br><br>90% Range:", lower_05, "to", upper_95,
      "<br>95% Range:", lower_025, "to", upper_975
    )
  )) +
  geom_ribbon(
    aes(ymin = lower_025, ymax = upper_975),
    fill = "grey80",
    alpha = 0.5,
    group = 1
  ) +
  geom_ribbon(
    aes(ymin = lower_05, ymax = upper_95),
    fill = "grey70",
    alpha = 0.5,
    group = 1
  ) +
  geom_line(group = 1, color = "#6CABDD") +
  geom_hline(yintercept = 32, linetype = "dashed") +
  # annotate() draws the label exactly once. A geom_text() layer with constant
  # aesthetics inherits the plot data and would draw one copy of the label per
  # row, stacked on top of each other, each carrying the tooltip text.
  annotate(
    "text",
    x = as.Date("2022-08-14"),
    y = 32,
    label = "Salah's Record",
    vjust = 6,
    size = 3
  ) +
  ggtitle("Erling Haaland 2022 Projected Goal Total by Week") +
  theme_ipsum()
# Render the chart as an interactive plotly widget that shows only the custom
# `text` tooltip, with the hover label background set to #6CABDD.
layout(
  ggplotly(haaland_sim_trend_chart, tooltip = "text"),
  hoverlabel = list(bgcolor = "#6CABDD")
)