Fight Odds Analysis

Description

This script analyzes UFC fight odds data.

Libraries

library(tidyverse)
library(knitr)

Examine Data

Load data.

# Load the saved workspace file; it is expected to provide `df_master`,
# which every following step uses.
load(file = "./Datasets/df_master.RData")

Get summary.

# Column-by-column overview of the data as loaded.
df_master %>% summary()

##      NAME               Date              Event               City          
##  Length:5986        Length:5986        Length:5986        Length:5986       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##     State             Country          FightWeightClass       Round     
##  Length:5986        Length:5986        Length:5986        Min.   :1.00  
##  Class :character   Class :character   Class :character   1st Qu.:1.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :3.00  
##                                                           Mean   :2.43  
##                                                           3rd Qu.:3.00  
##                                                           Max.   :5.00  
##                                                                         
##     Method          Winner_Odds         Loser_Odds            Sex           
##  Length:5986        Length:5986        Length:5986        Length:5986       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##     fight_id       Result          FighterWeight   FighterWeightClass
##  Min.   :   1   Length:5986        Min.   :115.0   Length:5986       
##  1st Qu.: 749   Class :character   1st Qu.:135.0   Class :character  
##  Median :1497   Mode  :character   Median :155.0   Mode  :character  
##  Mean   :1497                      Mean   :163.8                     
##  3rd Qu.:2245                      3rd Qu.:185.0                     
##  Max.   :2993                      Max.   :265.0                     
##                                                                      
##      REACH            SLPM             SAPM             STRA       
##  Min.   :58.00   Min.   : 0.000   Min.   : 0.100   Min.   :0.0000  
##  1st Qu.:69.00   1st Qu.: 2.680   1st Qu.: 2.630   1st Qu.:0.3900  
##  Median :72.00   Median : 3.440   Median : 3.230   Median :0.4400  
##  Mean   :71.77   Mean   : 3.531   Mean   : 3.435   Mean   :0.4417  
##  3rd Qu.:75.00   3rd Qu.: 4.250   3rd Qu.: 4.030   3rd Qu.:0.4900  
##  Max.   :84.00   Max.   :11.140   Max.   :23.330   Max.   :0.8800  
##  NA's   :215                                                       
##       STRD              TD              TDA              TDD        
##  Min.   :0.0900   Min.   : 0.000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.5100   1st Qu.: 0.560   1st Qu.:0.2700   1st Qu.:0.5100  
##  Median :0.5600   Median : 1.210   Median :0.3700   Median :0.6400  
##  Mean   :0.5527   Mean   : 1.518   Mean   :0.3745   Mean   :0.6157  
##  3rd Qu.:0.6000   3rd Qu.: 2.160   3rd Qu.:0.5000   3rd Qu.:0.7600  
##  Max.   :0.9200   Max.   :14.190   Max.   :1.0000   Max.   :1.0000  
##                                                                     
##       SUBA        
##  Min.   : 0.0000  
##  1st Qu.: 0.1000  
##  Median : 0.4000  
##  Mean   : 0.5516  
##  3rd Qu.: 0.8000  
##  Max.   :12.1000  
## 

Redefine variables.

# Convert every column to its analysis type in a single pass:
#   - identifiers / categories -> factor
#   - Date                     -> Date
#   - odds                     -> numeric
df_master <- df_master %>%
  dplyr::mutate(
    NAME = as.factor(NAME),
    Date = as.Date(Date),
    Event = as.factor(Event),
    City = as.factor(City),
    State = as.factor(State),
    Country = as.factor(Country),
    FightWeightClass = as.factor(FightWeightClass),
    Method = as.factor(Method),
    Winner_Odds = as.numeric(Winner_Odds),
    Loser_Odds = as.numeric(Loser_Odds),
    fight_id = as.factor(fight_id),
    Sex = as.factor(Sex),
    Result = as.factor(Result),
    FighterWeightClass = as.factor(FighterWeightClass)
  )

Summarize again… There are infinite odds and overturned / DQ fight outcomes. These will have to be removed.

# Same overview again, now that the columns carry their analysis types.
df_master %>% summary()

##                  NAME           Date           
##  Donald Cerrone    :  24   Min.   :2013-04-27  
##  Ovince Saint Preux:  21   1st Qu.:2015-08-23  
##  Jim Miller        :  19   Median :2017-05-28  
##  Neil Magny        :  19   Mean   :2017-06-19  
##  Derrick Lewis     :  18   3rd Qu.:2019-04-20  
##  Tim Means         :  18   Max.   :2021-02-06  
##  (Other)           :5867                       
##                                   Event                  City     
##  UFC Fight Night: Chiesa vs. Magny   :  28   Las Vegas     :1246  
##  UFC Fight Night: Poirier vs. Gaethje:  28   Abu Dhabi     : 258  
##  UFC Fight Night: Whittaker vs. Till :  28   Boston        : 124  
##  UFC 190: Rousey vs Correia          :  26   Rio de Janeiro: 124  
##  UFC 193: Rousey vs Holm             :  26   Chicago       : 118  
##  UFC 210: Cormier vs. Johnson 2      :  26   Newark        : 114  
##  (Other)                             :5824   (Other)       :4002  
##         State                      Country          FightWeightClass
##  Nevada    :1246   USA                 :3464   Welterweight : 986   
##  Abu Dhabi : 258   Brazil              : 532   Lightweight  : 984   
##  Texas     : 256   Canada              : 378   Bantamweight : 852   
##  New York  : 252   United Arab Emirates: 258   Featherweight: 724   
##  California: 250   Australia           : 236   Middleweight : 654   
##  Florida   : 176   United Kingdom      : 184   Flyweight    : 498   
##  (Other)   :3548   (Other)             : 934   (Other)      :1288   
##      Round             Method      Winner_Odds     Loser_Odds       Sex      
##  Min.   :1.00   DQ        :  14   Min.   :1.06   Min.   :1.07   Female: 766  
##  1st Qu.:1.00   KO/TKO    :1910   1st Qu.:1.42   1st Qu.:1.77   Male  :5220  
##  Median :3.00   M-DEC     :  34   Median :1.71   Median :2.38                
##  Mean   :2.43   Overturned:  20   Mean   : Inf   Mean   : Inf                
##  3rd Qu.:3.00   S-DEC     : 628   3rd Qu.:2.33   3rd Qu.:3.36                
##  Max.   :5.00   SUB       :1060   Max.   : Inf   Max.   : Inf                
##                 U-DEC     :2320                                              
##     fight_id       Result     FighterWeight       FighterWeightClass
##  1      :   2   Loser :2993   Min.   :115.0   Welterweight :1007    
##  2      :   2   Winner:2993   1st Qu.:135.0   Lightweight  : 980    
##  3      :   2                 Median :155.0   Bantamweight : 799    
##  4      :   2                 Mean   :163.8   Featherweight: 731    
##  5      :   2                 3rd Qu.:185.0   Middleweight : 659    
##  6      :   2                 Max.   :265.0   Flyweight    : 561    
##  (Other):5974                                 (Other)      :1249    
##      REACH            SLPM             SAPM             STRA       
##  Min.   :58.00   Min.   : 0.000   Min.   : 0.100   Min.   :0.0000  
##  1st Qu.:69.00   1st Qu.: 2.680   1st Qu.: 2.630   1st Qu.:0.3900  
##  Median :72.00   Median : 3.440   Median : 3.230   Median :0.4400  
##  Mean   :71.77   Mean   : 3.531   Mean   : 3.435   Mean   :0.4417  
##  3rd Qu.:75.00   3rd Qu.: 4.250   3rd Qu.: 4.030   3rd Qu.:0.4900  
##  Max.   :84.00   Max.   :11.140   Max.   :23.330   Max.   :0.8800  
##  NA's   :215                                                       
##       STRD              TD              TDA              TDD        
##  Min.   :0.0900   Min.   : 0.000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.5100   1st Qu.: 0.560   1st Qu.:0.2700   1st Qu.:0.5100  
##  Median :0.5600   Median : 1.210   Median :0.3700   Median :0.6400  
##  Mean   :0.5527   Mean   : 1.518   Mean   :0.3745   Mean   :0.6157  
##  3rd Qu.:0.6000   3rd Qu.: 2.160   3rd Qu.:0.5000   3rd Qu.:0.7600  
##  Max.   :0.9200   Max.   :14.190   Max.   :1.0000   Max.   :1.0000  
##                                                                     
##       SUBA        
##  Min.   : 0.0000  
##  1st Qu.: 0.1000  
##  Median : 0.4000  
##  Mean   : 0.5516  
##  3rd Qu.: 0.8000  
##  Max.   :12.1000  
## 

How many events does the dataset include?

# Number of distinct events.
dplyr::n_distinct(df_master$Event)

## [1] 261

How many fights?

# Number of distinct fights.
dplyr::n_distinct(df_master$fight_id)

## [1] 2993

Over what time frame?

# Earliest and latest fight date (missing dates, if any, are ignored).
range(df_master$Date, na.rm = TRUE)

## [1] "2013-04-27" "2021-02-06"

Analyse Odds

Make copy for analysis.

# Continue under the name `df_odds` and drop the original binding.
df_odds <- df_master
rm(list = "df_master")

Filter out controversial results and infinite odds.

# Keep only rows whose fight was neither a DQ nor overturned and whose
# winner and loser odds are both finite numbers.
df_odds <- df_odds %>%
  dplyr::filter(
    !(Method == "DQ" | Method == "Overturned"),
    is.finite(Winner_Odds) & is.finite(Loser_Odds)
  )

Get rid of fighter-specifics so that we can spread the data frame. This will give us one fight per row.

# Drop the per-fighter columns (FighterWeight through SUBA), then spread the
# Result / NAME pair into one column per Result value holding the fighter name.
df_odds_short <- tidyr::spread(
  dplyr::select(df_odds, -c(FighterWeight:SUBA)),
  key = Result,
  value = NAME
)

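How often were the (best) odds equal? Note that these figures are computed on the long data frame, which still has two rows per fight (one per fighter): the proportion is unaffected, but the count is in fighter rows, i.e. twice the number of fights.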

# Share of rows in `df_odds` where both sides carry the same odds.
with(df_odds, mean(Winner_Odds == Loser_Odds))

## [1] 0.005410889

# Number of rows in `df_odds` where both sides carry the same odds.
with(df_odds, sum(Winner_Odds == Loser_Odds))

## [1] 32

Filter out equal odds and identify if Favorite won the fight.

# Drop fights with equal odds (no favorite can be identified), then derive:
#   Favorite_was_Winner  - TRUE when the winner had the lower (shorter) odds
#   Favorite_Unit_Profit - profit of a 1-unit bet on the favorite
#   Underdog_Unit_Profit - profit of a 1-unit bet on the underdog
# A winning bet pays (decimal odds - 1); a losing bet loses the 1-unit stake.
df_odds_short <- df_odds_short %>%
  dplyr::filter(Winner_Odds != Loser_Odds) %>%
  dplyr::mutate(
    # The comparison is already logical; wrapping it in ifelse(., T, F) adds
    # nothing and relies on the reassignable shortcuts T / F.
    Favorite_was_Winner = Winner_Odds < Loser_Odds,
    Favorite_Unit_Profit = ifelse(Favorite_was_Winner, Winner_Odds - 1, -1),
    Underdog_Unit_Profit = ifelse(!Favorite_was_Winner, Winner_Odds - 1, -1)
  )

What was the mean unit profit (i.e. ROI) if one bet solely on the Favorite?

# Average profit per unit staked when always backing the favorite.
with(df_odds_short, mean(Favorite_Unit_Profit))

## [1] -0.02309419

What was the mean unit profit if one bet solely on the Underdog?

# Average profit per unit staked when always backing the underdog.
with(df_odds_short, mean(Underdog_Unit_Profit))

## [1] -0.002040122

What proportion of the time does the Favorite win?

# Share of fights won by the favorite.
with(df_odds_short, mean(Favorite_was_Winner))

## [1] 0.6460388

Calculate implied probability of each fight based on odds.

# Implied probability is the reciprocal of the decimal odds. The favorite's
# odds are the winner's odds when the favorite won, otherwise the loser's;
# the underdog gets the other side.
df_odds_short <- dplyr::mutate(
  df_odds_short,
  Favorite_Probability = dplyr::if_else(
    Favorite_was_Winner, 1 / Winner_Odds, 1 / Loser_Odds
  ),
  Underdog_Probability = dplyr::if_else(
    !Favorite_was_Winner, 1 / Winner_Odds, 1 / Loser_Odds
  )
)

Calculate overround for each fight.

NOTE: these odds are the best available odds for each fight / fighter. Therefore, this is not overround in the traditional sense (looking at one particular odds maker).

# Total implied probability of both fighters, and its excess over 1.
df_odds_short <- dplyr::mutate(
  df_odds_short,
  Total_Probability = Favorite_Probability + Underdog_Probability,
  Overround = Total_Probability - 1
)

There is very little overround. This is because we are picking the best odds for each fight / fighter. By picking the best odds, we are counteracting the built-in overround of any particular odds-maker (typically around 5% as a rough estimate).

# Average overround across fights.
with(df_odds_short, mean(Overround))

## [1] 0.004461755

# Average total implied probability across fights.
with(df_odds_short, mean(Total_Probability))

## [1] 1.004462

Odds Performance

Add year as variable.

# Four-digit year of the fight date, stored as a character column.
df_odds_short <- dplyr::mutate(
  df_odds_short,
  Year = format(Date, format = "%Y")
)

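Compute Adjusted Implied Probability to account for the overround and get an unbiased estimate of the probability of victory implied by the odds. Half of the overround is subtracted from each fighter's implied probability, so that the two adjusted probabilities of a fight sum to 100%.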

# Remove half of the overround from each side so that the two adjusted
# probabilities of a fight add up to 1.
df_odds_short <- dplyr::mutate(
  df_odds_short,
  Adjusted_Favorite_Probability = Favorite_Probability - Overround / 2,
  Adjusted_Underdog_Probability = Underdog_Probability - Overround / 2,
  Adjusted_Total_Probability =
    Adjusted_Favorite_Probability + Adjusted_Underdog_Probability
)

Looking at the summary, we see that Adjusted Total Probability is always equal to 100%. Moreover, the Adjusted Favorite Probability never dips below 50%, whereas the Adjusted Underdog Probability never exceeds it. (This does not hold for the unadjusted implied probabilities.)

# Overview of the one-row-per-fight table, including the derived columns.
df_odds_short %>% summary()

##       Date                                             Event     
##  Min.   :2013-04-27   UFC Fight Night: Chiesa vs. Magny   :  14  
##  1st Qu.:2015-08-23   UFC Fight Night: Poirier vs. Gaethje:  14  
##  Median :2017-05-13   UFC Fight Night: Whittaker vs. Till :  14  
##  Mean   :2017-06-17   UFC 190: Rousey vs Correia          :  13  
##  3rd Qu.:2019-04-20   UFC 193: Rousey vs Holm             :  13  
##  Max.   :2021-02-06   UFC 210: Cormier vs. Johnson 2      :  13  
##                       (Other)                             :2860  
##              City             State                      Country    
##  Las Vegas     : 607   Nevada    : 607   USA                 :1699  
##  Abu Dhabi     : 127   Abu Dhabi : 127   Brazil              : 258  
##  Rio de Janeiro:  60   Texas     : 127   Canada              : 187  
##  Boston        :  59   California: 123   United Arab Emirates: 127  
##  Chicago       :  57   New York  : 123   Australia           : 117  
##  Newark        :  57   Florida   :  88   United Kingdom      :  92  
##  (Other)       :1974   (Other)   :1746   (Other)             : 461  
##       FightWeightClass     Round              Method      Winner_Odds    
##  Welterweight :486     Min.   :1.000   DQ        :   0   Min.   : 1.060  
##  Lightweight  :484     1st Qu.:2.000   KO/TKO    : 942   1st Qu.: 1.420  
##  Bantamweight :420     Median :3.000   M-DEC     :  17   Median : 1.710  
##  Featherweight:355     Mean   :2.435   Overturned:   0   Mean   : 1.975  
##  Middleweight :316     3rd Qu.:3.000   S-DEC     : 312   3rd Qu.: 2.300  
##  Flyweight    :246     Max.   :5.000   SUB       : 521   Max.   :12.990  
##  (Other)      :634                     U-DEC     :1149                   
##    Loser_Odds         Sex          fight_id                Loser     
##  Min.   : 1.070   Female: 378   1      :   1   Jim Miller     :  10  
##  1st Qu.: 1.760   Male  :2563   2      :   1   Ross Pearson   :  10  
##  Median : 2.380                 3      :   1   Angela Hill    :   9  
##  Mean   : 2.813                 4      :   1   Donald Cerrone :   9  
##  3rd Qu.: 3.350                 5      :   1   Gian Villante  :   9  
##  Max.   :14.050                 6      :   1   Jeremy Stephens:   9  
##                                 (Other):2935   (Other)        :2885  
##                 Winner     Favorite_was_Winner Favorite_Unit_Profit
##  Donald Cerrone    :  15   Mode :logical       Min.   :-1.00000    
##  Derrick Lewis     :  14   FALSE:1041          1st Qu.:-1.00000    
##  Francisco Trinaldo:  13   TRUE :1900          Median : 0.31000    
##  Neil Magny        :  13                       Mean   :-0.02309    
##  Dustin Poirier    :  12                       3rd Qu.: 0.57000    
##  Max Holloway      :  12                       Max.   : 1.10000    
##  (Other)           :2862                                           
##  Underdog_Unit_Profit Favorite_Probability Underdog_Probability
##  Min.   :-1.00000     Min.   :0.4000       Min.   :0.07117     
##  1st Qu.:-1.00000     1st Qu.:0.5780       1st Qu.:0.27397     
##  Median :-1.00000     Median :0.6410       Median :0.35971     
##  Mean   :-0.00204     Mean   :0.6579       Mean   :0.34658     
##  3rd Qu.: 1.30000     3rd Qu.:0.7299       3rd Qu.:0.42553     
##  Max.   :11.99000     Max.   :0.9434       Max.   :0.52356     
##                                                                
##  Total_Probability   Overround             Year          
##  Min.   :0.7639    Min.   :-0.236148   Length:2941       
##  1st Qu.:0.9988    1st Qu.:-0.001198   Class :character  
##  Median :1.0085    Median : 0.008472   Mode  :character  
##  Mean   :1.0045    Mean   : 0.004462                     
##  3rd Qu.:1.0147    3rd Qu.: 0.014713                     
##  Max.   :1.0684    Max.   : 0.068376                     
##                                                          
##  Adjusted_Favorite_Probability Adjusted_Underdog_Probability
##  Min.   :0.5012                Min.   :0.0673               
##  1st Qu.:0.5780                1st Qu.:0.2725               
##  Median :0.6408                Median :0.3592               
##  Mean   :0.6557                Mean   :0.3443               
##  3rd Qu.:0.7275                3rd Qu.:0.4220               
##  Max.   :0.9327                Max.   :0.4988               
##                                                             
##  Adjusted_Total_Probability
##  Min.   :1                 
##  1st Qu.:1                 
##  Median :1                 
##  Mean   :1                 
##  3rd Qu.:1                 
##  Max.   :1                 
## 

Create function to graphically assess over performance as a function of several variables. These are not inferential analyses but are instead meant to visualize the data to observe trends for further analysis. Use adjusted implied probabilities along with unit profits derived from non-adjusted odds to simulate what one actually would have won using best available odds.

# Midpoints of the "(lower,upper]" interval labels produced by cut().
# NOTE: cut() prints its labels with limited precision (3 significant digits
# by default), so the midpoints inherit that rounding.
.bin_midpoints <- function(bin_factor) {
  bin_labels <- as.character(bin_factor)
  lower <- as.numeric(sub("\\((.+),.*", "\\1", bin_labels))
  upper <- as.numeric(sub("[^,]*,([^]]*)\\]", "\\1", bin_labels))
  (lower + upper) / 2
}

# Bin one side (favorite or underdog) by its adjusted implied probability and
# summarise win rate, bin size and ROI per bin (and per `variable` level).
.summarise_side <- function(data, prob_col, won, profit_col, num_bin, variable) {
  work <- data.frame(
    Probability_Bin = cut(data[[prob_col]], num_bin),
    Won = won,
    Unit_Profit = data[[profit_col]],
    stringsAsFactors = FALSE
  )

  if (is.null(variable)) {
    grouped <- dplyr::group_by(work, Probability_Bin)
  } else {
    # `[[` returns a plain vector for data frames and tibbles alike.
    work$Dummy <- data[[variable]]
    grouped <- dplyr::group_by(work, Probability_Bin, Dummy)
  }

  perf <- grouped %>%
    dplyr::summarise(
      Prop_of_Victory = mean(Won),
      Size_of_Bin = dplyr::n(),
      ROI = mean(Unit_Profit)
    ) %>%
    # Drop leftover grouping so the result can be safely combined with rbind().
    dplyr::ungroup()

  perf$Mid_Bin <- .bin_midpoints(perf$Probability_Bin)
  perf$Over_Performance <- perf$Prop_of_Victory - perf$Mid_Bin
  perf
}

# Print one scatter + smoother plot of a summary column (in %) against the bin
# midpoint (in %), using only bins with at least `min_bin_size` fights.
.plot_binned_metric <- function(perf, y_col, y_lab, title, min_bin_size,
                                variable, identity_line = FALSE) {
  plot_data <- dplyr::filter(perf, Size_of_Bin >= min_bin_size)
  plot_data$X_Pct <- plot_data$Mid_Bin * 100
  plot_data$Y_Pct <- plot_data[[y_col]] * 100

  if (is.null(variable)) {
    gg <- ggplot2::ggplot(plot_data, ggplot2::aes(x = X_Pct, y = Y_Pct))
  } else {
    gg <- ggplot2::ggplot(
      plot_data,
      ggplot2::aes(x = X_Pct, y = Y_Pct, group = Dummy, colour = Dummy)
    )
  }

  # Dotted reference: the identity line for win-rate plots, zero otherwise.
  reference <- if (identity_line) {
    ggplot2::geom_abline(slope = 1, intercept = 0, linetype = "dotted")
  } else {
    ggplot2::geom_hline(yintercept = 0, linetype = "dotted")
  }

  gg <- gg +
    ggplot2::geom_point() +
    ggplot2::geom_smooth(se = FALSE) +
    reference +
    ggplot2::ylab(y_lab) +
    ggplot2::xlab("Adjusted Implied Probability (%)") +
    ggplot2::ggtitle(title)

  if (!is.null(variable)) {
    gg <- gg + ggplot2::labs(color = variable)
  }

  print(gg)
  invisible(gg)
}

#' Gauge over/under performance of favorites and underdogs
#'
#' Bins fights by adjusted implied probability (separately for the favorite
#' and the underdog), computes the observed win rate, bin size and ROI per
#' bin, prints diagnostic plots, and returns the summary table. These are
#' descriptive visualisations, not inferential analyses.
#'
#' @param num_bin Number of equal-width probability bins (passed to `cut()`).
#' @param min_bin_size Minimum number of fights a bin needs to be plotted.
#'   The returned table is NOT filtered by this value.
#' @param variable Optional name (single string) of a column of `data` to
#'   split the summary by; it appears as column `Dummy` in the result.
#'   `NULL` (default) means no split.
#' @param data Data frame with columns `Adjusted_Favorite_Probability`,
#'   `Adjusted_Underdog_Probability`, `Favorite_was_Winner`,
#'   `Favorite_Unit_Profit` and `Underdog_Unit_Profit`. Defaults to the
#'   script-level `df_odds_short`.
#' @return A tibble with one row per bin (and `variable` level) and columns
#'   `Probability_Bin`, (`Dummy`), `Prop_of_Victory`, `Size_of_Bin`, `ROI`,
#'   `Mid_Bin`, `Over_Performance` and `Is_Fav`; favorite rows come first.
gauge_over_performance <- function(num_bin = 10, min_bin_size = 30,
                                   variable = NULL, data = df_odds_short) {
  if (!is.null(variable) &&
      !(is.character(variable) && length(variable) == 1)) {
    stop("`variable` must be NULL or a single column name.", call. = FALSE)
  }

  required_cols <- c(
    "Adjusted_Favorite_Probability", "Adjusted_Underdog_Probability",
    "Favorite_was_Winner", "Favorite_Unit_Profit", "Underdog_Unit_Profit",
    variable
  )
  missing_cols <- setdiff(required_cols, names(data))
  if (length(missing_cols) > 0) {
    stop(
      "Column(s) not found in `data`: ", paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  favorite_won <- data[["Favorite_was_Winner"]]

  fav_perf <- .summarise_side(
    data, "Adjusted_Favorite_Probability", favorite_won,
    "Favorite_Unit_Profit", num_bin, variable
  )
  under_perf <- .summarise_side(
    data, "Adjusted_Underdog_Probability", !favorite_won,
    "Underdog_Unit_Profit", num_bin, variable
  )

  sides <- list(
    list(perf = fav_perf, title = "Favorites"),
    list(perf = under_perf, title = "Underdogs")
  )
  for (side in sides) {
    .plot_binned_metric(
      side$perf, "Over_Performance", "Over Performance (%)",
      side$title, min_bin_size, variable
    )
    # The win-rate-vs-implied-probability plot is only drawn without a split.
    if (is.null(variable)) {
      .plot_binned_metric(
        side$perf, "Prop_of_Victory", "Probability of Victory (%)",
        side$title, min_bin_size, variable,
        identity_line = TRUE
      )
    }
    # Same picture as over performance; only the y-axis scale really differs.
    .plot_binned_metric(
      side$perf, "ROI", "ROI (%)",
      side$title, min_bin_size, variable
    )
  }

  fav_perf$Is_Fav <- TRUE
  under_perf$Is_Fav <- FALSE

  rbind(fav_perf, under_perf)
}

Look at how expected performance predicts over performance.

# Overall run (no split variable); only bins with >= 100 fights are plotted.
odds_perf <- gauge_over_performance(num_bin = 10, min_bin_size = 100)

knitr::kable(odds_perf)

Probability_Bin	Prop_of_Victory	Size_of_Bin	ROI	Mid_Bin	Over_Performance	Is_Fav
(0.501,0.544]	0.5160494	405	-0.0155802	0.52250	-0.0064506	TRUE
(0.544,0.587]	0.5361050	457	-0.0585558	0.56550	-0.0293950	TRUE
(0.587,0.631]	0.5376984	504	-0.1169643	0.60900	-0.0713016	TRUE
(0.631,0.674]	0.6410256	390	-0.0204615	0.65250	-0.0114744	TRUE
(0.674,0.717]	0.7078947	380	0.0163947	0.69550	0.0123947	TRUE
(0.717,0.76]	0.7589577	307	0.0231922	0.73850	0.0204577	TRUE
(0.76,0.803]	0.8384279	229	0.0692576	0.78150	0.0569279	TRUE
(0.803,0.846]	0.8271605	162	-0.0021605	0.82450	0.0026605	TRUE
(0.846,0.89]	0.8961039	77	0.0325974	0.86800	0.0281039	TRUE
(0.89,0.933]	0.9333333	30	0.0236667	0.91150	0.0218333	TRUE
(0.0669,0.11]	0.0666667	30	-0.2100000	0.08845	-0.0217833	FALSE
(0.11,0.154]	0.1038961	77	-0.1935065	0.13200	-0.0281039	FALSE
(0.154,0.197]	0.1728395	162	-0.0545062	0.17550	-0.0026605	FALSE
(0.197,0.24]	0.1615721	229	-0.2699127	0.21850	-0.0569279	FALSE
(0.24,0.283]	0.2410423	307	-0.0922150	0.26150	-0.0204577	FALSE
(0.283,0.326]	0.2921053	380	-0.0502105	0.30450	-0.0123947	FALSE
(0.326,0.369]	0.3589744	390	0.0181795	0.34750	0.0114744	FALSE
(0.369,0.413]	0.4623016	504	0.1802976	0.39100	0.0713016	FALSE
(0.413,0.456]	0.4638950	457	0.0675055	0.43450	0.0293950	FALSE
(0.456,0.499]	0.4839506	405	0.0109136	0.47750	0.0064506	FALSE

Is there any stability across years? We need to reduce the minimum bin size to get estimates. As a result, the estimates will be noisier.

# Same analysis split by Year; bins with >= 30 fights are plotted.
odds_perf_by_year <- gauge_over_performance(
  num_bin = 10,
  min_bin_size = 30,
  variable = "Year"
)

knitr::kable(odds_perf_by_year)

Probability_Bin	Dummy	Prop_of_Victory	Size_of_Bin	ROI	Mid_Bin	Over_Performance	Is_Fav
(0.501,0.544]	2013	0.6000000	10	0.1580000	0.52250	0.0775000	TRUE
(0.501,0.544]	2014	0.6086957	23	0.1791304	0.52250	0.0861957	TRUE
(0.501,0.544]	2015	0.4791667	48	-0.0906250	0.52250	-0.0433333	TRUE
(0.501,0.544]	2016	0.5777778	90	0.0936667	0.52250	0.0552778	TRUE
(0.501,0.544]	2017	0.5869565	46	0.1126087	0.52250	0.0644565	TRUE
(0.501,0.544]	2018	0.3859649	57	-0.2568421	0.52250	-0.1365351	TRUE
(0.501,0.544]	2019	0.4383562	73	-0.1578082	0.52250	-0.0841438	TRUE
(0.501,0.544]	2020	0.5714286	56	0.0882143	0.52250	0.0489286	TRUE
(0.501,0.544]	2021	0.5000000	2	-0.0250000	0.52250	-0.0225000	TRUE
(0.544,0.587]	2013	0.4444444	9	-0.1977778	0.56550	-0.1210556	TRUE
(0.544,0.587]	2014	0.4324324	37	-0.2456757	0.56550	-0.1330676	TRUE
(0.544,0.587]	2015	0.4848485	66	-0.1515152	0.56550	-0.0806515	TRUE
(0.544,0.587]	2016	0.4590164	61	-0.1957377	0.56550	-0.1064836	TRUE
(0.544,0.587]	2017	0.5915493	71	0.0374648	0.56550	0.0260493	TRUE
(0.544,0.587]	2018	0.6000000	55	0.0556364	0.56550	0.0345000	TRUE
(0.544,0.587]	2019	0.6025641	78	0.0608974	0.56550	0.0370641	TRUE
(0.544,0.587]	2020	0.5479452	73	-0.0369863	0.56550	-0.0175548	TRUE
(0.544,0.587]	2021	0.4285714	7	-0.2457143	0.56550	-0.1369286	TRUE
(0.587,0.631]	2013	0.5000000	18	-0.1722222	0.60900	-0.1090000	TRUE
(0.587,0.631]	2014	0.6046512	43	-0.0016279	0.60900	-0.0043488	TRUE
(0.587,0.631]	2015	0.4545455	66	-0.2554545	0.60900	-0.1544545	TRUE
(0.587,0.631]	2016	0.4791667	96	-0.2204167	0.60900	-0.1298333	TRUE
(0.587,0.631]	2017	0.6557377	61	0.0716393	0.60900	0.0467377	TRUE
(0.587,0.631]	2018	0.5000000	84	-0.1680952	0.60900	-0.1090000	TRUE
(0.587,0.631]	2019	0.5068493	73	-0.1689041	0.60900	-0.1021507	TRUE
(0.587,0.631]	2020	0.6315789	57	0.0371930	0.60900	0.0225789	TRUE
(0.587,0.631]	2021	0.8333333	6	0.3666667	0.60900	0.2243333	TRUE
(0.631,0.674]	2013	0.6666667	15	0.0280000	0.65250	0.0141667	TRUE
(0.631,0.674]	2014	0.5744681	47	-0.1161702	0.65250	-0.0780319	TRUE
(0.631,0.674]	2015	0.6029412	68	-0.0769118	0.65250	-0.0495588	TRUE
(0.631,0.674]	2016	0.7500000	56	0.1391071	0.65250	0.0975000	TRUE
(0.631,0.674]	2017	0.5476190	42	-0.1669048	0.65250	-0.1048810	TRUE
(0.631,0.674]	2018	0.6400000	50	-0.0222000	0.65250	-0.0125000	TRUE
(0.631,0.674]	2019	0.6938776	49	0.0622449	0.65250	0.0413776	TRUE
(0.631,0.674]	2020	0.6491228	57	-0.0077193	0.65250	-0.0033772	TRUE
(0.631,0.674]	2021	0.6666667	6	0.0016667	0.65250	0.0141667	TRUE
(0.674,0.717]	2013	0.8235294	17	0.2058824	0.69550	0.1280294	TRUE
(0.674,0.717]	2014	0.7254902	51	0.0450980	0.69550	0.0299902	TRUE
(0.674,0.717]	2015	0.6551724	58	-0.0601724	0.69550	-0.0403276	TRUE
(0.674,0.717]	2016	0.6956522	69	-0.0079710	0.69550	0.0001522	TRUE
(0.674,0.717]	2017	0.7272727	33	0.0348485	0.69550	0.0317727	TRUE
(0.674,0.717]	2018	0.8372093	43	0.2097674	0.69550	0.1417093	TRUE
(0.674,0.717]	2019	0.6250000	40	-0.1007500	0.69550	-0.0705000	TRUE
(0.674,0.717]	2020	0.7142857	63	0.0231746	0.69550	0.0187857	TRUE
(0.674,0.717]	2021	0.3333333	6	-0.5216667	0.69550	-0.3621667	TRUE
(0.717,0.76]	2013	0.9411765	17	0.2811765	0.73850	0.2026765	TRUE
(0.717,0.76]	2014	0.8250000	40	0.1090000	0.73850	0.0865000	TRUE
(0.717,0.76]	2015	0.7187500	32	-0.0362500	0.73850	-0.0197500	TRUE
(0.717,0.76]	2016	0.7872340	47	0.0582979	0.73850	0.0487340	TRUE
(0.717,0.76]	2017	0.7500000	48	0.0143750	0.73850	0.0115000	TRUE
(0.717,0.76]	2018	0.7380952	42	-0.0042857	0.73850	-0.0004048	TRUE
(0.717,0.76]	2019	0.7352941	34	-0.0094118	0.73850	-0.0032059	TRUE
(0.717,0.76]	2020	0.6666667	42	-0.0973810	0.73850	-0.0718333	TRUE
(0.717,0.76]	2021	0.8000000	5	0.0600000	0.73850	0.0615000	TRUE
(0.76,0.803]	2013	0.9090909	11	0.1609091	0.78150	0.1275909	TRUE
(0.76,0.803]	2014	0.7500000	24	-0.0437500	0.78150	-0.0315000	TRUE
(0.76,0.803]	2015	0.7560976	41	-0.0409756	0.78150	-0.0254024	TRUE
(0.76,0.803]	2016	0.9090909	33	0.1600000	0.78150	0.1275909	TRUE
(0.76,0.803]	2017	0.8857143	35	0.1317143	0.78150	0.1042143	TRUE
(0.76,0.803]	2018	0.8214286	28	0.0521429	0.78150	0.0399286	TRUE
(0.76,0.803]	2019	0.8400000	25	0.0748000	0.78150	0.0585000	TRUE
(0.76,0.803]	2020	0.8709677	31	0.1067742	0.78150	0.0894677	TRUE
(0.76,0.803]	2021	1.0000000	1	0.2900000	0.78150	0.2185000	TRUE
(0.803,0.846]	2013	0.8000000	10	-0.0350000	0.82450	-0.0245000	TRUE
(0.803,0.846]	2014	0.8571429	21	0.0380952	0.82450	0.0326429	TRUE
(0.803,0.846]	2015	0.8205128	39	-0.0102564	0.82450	-0.0039872	TRUE
(0.803,0.846]	2016	0.7368421	19	-0.1110526	0.82450	-0.0876579	TRUE
(0.803,0.846]	2017	0.7619048	21	-0.0819048	0.82450	-0.0625952	TRUE
(0.803,0.846]	2018	0.8636364	22	0.0309091	0.82450	0.0391364	TRUE
(0.803,0.846]	2019	0.8666667	15	0.0506667	0.82450	0.0421667	TRUE
(0.803,0.846]	2020	0.9166667	12	0.1108333	0.82450	0.0921667	TRUE
(0.803,0.846]	2021	1.0000000	3	0.2200000	0.82450	0.1755000	TRUE
(0.846,0.89]	2013	1.0000000	6	0.1500000	0.86800	0.1320000	TRUE
(0.846,0.89]	2014	0.8000000	15	-0.0713333	0.86800	-0.0680000	TRUE
(0.846,0.89]	2015	0.8823529	17	0.0088235	0.86800	0.0143529	TRUE
(0.846,0.89]	2016	1.0000000	5	0.1420000	0.86800	0.1320000	TRUE
(0.846,0.89]	2017	0.8571429	7	-0.0142857	0.86800	-0.0108571	TRUE
(0.846,0.89]	2018	1.0000000	10	0.1530000	0.86800	0.1320000	TRUE
(0.846,0.89]	2019	0.8750000	8	0.0150000	0.86800	0.0070000	TRUE
(0.846,0.89]	2020	0.8888889	9	0.0300000	0.86800	0.0208889	TRUE
(0.89,0.933]	2014	1.0000000	4	0.0900000	0.91150	0.0885000	TRUE
(0.89,0.933]	2015	0.8750000	8	-0.0487500	0.91150	-0.0365000	TRUE
(0.89,0.933]	2016	1.0000000	3	0.0933333	0.91150	0.0885000	TRUE
(0.89,0.933]	2017	1.0000000	3	0.1100000	0.91150	0.0885000	TRUE
(0.89,0.933]	2018	1.0000000	5	0.1080000	0.91150	0.0885000	TRUE
(0.89,0.933]	2019	1.0000000	4	0.1050000	0.91150	0.0885000	TRUE
(0.89,0.933]	2020	0.6666667	3	-0.2766667	0.91150	-0.2448333	TRUE
(0.0669,0.11]	2014	0.0000000	4	-1.0000000	0.08845	-0.0884500	FALSE
(0.0669,0.11]	2015	0.1250000	8	0.6237500	0.08845	0.0365500	FALSE
(0.0669,0.11]	2016	0.0000000	3	-1.0000000	0.08845	-0.0884500	FALSE
(0.0669,0.11]	2017	0.0000000	3	-1.0000000	0.08845	-0.0884500	FALSE
(0.0669,0.11]	2018	0.0000000	5	-1.0000000	0.08845	-0.0884500	FALSE
(0.0669,0.11]	2019	0.0000000	4	-1.0000000	0.08845	-0.0884500	FALSE
(0.0669,0.11]	2020	0.3333333	3	2.5700000	0.08845	0.2448833	FALSE
(0.11,0.154]	2013	0.0000000	6	-1.0000000	0.13200	-0.1320000	FALSE
(0.11,0.154]	2014	0.2000000	15	0.5980000	0.13200	0.0680000	FALSE
(0.11,0.154]	2015	0.1176471	17	-0.1276471	0.13200	-0.0143529	FALSE
(0.11,0.154]	2016	0.0000000	5	-1.0000000	0.13200	-0.1320000	FALSE
(0.11,0.154]	2017	0.1428571	7	-0.0671429	0.13200	0.0108571	FALSE
(0.11,0.154]	2018	0.0000000	10	-1.0000000	0.13200	-0.1320000	FALSE
(0.11,0.154]	2019	0.1250000	8	0.0937500	0.13200	-0.0070000	FALSE
(0.11,0.154]	2020	0.1111111	9	-0.1088889	0.13200	-0.0208889	FALSE
(0.154,0.197]	2013	0.2000000	10	0.0550000	0.17550	0.0245000	FALSE
(0.154,0.197]	2014	0.1428571	21	-0.2428571	0.17550	-0.0326429	FALSE
(0.154,0.197]	2015	0.1794872	39	-0.0174359	0.17550	0.0039872	FALSE
(0.154,0.197]	2016	0.2631579	19	0.4815789	0.17550	0.0876579	FALSE
(0.154,0.197]	2017	0.2380952	21	0.3666667	0.17550	0.0625952	FALSE
(0.154,0.197]	2018	0.1363636	22	-0.2586364	0.17550	-0.0391364	FALSE
(0.154,0.197]	2019	0.1333333	15	-0.3406667	0.17550	-0.0421667	FALSE
(0.154,0.197]	2020	0.0833333	12	-0.5541667	0.17550	-0.0921667	FALSE
(0.154,0.197]	2021	0.0000000	3	-1.0000000	0.17550	-0.1755000	FALSE
(0.197,0.24]	2013	0.0909091	11	-0.5772727	0.21850	-0.1275909	FALSE
(0.197,0.24]	2014	0.2500000	24	0.1258333	0.21850	0.0315000	FALSE
(0.197,0.24]	2015	0.2439024	41	0.1065854	0.21850	0.0254024	FALSE
(0.197,0.24]	2016	0.0909091	33	-0.5712121	0.21850	-0.1275909	FALSE
(0.197,0.24]	2017	0.1142857	35	-0.4914286	0.21850	-0.1042143	FALSE
(0.197,0.24]	2018	0.1785714	28	-0.1767857	0.21850	-0.0399286	FALSE
(0.197,0.24]	2019	0.1600000	25	-0.2760000	0.21850	-0.0585000	FALSE
(0.197,0.24]	2020	0.1290323	31	-0.4500000	0.21850	-0.0894677	FALSE
(0.197,0.24]	2021	0.0000000	1	-1.0000000	0.21850	-0.2185000	FALSE
(0.24,0.283]	2013	0.0588235	17	-0.7847059	0.26150	-0.2026765	FALSE
(0.24,0.283]	2014	0.1750000	40	-0.3487500	0.26150	-0.0865000	FALSE
(0.24,0.283]	2015	0.2812500	32	0.0665625	0.26150	0.0197500	FALSE
(0.24,0.283]	2016	0.2127660	47	-0.1972340	0.26150	-0.0487340	FALSE
(0.24,0.283]	2017	0.2500000	48	-0.0447917	0.26150	-0.0115000	FALSE
(0.24,0.283]	2018	0.2619048	42	0.0047619	0.26150	0.0004048	FALSE
(0.24,0.283]	2019	0.2647059	34	-0.0150000	0.26150	0.0032059	FALSE
(0.24,0.283]	2020	0.3333333	42	0.2304762	0.26150	0.0718333	FALSE
(0.24,0.283]	2021	0.2000000	5	-0.2200000	0.26150	-0.0615000	FALSE
(0.283,0.326]	2013	0.1764706	17	-0.4088235	0.30450	-0.1280294	FALSE
(0.283,0.326]	2014	0.2745098	51	-0.1156863	0.30450	-0.0299902	FALSE
(0.283,0.326]	2015	0.3448276	58	0.1263793	0.30450	0.0403276	FALSE
(0.283,0.326]	2016	0.3043478	69	-0.0256522	0.30450	-0.0001522	FALSE
(0.283,0.326]	2017	0.2727273	33	-0.1075758	0.30450	-0.0317727	FALSE
(0.283,0.326]	2018	0.1627907	43	-0.4579070	0.30450	-0.1417093	FALSE
(0.283,0.326]	2019	0.3750000	40	0.2240000	0.30450	0.0705000	FALSE
(0.283,0.326]	2020	0.2857143	63	-0.0673016	0.30450	-0.0187857	FALSE
(0.283,0.326]	2021	0.6666667	6	1.1216667	0.30450	0.3621667	FALSE
(0.326,0.369]	2013	0.3333333	15	-0.0546667	0.34750	-0.0141667	FALSE
(0.326,0.369]	2014	0.4255319	47	0.2155319	0.34750	0.0780319	FALSE
(0.326,0.369]	2015	0.3970588	68	0.1205882	0.34750	0.0495588	FALSE
(0.326,0.369]	2016	0.2500000	56	-0.2896429	0.34750	-0.0975000	FALSE
(0.326,0.369]	2017	0.4523810	42	0.2807143	0.34750	0.1048810	FALSE
(0.326,0.369]	2018	0.3600000	50	0.0230000	0.34750	0.0125000	FALSE
(0.326,0.369]	2019	0.3061224	49	-0.1281633	0.34750	-0.0413776	FALSE
(0.326,0.369]	2020	0.3508772	57	-0.0136842	0.34750	0.0033772	FALSE
(0.326,0.369]	2021	0.3333333	6	-0.0133333	0.34750	-0.0141667	FALSE
(0.369,0.413]	2013	0.5000000	18	0.2916667	0.39100	0.1090000	FALSE
(0.369,0.413]	2014	0.3953488	43	0.0104651	0.39100	0.0043488	FALSE
(0.369,0.413]	2015	0.5454545	66	0.3907576	0.39100	0.1544545	FALSE
(0.369,0.413]	2016	0.5208333	96	0.3155208	0.39100	0.1298333	FALSE
(0.369,0.413]	2017	0.3442623	61	-0.1122951	0.39100	-0.0467377	FALSE
(0.369,0.413]	2018	0.5000000	84	0.2789286	0.39100	0.1090000	FALSE
(0.369,0.413]	2019	0.4931507	73	0.2667123	0.39100	0.1021507	FALSE
(0.369,0.413]	2020	0.3684211	57	-0.0591228	0.39100	-0.0225789	FALSE
(0.369,0.413]	2021	0.1666667	6	-0.5983333	0.39100	-0.2243333	FALSE
(0.413,0.456]	2013	0.5555556	9	0.3000000	0.43450	0.1210556	FALSE
(0.413,0.456]	2014	0.5675676	37	0.3216216	0.43450	0.1330676	FALSE
(0.413,0.456]	2015	0.5151515	66	0.1771212	0.43450	0.0806515	FALSE
(0.413,0.456]	2016	0.5409836	61	0.2350820	0.43450	0.1064836	FALSE
(0.413,0.456]	2017	0.4084507	71	-0.0650704	0.43450	-0.0260493	FALSE
(0.413,0.456]	2018	0.4000000	55	-0.0750909	0.43450	-0.0345000	FALSE
(0.413,0.456]	2019	0.3974359	78	-0.0793590	0.43450	-0.0370641	FALSE
(0.413,0.456]	2020	0.4520548	73	0.0404110	0.43450	0.0175548	FALSE
(0.413,0.456]	2021	0.5714286	7	0.3157143	0.43450	0.1369286	FALSE
(0.456,0.499]	2013	0.4000000	10	-0.1610000	0.47750	-0.0775000	FALSE
(0.456,0.499]	2014	0.3913043	23	-0.1865217	0.47750	-0.0861957	FALSE
(0.456,0.499]	2015	0.5208333	48	0.0939583	0.47750	0.0433333	FALSE
(0.456,0.499]	2016	0.4222222	90	-0.1254444	0.47750	-0.0552778	FALSE
(0.456,0.499]	2017	0.4130435	46	-0.1436957	0.47750	-0.0644565	FALSE
(0.456,0.499]	2018	0.6140351	57	0.3015789	0.47750	0.1365351	FALSE
(0.456,0.499]	2019	0.5616438	73	0.1806849	0.47750	0.0841438	FALSE
(0.456,0.499]	2020	0.4285714	56	-0.1198214	0.47750	-0.0489286	FALSE
(0.456,0.499]	2021	0.5000000	2	0.0200000	0.47750	0.0225000	FALSE

Does the method of victory affect the relationship between odds and outcome? Reduce the number of bins (compared to the Year comparison above) to stabilize the estimates. The graphs do not tell the whole story because the number of data points available varies considerably across bins.

# Gauge over-performance with the fights split by method of victory.
# NOTE(review): the table printed below still lists cells with far fewer than
# 30 fights (e.g. M-DEC with 1), so min_bin_size presumably applies before the
# split by `variable` -- confirm against gauge_over_performance().
odds_perf_by_method <- gauge_over_performance(
  num_bin = 5,
  min_bin_size = 30,
  variable = "Method"
)

kable(odds_perf_by_method)

Probability_Bin	Dummy	Prop_of_Victory	Size_of_Bin	ROI	Mid_Bin	Over_Performance	Is_Fav
(0.501,0.587]	KO/TKO	0.5240000	250	-0.0403200	0.54400	-0.0200000	TRUE
(0.501,0.587]	M-DEC	0.6666667	9	0.1844444	0.54400	0.1226667	TRUE
(0.501,0.587]	S-DEC	0.4385965	114	-0.2028947	0.54400	-0.1054035	TRUE
(0.501,0.587]	SUB	0.5548387	155	0.0037419	0.54400	0.0108387	TRUE
(0.501,0.587]	U-DEC	0.5419162	334	-0.0062874	0.54400	-0.0020838	TRUE
(0.587,0.674]	KO/TKO	0.6028881	277	-0.0437906	0.63050	-0.0276119	TRUE
(0.587,0.674]	M-DEC	0.6666667	6	0.0666667	0.63050	0.0361667	TRUE
(0.587,0.674]	S-DEC	0.4107143	112	-0.3425893	0.63050	-0.2197857	TRUE
(0.587,0.674]	SUB	0.5144928	138	-0.1839130	0.63050	-0.1160072	TRUE
(0.587,0.674]	U-DEC	0.6454294	361	0.0236842	0.63050	0.0149294	TRUE
(0.674,0.76]	KO/TKO	0.7268722	227	0.0123789	0.71700	0.0098722	TRUE
(0.674,0.76]	M-DEC	0.0000000	1	-1.0000000	0.71700	-0.7170000	TRUE
(0.674,0.76]	S-DEC	0.5714286	63	-0.1866667	0.71700	-0.1455714	TRUE
(0.674,0.76]	SUB	0.7304348	115	0.0180000	0.71700	0.0134348	TRUE
(0.674,0.76]	U-DEC	0.7722420	281	0.0755516	0.71700	0.0552420	TRUE
(0.76,0.846]	KO/TKO	0.8014184	141	-0.0039716	0.80300	-0.0015816	TRUE
(0.76,0.846]	M-DEC	1.0000000	1	0.2600000	0.80300	0.1970000	TRUE
(0.76,0.846]	S-DEC	0.4500000	20	-0.4270000	0.80300	-0.3530000	TRUE
(0.76,0.846]	SUB	0.8426966	89	0.0495506	0.80300	0.0396966	TRUE
(0.76,0.846]	U-DEC	0.9142857	140	0.1424286	0.80300	0.1112857	TRUE
(0.846,0.933]	KO/TKO	0.8510638	47	-0.0374468	0.88950	-0.0384362	TRUE
(0.846,0.933]	S-DEC	1.0000000	3	0.1533333	0.88950	0.1105000	TRUE
(0.846,0.933]	SUB	0.9583333	24	0.0820833	0.88950	0.0688333	TRUE
(0.846,0.933]	U-DEC	0.9393939	33	0.0772727	0.88950	0.0498939	TRUE
(0.0669,0.154]	KO/TKO	0.1489362	47	0.3393617	0.11045	0.0384862	FALSE
(0.0669,0.154]	S-DEC	0.0000000	3	-1.0000000	0.11045	-0.1104500	FALSE
(0.0669,0.154]	SUB	0.0416667	24	-0.7025000	0.11045	-0.0687833	FALSE
(0.0669,0.154]	U-DEC	0.0606061	33	-0.5239394	0.11045	-0.0498439	FALSE
(0.154,0.24]	KO/TKO	0.1985816	141	-0.0098582	0.19700	0.0015816	FALSE
(0.154,0.24]	M-DEC	0.0000000	1	-1.0000000	0.19700	-0.1970000	FALSE
(0.154,0.24]	S-DEC	0.5500000	20	1.6125000	0.19700	0.3530000	FALSE
(0.154,0.24]	SUB	0.1573034	89	-0.2049438	0.19700	-0.0396966	FALSE
(0.154,0.24]	U-DEC	0.0857143	140	-0.5875714	0.19700	-0.1112857	FALSE
(0.24,0.326]	KO/TKO	0.2731278	227	-0.0531718	0.28300	-0.0098722	FALSE
(0.24,0.326]	M-DEC	1.0000000	1	2.0300000	0.28300	0.7170000	FALSE
(0.24,0.326]	S-DEC	0.4285714	63	0.5015873	0.28300	0.1455714	FALSE
(0.24,0.326]	SUB	0.2695652	115	-0.0703478	0.28300	-0.0134348	FALSE
(0.24,0.326]	U-DEC	0.2277580	281	-0.2165836	0.28300	-0.0552420	FALSE
(0.326,0.413]	KO/TKO	0.3971119	277	0.0597473	0.36950	0.0276119	FALSE
(0.326,0.413]	M-DEC	0.3333333	6	-0.0916667	0.36950	-0.0361667	FALSE
(0.326,0.413]	S-DEC	0.5892857	112	0.5446429	0.36950	0.2197857	FALSE
(0.326,0.413]	SUB	0.4855072	138	0.2974638	0.36950	0.1160072	FALSE
(0.326,0.413]	U-DEC	0.3545706	361	-0.0556510	0.36950	-0.0149294	FALSE
(0.413,0.499]	KO/TKO	0.4760000	250	0.0419600	0.45600	0.0200000	FALSE
(0.413,0.499]	M-DEC	0.3333333	9	-0.2344444	0.45600	-0.1226667	FALSE
(0.413,0.499]	S-DEC	0.5614035	114	0.2307018	0.45600	0.1054035	FALSE
(0.413,0.499]	SUB	0.4451613	155	-0.0121935	0.45600	-0.0108387	FALSE
(0.413,0.499]	U-DEC	0.4580838	334	0.0074251	0.45600	0.0020838	FALSE

How does the fight-finishing method vary with the implied probability of the Vegas odds?

# Favorites: number of fights per finishing method across probability bins.
# Mid_Bin is stored as a proportion (0-1), so it is scaled by 100 to agree
# with the "(%)" axis label and with the proportion plots further down.
odds_perf_by_method %>%
  dplyr::filter(Is_Fav == TRUE) %>%
  ggplot(
    aes(
      x = Mid_Bin * 100,
      y = Size_of_Bin,
      group = Dummy,
      color = Dummy
    )
  ) +
  geom_point() +
  geom_smooth(se = FALSE) +
  ylab("Count") +
  xlab("Adjusted Implied Probability (%)") +
  ggtitle("Favorites") +
  labs(color = "Method")

# Underdogs: number of fights per finishing method across probability bins.
# Mid_Bin is stored as a proportion (0-1), so it is scaled by 100 to agree
# with the "(%)" axis label and with the proportion plots further down.
odds_perf_by_method %>%
  dplyr::filter(Is_Fav == FALSE) %>%
  ggplot(
    aes(
      x = Mid_Bin * 100,
      y = Size_of_Bin,
      group = Dummy,
      color = Dummy
    )
  ) +
  geom_point() +
  geom_smooth(se = FALSE) +
  ylab("Count") +
  xlab("Adjusted Implied Probability (%)") +
  ggtitle("Underdogs") +
  labs(color = "Method")

Calculate the proportion of fights that end by various methods as a function of implied probability of fight odds.

# Total number of fights in each (Is_Fav, Mid_Bin) bin, summed over methods.
# ungroup() drops the leftover grouping so it cannot leak into later steps.
total_count <- odds_perf_by_method %>%
  dplyr::group_by(Is_Fav, Mid_Bin) %>%
  dplyr::summarise(Total_Count = sum(Size_of_Bin)) %>%
  dplyr::ungroup()

# Number of fights per method within each bin. sum() makes this a genuine
# one-row-per-group aggregate; with a single row per group (as in the table
# above) it returns the same value as Size_of_Bin itself.
single_count <- odds_perf_by_method %>%
  dplyr::group_by(Is_Fav, Mid_Bin, Dummy) %>%
  dplyr::summarise(Count = sum(Size_of_Bin)) %>%
  dplyr::ungroup()

# Attach the bin total to every method row; the join keys are spelled out
# instead of relying on merge()'s "all shared column names" default.
method_count_by_odds <- merge(
  single_count,
  total_count,
  by = c("Is_Fav", "Mid_Bin")
)

# Share of the bin's fights that ended by each method.
method_count_by_odds <- method_count_by_odds %>%
  dplyr::mutate(Method_Prop = Count / Total_Count)

# Favorites: percentage of fights ending by each method, plotted against the
# bin midpoint (both axes converted from proportions to percent).
ggplot(
  data = dplyr::filter(method_count_by_odds, Is_Fav == TRUE),
  mapping = aes(
    x = Mid_Bin * 100,
    y = Method_Prop * 100,
    group = Dummy,
    color = Dummy
  )
) +
  geom_point() +
  geom_smooth(se = FALSE) +
  labs(
    title = "Favorites",
    x = "Adjusted Implied Probability (%)",
    y = "Probability of Method (%)",
    color = "Method"
  )

# Underdogs: percentage of fights ending by each method, plotted against the
# bin midpoint (both axes converted from proportions to percent).
ggplot(
  data = dplyr::filter(method_count_by_odds, Is_Fav == FALSE),
  mapping = aes(
    x = Mid_Bin * 100,
    y = Method_Prop * 100,
    group = Dummy,
    color = Dummy
  )
) +
  geom_point() +
  geom_smooth(se = FALSE) +
  labs(
    title = "Underdogs",
    x = "Adjusted Implied Probability (%)",
    y = "Probability of Method (%)",
    color = "Method"
  )

Fighter Odds

Convert short back to long format.

# Stack the columns from Loser through Winner into two columns: `Result`
# records which column a value came from and `NAME` holds the value itself.
df_odds_long <- gather(
  df_odds_short,
  key = "Result",
  value = "NAME",
  Loser:Winner
)

Identify whether each fighter was the favorite, so the proper Implied Probability can be assigned.

# A fighter was the favorite when the favorite won and this row is the winner,
# or when the favorite lost and this row is the loser. The comparison already
# yields a logical vector, so no ifelse(..., T, F) wrapper is needed.
df_odds_long <- df_odds_long %>%
  dplyr::mutate(
    Was_Favorite =
      (Favorite_was_Winner & (Result == "Winner")) |
      (!Favorite_was_Winner & (Result == "Loser"))
  )

# Sanity check: count of favorite vs. non-favorite rows.
summary(df_odds_long[["Was_Favorite"]])

##    Mode   FALSE    TRUE 
## logical    2941    2941

Identify Implied Probability of each fighter.

# Give each row the raw and the adjusted implied probability that belongs to
# its side of the fight: the favorite's columns when Was_Favorite is TRUE,
# the underdog's columns otherwise.
df_odds_long <- dplyr::mutate(
  df_odds_long,
  Implied_Probability = ifelse(
    Was_Favorite,
    Favorite_Probability,
    Underdog_Probability
  ),
  Adjusted_Implied_Probability = ifelse(
    Was_Favorite,
    Adjusted_Favorite_Probability,
    Adjusted_Underdog_Probability
  )
)

# Distribution of both probability columns.
probability_cols <- c("Implied_Probability", "Adjusted_Implied_Probability")
summary(df_odds_long[, probability_cols])

##  Implied_Probability Adjusted_Implied_Probability
##  Min.   :0.07117     Min.   :0.0673              
##  1st Qu.:0.35971     1st Qu.:0.3593              
##  Median :0.50000     Median :0.5000              
##  Mean   :0.50223     Mean   :0.5000              
##  3rd Qu.:0.64103     3rd Qu.:0.6407              
##  Max.   :0.94340     Max.   :0.9327

Get rid of useless columns.

# Keep only the fighter, fight identifiers, result and the two probabilities.
df_odds_long <- dplyr::select(
  df_odds_long,
  NAME,
  Event,
  Date,
  Result,
  Implied_Probability,
  Adjusted_Implied_Probability
)

Summarize data.

# Column-by-column summary of the trimmed long-format table.
summary(df_odds_long)

##      NAME                                            Event     
##  Length:5882        UFC Fight Night: Chiesa vs. Magny   :  28  
##  Class :character   UFC Fight Night: Poirier vs. Gaethje:  28  
##  Mode  :character   UFC Fight Night: Whittaker vs. Till :  28  
##                     UFC 190: Rousey vs Correia          :  26  
##                     UFC 193: Rousey vs Holm             :  26  
##                     UFC 210: Cormier vs. Johnson 2      :  26  
##                     (Other)                             :5720  
##       Date               Result          Implied_Probability
##  Min.   :2013-04-27   Length:5882        Min.   :0.07117    
##  1st Qu.:2015-08-23   Class :character   1st Qu.:0.35971    
##  Median :2017-05-13   Mode  :character   Median :0.50000    
##  Mean   :2017-06-17                      Mean   :0.50223    
##  3rd Qu.:2019-04-20                      3rd Qu.:0.64103    
##  Max.   :2021-02-06                      Max.   :0.94340    
##                                                             
##  Adjusted_Implied_Probability
##  Min.   :0.0673              
##  1st Qu.:0.3593              
##  Median :0.5000              
##  Mean   :0.5000              
##  3rd Qu.:0.6407              
##  Max.   :0.9327              
## 

Add Win and Log Odds columns.

# Won: logical flag for the winning side (the comparison is already logical,
# so it is used directly rather than wrapped in ifelse(..., T, F)).
# Logit_Prob / Adjusted_Logit_Prob: log-odds of the raw and adjusted implied
# probabilities, via qlogis().
df_odds_long <- df_odds_long %>%
  dplyr::mutate(
    Won = Result == "Winner",
    Logit_Prob = qlogis(Implied_Probability),
    Adjusted_Logit_Prob = qlogis(Adjusted_Implied_Probability)
  )

# Check the new columns.
summary(df_odds_long[, c("Won", "Logit_Prob", "Adjusted_Logit_Prob")])

##     Won            Logit_Prob       Adjusted_Logit_Prob
##  Mode :logical   Min.   :-2.56879   Min.   :-2.6289    
##  FALSE:2941      1st Qu.:-0.57661   1st Qu.:-0.5786    
##  TRUE :2941      Median : 0.00000   Median : 0.0000    
##                  Mean   : 0.01186   Mean   : 0.0000    
##                  3rd Qu.: 0.57982   3rd Qu.: 0.5786    
##                  Max.   : 2.81341   Max.   : 2.6289

Get performance and odds for each fighter using Adjusted Implied Probability.

# One row per fighter:
#   Exp_Prop         mean adjusted implied probability
#   Logit_Exp_Prop   mean adjusted log-odds
#   Win_Prop         share of fights won
#   N_Fights         number of rows (fights) for the fighter in this dataset
#   Over_Performance Win_Prop minus Exp_Prop
#   Logit_Over       qlogis(Win_Prop) minus Logit_Exp_Prop
#   Back_Trans_Exp   mean log-odds mapped back to a probability with plogis()
# NOTE(review): qlogis() returns Inf / -Inf when Win_Prop is exactly 1 or 0,
# so Logit_Over cannot rank undefeated or winless fighters against each other.
df_odds_long_fighters <- df_odds_long %>%
  dplyr::group_by(NAME) %>%
  dplyr::summarise(
    Exp_Prop = mean(Adjusted_Implied_Probability),
    Logit_Exp_Prop = mean(Adjusted_Logit_Prob),
    Win_Prop = mean(Won),
    N_Fights = dplyr::n(),
    Over_Performance = Win_Prop - Exp_Prop,
    Logit_Over = qlogis(Win_Prop) - Logit_Exp_Prop,
    Back_Trans_Exp = plogis(Logit_Exp_Prop)
  )

Look at which fights were included in the dataset for a specific fighter.

# Pull every row recorded for one fighter to see which fights are covered.
df_roxy <- dplyr::filter(df_odds_long, NAME == "Roxanne Modafferi")

kable(df_roxy)

NAME	Event	Date	Result	Implied_Probability	Adjusted_Implied_Probability	Won	Logit_Prob	Adjusted_Logit_Prob
Roxanne Modafferi	UFC Fight Night: Dos Anjos vs. Edwards	2019-07-20	Loser	0.4385965	0.4399686	FALSE	-0.2468601	-0.2412893
Roxanne Modafferi	UFC Fight Night: Blaydes vs. Volkov	2020-06-20	Loser	0.4651163	0.4694002	FALSE	-0.1397619	-0.1225522
Roxanne Modafferi	The Ultimate Fighter: Team Rousey vs. Team Tate Finale	2013-11-30	Loser	0.1937984	0.2000738	FALSE	-1.4255151	-1.3858330
Roxanne Modafferi	UFC Fight Night: Chiesa vs. Magny	2021-01-20	Loser	0.2777778	0.2657546	FALSE	-0.9555114	-1.0162702
Roxanne Modafferi	UFC 230: Cormier vs. Lewis	2018-11-03	Loser	0.1724138	0.1695402	FALSE	-1.5686159	-1.5888892
Roxanne Modafferi	UFC Fight Night: Waterson vs. Hill	2020-09-12	Winner	0.2777778	0.2657546	TRUE	-0.9555114	-1.0162702
Roxanne Modafferi	UFC Fight Night: Overeem vs. Oleinik	2019-04-20	Winner	0.2666667	0.2683698	TRUE	-1.0116009	-1.0029092
Roxanne Modafferi	UFC 246: McGregor vs. Cowboy	2020-01-18	Winner	0.1246883	0.1159156	TRUE	-1.9487632	-2.0316905

Top 10 over-performers with at least 5 fights, where the number of fights is simply the number available in the dataset (see above).

# Ten largest Over_Performance values among fighters with >= 5 fights.
df_top_over_perform <- df_odds_long_fighters %>%
  dplyr::filter(N_Fights >= 5) %>%
  dplyr::arrange(dplyr::desc(Over_Performance)) %>%
  head(n = 10)

# Same ranking on the logit scale.
df_top_over_perform_logit <- df_odds_long_fighters %>%
  dplyr::filter(N_Fights >= 5) %>%
  dplyr::arrange(dplyr::desc(Logit_Over)) %>%
  head(n = 10)

kable(
  df_top_over_perform,
  caption = "Top 10 Over Performers with at least 5 Fights"
)

Top 10 Over Performers with at least 5 Fights
NAME	Exp_Prop	Logit_Exp_Prop	Win_Prop	N_Fights	Over_Performance	Logit_Over	Back_Trans_Exp
Leonardo Santos	0.4454486	-0.2777403	1.0000000	5	0.5545514	Inf	0.4310079
Robert Whittaker	0.4996490	0.0065223	1.0000000	10	0.5003510	Inf	0.5016306
Brandon Moreno	0.4399010	-0.2686787	0.8571429	7	0.4172418	2.060438	0.4332315
Arnold Allen	0.5867006	0.3757653	1.0000000	6	0.4132994	Inf	0.5928513
Brian Ortega	0.4823820	-0.0684020	0.8750000	8	0.3926180	2.014312	0.4829062
Alexander Volkanovski	0.6101305	0.5177296	1.0000000	8	0.3898695	Inf	0.6266167
Bryan Caraway	0.4194964	-0.3415057	0.8000000	5	0.3805036	1.727800	0.4154438
Yan Xiaonan	0.6240270	0.5391744	1.0000000	5	0.3759730	Inf	0.6316203
Amanda Nunes	0.5507052	0.2811614	0.9166667	12	0.3659615	2.116734	0.5698309
Joaquim Silva	0.4575904	-0.1889315	0.8000000	5	0.3424096	1.575226	0.4529071

# Render the logit-scale over-performer ranking as a captioned table.
kable(df_top_over_perform_logit, caption = "Logit Scale: Top 10 Over Performers with at least 5 Fights")

Logit Scale: Top 10 Over Performers with at least 5 Fights
NAME	Exp_Prop	Logit_Exp_Prop	Win_Prop	N_Fights	Over_Performance	Logit_Over	Back_Trans_Exp
Alexander Volkanovski	0.6101305	0.5177296	1	8	0.3898695	Inf	0.6266167
Arnold Allen	0.5867006	0.3757653	1	6	0.4132994	Inf	0.5928513
Demetrious Johnson	0.8609483	1.8803058	1	9	0.1390517	Inf	0.8676462
Israel Adesanya	0.7002859	0.8678323	1	7	0.2997141	Inf	0.7042944
Jon Jones	0.7892464	1.3952860	1	7	0.2107536	Inf	0.8014348
Kamaru Usman	0.6925314	0.8901129	1	10	0.3074686	Inf	0.7089135
Khabib Nurmagomedov	0.7598282	1.2002959	1	9	0.2401718	Inf	0.7685774
Kyung Ho Kang	0.6633446	0.7104716	1	6	0.3366554	Inf	0.6705054
Leonardo Santos	0.4454486	-0.2777403	1	5	0.5545514	Inf	0.4310079
Petr Yan	0.8144451	1.5256823	1	5	0.1855549	Inf	0.8213737

Top 10 under performers with at least 5 fights.

# Ten smallest Over_Performance values among fighters with >= 5 fights.
df_top_under_perform <- df_odds_long_fighters %>%
  dplyr::filter(N_Fights >= 5) %>%
  dplyr::arrange(Over_Performance) %>%
  head(n = 10)

# Same ranking on the logit scale.
df_top_under_perform_logit <- df_odds_long_fighters %>%
  dplyr::filter(N_Fights >= 5) %>%
  dplyr::arrange(Logit_Over) %>%
  head(n = 10)

kable(
  df_top_under_perform,
  caption = "Top 10 Under Performers with at least 5 Fights"
)

Top 10 Under Performers with at least 5 Fights
NAME	Exp_Prop	Logit_Exp_Prop	Win_Prop	N_Fights	Over_Performance	Logit_Over	Back_Trans_Exp
Kailin Curran	0.5404624	0.1811195	0.1428571	7	-0.3976052	-1.972879	0.5451565
Joshua Burkman	0.3760531	-0.5400292	0.0000000	7	-0.3760531	-Inf	0.3681808
Hyun Gyu Lim	0.5720479	0.3587458	0.2000000	5	-0.3720479	-1.745040	0.5887368
Alexander Gustafsson	0.6271431	0.5898086	0.2857143	7	-0.3414288	-1.506099	0.6433212
Gray Maynard	0.5072171	0.0245074	0.1666667	6	-0.3405504	-1.633945	0.5061265
Junior Albini	0.5325358	0.1453508	0.2000000	5	-0.3325358	-1.531645	0.5362739
Rashad Evans	0.5236378	0.1041933	0.2000000	5	-0.3236378	-1.490488	0.5260248
Andrea Lee	0.7055647	0.8841184	0.4000000	5	-0.3055647	-1.289583	0.7076749
Johny Hendricks	0.5509110	0.2250002	0.2500000	8	-0.3009110	-1.323613	0.5560140
Anderson Silva	0.4249640	-0.3485824	0.1428571	7	-0.2821068	-1.443177	0.4137262

# Render the logit-scale under-performer ranking as a captioned table.
kable(df_top_under_perform_logit, caption ="Logit Scale: Top 10 Under Performers with at least 5 Fights" )

Logit Scale: Top 10 Under Performers with at least 5 Fights
NAME	Exp_Prop	Logit_Exp_Prop	Win_Prop	N_Fights	Over_Performance	Logit_Over	Back_Trans_Exp
Joshua Burkman	0.3760531	-0.5400292	0.0000000	7	-0.3760531	-Inf	0.3681808
Kailin Curran	0.5404624	0.1811195	0.1428571	7	-0.3976052	-1.972879	0.5451565
Hyun Gyu Lim	0.5720479	0.3587458	0.2000000	5	-0.3720479	-1.745040	0.5887368
Gray Maynard	0.5072171	0.0245074	0.1666667	6	-0.3405504	-1.633945	0.5061265
Junior Albini	0.5325358	0.1453508	0.2000000	5	-0.3325358	-1.531645	0.5362739
Alexander Gustafsson	0.6271431	0.5898086	0.2857143	7	-0.3414288	-1.506099	0.6433212
Rashad Evans	0.5236378	0.1041933	0.2000000	5	-0.3236378	-1.490488	0.5260248
Anderson Silva	0.4249640	-0.3485824	0.1428571	7	-0.2821068	-1.443177	0.4137262
Ronda Rousey	0.8322407	1.8077087	0.6000000	5	-0.2322407	-1.402244	0.8590847
Brad Pickett	0.3826965	-0.5461462	0.1250000	8	-0.2576965	-1.399764	0.3667590

Most favored fighters with at least 5 fights.

# Ten highest mean adjusted implied probabilities (fighters with >= 5 fights).
df_most_fav <- df_odds_long_fighters %>%
  dplyr::filter(N_Fights >= 5) %>%
  dplyr::arrange(dplyr::desc(Exp_Prop)) %>%
  head(n = 10)

# Same ranking by mean adjusted log-odds.
df_most_fav_logit <- df_odds_long_fighters %>%
  dplyr::filter(N_Fights >= 5) %>%
  dplyr::arrange(dplyr::desc(Logit_Exp_Prop)) %>%
  head(n = 10)

kable(df_most_fav)

NAME	Exp_Prop	Logit_Exp_Prop	Win_Prop	N_Fights	Over_Performance	Logit_Over	Back_Trans_Exp
Demetrious Johnson	0.8609483	1.880306	1.0000000	9	0.1390517	Inf	0.8676462
Ronda Rousey	0.8322407	1.807709	0.6000000	5	-0.2322407	-1.4022436	0.8590847
Cristiane Justino	0.8252814	1.703941	0.8571429	7	0.0318615	0.0878185	0.8460487
Petr Yan	0.8144451	1.525682	1.0000000	5	0.1855549	Inf	0.8213737
Zabit Magomedsharipov	0.8050291	1.485833	1.0000000	6	0.1949709	Inf	0.8154520
Tatiana Suarez	0.7972730	1.391506	1.0000000	5	0.2027270	Inf	0.8008325
Jon Jones	0.7892464	1.395286	1.0000000	7	0.2107536	Inf	0.8014348
Magomed Ankalaev	0.7647264	1.211622	0.8000000	5	0.0352736	0.1746719	0.7705859
Khabib Nurmagomedov	0.7598282	1.200296	1.0000000	9	0.2401718	Inf	0.7685774
Mairbek Taisumov	0.7342042	1.072182	0.7777778	9	0.0435736	0.1805805	0.7450117

# Render the most-favored ranking ordered by mean adjusted log-odds.
kable(df_most_fav_logit)

NAME	Exp_Prop	Logit_Exp_Prop	Win_Prop	N_Fights	Over_Performance	Logit_Over	Back_Trans_Exp
Demetrious Johnson	0.8609483	1.880306	1.0000000	9	0.1390517	Inf	0.8676462
Ronda Rousey	0.8322407	1.807709	0.6000000	5	-0.2322407	-1.4022436	0.8590847
Cristiane Justino	0.8252814	1.703941	0.8571429	7	0.0318615	0.0878185	0.8460487
Petr Yan	0.8144451	1.525682	1.0000000	5	0.1855549	Inf	0.8213737
Zabit Magomedsharipov	0.8050291	1.485833	1.0000000	6	0.1949709	Inf	0.8154520
Jon Jones	0.7892464	1.395286	1.0000000	7	0.2107536	Inf	0.8014348
Tatiana Suarez	0.7972730	1.391506	1.0000000	5	0.2027270	Inf	0.8008325
Magomed Ankalaev	0.7647264	1.211622	0.8000000	5	0.0352736	0.1746719	0.7705859
Khabib Nurmagomedov	0.7598282	1.200296	1.0000000	9	0.2401718	Inf	0.7685774
Mairbek Taisumov	0.7342042	1.072182	0.7777778	9	0.0435736	0.1805805	0.7450117

Least favored fighters with at least 5 fights.

# Ten lowest mean adjusted implied probabilities (fighters with >= 5 fights).
df_least_fav <- df_odds_long_fighters %>%
  dplyr::filter(N_Fights >= 5) %>%
  dplyr::arrange(Exp_Prop) %>%
  head(n = 10)

# Same ranking by mean adjusted log-odds.
df_least_fav_logit <- df_odds_long_fighters %>%
  dplyr::filter(N_Fights >= 5) %>%
  dplyr::arrange(Logit_Exp_Prop) %>%
  head(n = 10)

kable(
  df_least_fav,
  caption = "Top 10 Least Favored Fighters with at least 5 Fights"
)

Top 10 Least Favored Fighters with at least 5 Fights
NAME	Exp_Prop	Logit_Exp_Prop	Win_Prop	N_Fights	Over_Performance	Logit_Over	Back_Trans_Exp
Roxanne Modafferi	0.2743472	-1.0507130	0.3750000	8	0.1006528	0.5398874	0.2590882
Daniel Kelly	0.2769185	-0.9737988	0.6000000	10	0.3230815	1.3792639	0.2741240
Jessica Aguilar	0.2859562	-0.9707245	0.2000000	5	-0.0859562	-0.4155698	0.2747361
Dan Henderson	0.2887631	-0.9309147	0.5000000	6	0.2112369	0.9309147	0.2827392
Thibault Gouti	0.2982523	-0.9293738	0.1666667	6	-0.1315857	-0.6800641	0.2830518
Anthony Perosh	0.2985353	-0.9384366	0.4000000	5	0.1014647	0.5329715	0.2812162
Leslie Smith	0.3018944	-0.9783537	0.4000000	5	0.0981056	0.5728886	0.2732186
Garreth McLellan	0.3067211	-0.8343938	0.2000000	5	-0.1067211	-0.5519005	0.3027168
Yaotzin Meza	0.3076578	-0.8634526	0.4000000	5	0.0923422	0.4579875	0.2966185
Takanori Gomi	0.3093210	-0.8732262	0.2000000	5	-0.1093210	-0.5130682	0.2945834

# Render the logit-scale least-favored ranking as a captioned table.
kable(df_least_fav_logit, caption = "Logit Scale: Top 10 Least Favored Fighters with at least 5 Fights")

Logit Scale: Top 10 Least Favored Fighters with at least 5 Fights
NAME	Exp_Prop	Logit_Exp_Prop	Win_Prop	N_Fights	Over_Performance	Logit_Over	Back_Trans_Exp
Roxanne Modafferi	0.2743472	-1.0507130	0.3750000	8	0.1006528	0.5398874	0.2590882
Leslie Smith	0.3018944	-0.9783537	0.4000000	5	0.0981056	0.5728886	0.2732186
Daniel Kelly	0.2769185	-0.9737988	0.6000000	10	0.3230815	1.3792639	0.2741240
Jessica Aguilar	0.2859562	-0.9707245	0.2000000	5	-0.0859562	-0.4155698	0.2747361
Anthony Perosh	0.2985353	-0.9384366	0.4000000	5	0.1014647	0.5329715	0.2812162
Dan Henderson	0.2887631	-0.9309147	0.5000000	6	0.2112369	0.9309147	0.2827392
Thibault Gouti	0.2982523	-0.9293738	0.1666667	6	-0.1315857	-0.6800641	0.2830518
Takanori Gomi	0.3093210	-0.8732262	0.2000000	5	-0.1093210	-0.5130682	0.2945834
Yaotzin Meza	0.3076578	-0.8634526	0.4000000	5	0.0923422	0.4579875	0.2966185
Julian Erosa	0.3202609	-0.8581763	0.2000000	5	-0.1202609	-0.5281181	0.2977205

Examine odds for specific fighters.

# Per-fighter summary row for Israel Adesanya.
df_Izzy <- dplyr::filter(df_odds_long_fighters, NAME == "Israel Adesanya")
kable(df_Izzy)

NAME	Exp_Prop	Logit_Exp_Prop	Win_Prop	N_Fights	Over_Performance	Logit_Over	Back_Trans_Exp
Israel Adesanya	0.7002859	0.8678323	1	7	0.2997141	Inf	0.7042944

# Per-fighter summary row for Anthony Smith.
df_Smith <- dplyr::filter(df_odds_long_fighters, NAME == "Anthony Smith")
kable(df_Smith)

NAME	Exp_Prop	Logit_Exp_Prop	Win_Prop	N_Fights	Over_Performance	Logit_Over	Back_Trans_Exp
Anthony Smith	0.4539811	-0.2286408	0.6428571	14	0.1888761	0.8164275	0.4430875