1

Yesterday, I posted this question about Rock Paper Scissors: Mathematics of Rock Paper Scissors game

I recently thought of the following questions:

Suppose there are 1000 players at the start. At the start of the first round, the players are paired up. Each pair of players plays only 1 game (best of 3). The winner moves on to the next round, and the loser is instantly eliminated.

  • Q1: On average, how many ties can we expect in this game? Can we know the variance?
  • Q2: On average, at what round and at which turn will the 1000th tie happen? Can we know the variance?

I was not sure how to analyze these problems, so I tried to simulate them:

enter image description here enter image description here

Can someone please show me how to mathematically answer these questions?


Here is the computer code (R) used for these simulations:

library(ggplot2)
library(dplyr)
library(viridis)
library(gridExtra)
library(tidyr)

#' Simulate single-elimination Rock-Paper-Scissors tournaments and plot ties.
#'
#' Runs `n_sims` independent tournaments, prints summary statistics for the
#' total number of games and ties, draws a 2x2 grid of per-round plots, and
#' then runs a second batch of `n_sims` tournaments to locate the round and
#' running game number at which the `target_tie`-th tie occurs.
#'
#' A "match" here is a sequence of throws between two players that is repeated
#' until the two throws differ; every repeated (equal) throw counts as a tie.
#'
#' @param n_players Number of players at the start; must be even and >= 2.
#' @param n_sims Number of tournaments to simulate in each phase (>= 1).
#' @param target_tie Ordinal of the tie whose location is analysed.
#' @param n_trajectories Number of individual simulations drawn as faint
#'   lines in the cumulative plots (capped at `n_sims`).
#' @return Called for its side effects (console output and plots).
analyze_rps_tournament <- function(n_players, n_sims = 1000, target_tie = 250, n_trajectories = 100) {
    if (n_sims < 1) {
        stop("n_sims must be at least 1")
    }

    # Play one match: throw until the two choices differ.
    # Returns the number of throws (`games`) and how many of them were ties.
    simulate_single_match <- function() {
        games <- 0
        ties <- 0
        repeat {
            games <- games + 1
            choices <- sample(1:3, 2, replace = TRUE)
            if (choices[1] == choices[2]) {
                ties <- ties + 1
            } else {
                break
            }
        }
        list(games = games, ties = ties)
    }

    # Play one full tournament and record games/ties per round and in total.
    simulate_tournament <- function(n_players) {
        if (n_players < 2 || n_players %% 2 != 0) {
            stop("Number of players must be even and at least 2")
        }

        players <- seq_len(n_players)
        round_num <- 1
        total_games <- 0
        total_ties <- 0
        games_by_round <- list()
        ties_by_round <- list()

        while (length(players) > 1) {
            round_games <- 0
            round_ties <- 0
            n_current <- length(players)
            n_matches <- n_current %/% 2
            winners <- integer(n_matches)

            for (m in seq_len(n_matches)) {
                match_result <- simulate_single_match()
                round_games <- round_games + match_result$games
                round_ties <- round_ties + match_result$ties
                # The winner is picked uniformly from the pair (2m - 1, 2m).
                winners[m] <- players[2 * m - 1 + sample(0:1, 1)]
            }

            # With an odd number of players the unpaired last player gets a
            # bye: no match is played and the player advances. This keeps the
            # total number of matches at n_players - 1, consistent with the
            # theoretical expectations printed below.
            if (n_current %% 2 == 1) {
                winners <- c(winners, players[n_current])
            }

            games_by_round[[round_num]] <- round_games
            ties_by_round[[round_num]] <- round_ties
            total_games <- total_games + round_games
            total_ties <- total_ties + round_ties
            players <- winners
            round_num <- round_num + 1
        }

        list(
            games_by_round = games_by_round,
            ties_by_round = ties_by_round,
            total_games = total_games,
            total_ties = total_ties,
            n_rounds = round_num - 1
        )
    }

    # Play one tournament only until the `target_tie`-th tie occurs.
    # Returns c(round, game) for that tie, or NULL if the tournament ends
    # first. `game` is the running throw count over the whole tournament.
    locate_target_tie <- function() {
        players <- seq_len(n_players)
        round_num <- 1
        tie_count <- 0
        game_num <- 0

        while (length(players) > 1) {
            n_matches <- length(players) %/% 2

            for (m in seq_len(n_matches)) {
                repeat {
                    game_num <- game_num + 1
                    choices <- sample(1:3, 2, replace = TRUE)
                    if (choices[1] != choices[2]) {
                        break
                    }
                    tie_count <- tie_count + 1
                    if (tie_count == target_tie) {
                        return(c(round_num, game_num))
                    }
                }
            }

            # Only the number of survivors matters here, so keep every
            # other player (an unpaired last player advances as a bye).
            players <- players[seq(1, length(players), 2)]
            round_num <- round_num + 1
        }

        NULL
    }

    # ---- Phase 1: general tournament statistics ----
    cat("Running general tournament statistics...\n")

    results <- vector("list", n_sims)
    for (i in seq_len(n_sims)) {
        if (i %% 100 == 0) cat("Running simulation", i, "of", n_sims, "\n")
        results[[i]] <- simulate_tournament(n_players)
    }

    # The number of rounds depends only on n_players, so it is the same in
    # every simulation.
    n_rounds <- results[[1]]$n_rounds

    games_df <- data.frame(
        round = rep(seq_len(n_rounds), n_sims),
        games = unlist(lapply(results, function(x) unlist(x$games_by_round))),
        simulation = rep(seq_len(n_sims), each = n_rounds)
    )

    ties_df <- data.frame(
        round = rep(seq_len(n_rounds), n_sims),
        ties = unlist(lapply(results, function(x) unlist(x$ties_by_round))),
        simulation = rep(seq_len(n_sims), each = n_rounds)
    )

    game_trajectories <- games_df %>%
        group_by(simulation) %>%
        mutate(cum_games = cumsum(games)) %>%
        ungroup()

    tie_trajectories <- ties_df %>%
        group_by(simulation) %>%
        mutate(cum_ties = cumsum(ties)) %>%
        ungroup()

    total_ties <- vapply(results, function(x) x$total_ties, numeric(1))
    total_games <- vapply(results, function(x) x$total_games, numeric(1))
    ties_mean <- mean(total_ties)
    ties_sd <- sd(total_ties)
    games_mean <- mean(total_games)
    games_sd <- sd(total_games)

    # A throw is a tie with probability 1/3, so a match needs on average
    # 1.5 throws (0.5 ties); a knockout with n players has n - 1 matches.
    cat("\nTheoretical Expectations:\n")
    cat(sprintf("Expected games per match: 1.5\n"))
    cat(sprintf("Expected total games: %.1f\n", (n_players - 1) * 1.5))
    cat(sprintf("Expected total ties: %.1f\n", (n_players - 1) * 0.5))

    cat("\nSimulation Results:\n")
    cat(sprintf("Mean total games: %.1f ± %.1f\n", games_mean, games_sd))
    cat(sprintf("Mean total ties: %.1f ± %.1f\n", ties_mean, ties_sd))

    # Cap the sample size so n_trajectories > n_sims does not raise an error.
    sampled_sims <- sample.int(n_sims, min(n_trajectories, n_sims))
    sim_subtitle <- sprintf("%d Players, %d Simulations", n_players, n_sims)
    common_theme <- theme_minimal() +
        theme(axis.text.x = element_text(angle = 0),
              panel.grid.minor = element_blank())

    # NOTE(review): recent ggplot2 releases prefer `linewidth` over `size`
    # for line geoms; `size` is kept so older ggplot2 versions still work.
    p1 <- ggplot() +
        geom_line(data = subset(tie_trajectories, simulation %in% sampled_sims),
                  aes(x = factor(round), y = cum_ties, group = simulation),
                  color = "#440154FF", alpha = 0.1) +
        stat_summary(data = tie_trajectories,
                     aes(x = factor(round), y = cum_ties),
                     fun = mean, geom = "line",
                     color = "red", size = 1, group = 1) +
        labs(title = "Cumulative Ties by Round",
             subtitle = sprintf("%s\nMean final: %.1f ± %.1f ties",
                                sim_subtitle, ties_mean, ties_sd),
             x = "Round Number",
             y = "Cumulative Number of Ties") +
        common_theme

    p2 <- ggplot() +
        geom_line(data = subset(game_trajectories, simulation %in% sampled_sims),
                  aes(x = factor(round), y = cum_games, group = simulation),
                  color = "#238A8DFF", alpha = 0.1) +
        stat_summary(data = game_trajectories,
                     aes(x = factor(round), y = cum_games),
                     fun = mean, geom = "line",
                     color = "red", size = 1, group = 1) +
        labs(title = "Cumulative Games by Round",
             subtitle = sprintf("%s\nMean final: %.1f ± %.1f games",
                                sim_subtitle, games_mean, games_sd),
             x = "Round Number",
             y = "Cumulative Number of Games") +
        common_theme

    # "\u25c6" is the black diamond matching point shape 18 used for the mean.
    p3 <- ggplot(ties_df, aes(x = factor(round), y = ties)) +
        geom_boxplot(fill = "#440154FF", alpha = 0.6, outlier.alpha = 0.3) +
        stat_summary(fun = mean, geom = "point", shape = 18, size = 3, color = "red") +
        labs(title = "Distribution of Ties by Round",
             subtitle = sprintf("%s\nRounds 1-%d, \u25c6 marks mean",
                                sim_subtitle, n_rounds),
             x = "Round Number",
             y = "Number of Ties") +
        common_theme

    p4 <- ggplot(games_df, aes(x = factor(round), y = games)) +
        geom_boxplot(fill = "#238A8DFF", alpha = 0.6, outlier.alpha = 0.3) +
        stat_summary(fun = mean, geom = "point", shape = 18, size = 3, color = "red") +
        labs(title = "Distribution of Games by Round",
             subtitle = sprintf("%s\nRounds 1-%d, \u25c6 marks mean",
                                sim_subtitle, n_rounds),
             x = "Round Number",
             y = "Number of Games") +
        common_theme

    grid.arrange(p1, p2, p3, p4, ncol = 2)

    # ---- Phase 2: where does the target_tie-th tie occur? ----
    cat(sprintf("\nAnalyzing location of %dth tie...\n", target_tie))

    # Preallocated per-simulation results; NA means the tie was never reached.
    tie_round <- rep(NA_real_, n_sims)
    tie_game <- rep(NA_real_, n_sims)

    for (i in seq_len(n_sims)) {
        if (i %% 100 == 0) cat("Running simulation", i, "of", n_sims, "\n")

        location <- locate_target_tie()
        if (!is.null(location)) {
            tie_round[i] <- location[1]
            tie_game[i] <- location[2]
        }
    }

    reached <- !is.na(tie_round)
    tie_locations <- data.frame(
        simulation = which(reached),
        round = tie_round[reached],
        game = tie_game[reached]
    )

    if (nrow(tie_locations) > 0) {
        avg_game <- mean(tie_locations$game)
        avg_round <- mean(tie_locations$round)
        sd_game <- sd(tie_locations$game)
        sd_round <- sd(tie_locations$round)

        cat(sprintf("\nStatistics for %dth Tie:\n", target_tie))
        cat(sprintf("Average Game: %.1f (SD: %.1f)\n", avg_game, sd_game))
        cat(sprintf("Average Round: %.1f (SD: %.1f)\n", avg_round, sd_round))

        # Percentages are relative to all simulations, including those that
        # never reached the target tie.
        heatmap_data <- tie_locations %>%
            group_by(round, game) %>%
            summarise(count = n(), .groups = "drop") %>%
            mutate(percentage = count / n_sims * 100)

        game_breaks <- pretty(heatmap_data$game)

        p_heatmap <- ggplot(heatmap_data, aes(x = game, y = factor(round), fill = percentage)) +
            geom_tile() +
            scale_fill_viridis_c(
                name = "% of\nSimulations",
                option = "viridis"  # viridis colour scheme (blue-green-yellow)
            ) +
            scale_x_continuous(
                breaks = game_breaks,
                labels = game_breaks
            ) +
            labs(
                title = sprintf("Location of %dth Tie in Tournament", target_tie),
                subtitle = sprintf("%d Players, %d Simulations\nMean Game: %.1f (±%.1f), Mean Round: %.1f (±%.1f)",
                                   n_players, n_sims, avg_game, sd_game, avg_round, sd_round),
                x = "Game Number",
                y = "Round Number"
            ) +
            theme_minimal() +
            theme(
                panel.grid = element_blank(),
                axis.text.x = element_text(angle = 45, hjust = 1),
                legend.position = "right",
                plot.title = element_text(hjust = 0.5),
                plot.subtitle = element_text(hjust = 0.5)
            )

        print(p_heatmap)
    } else {
        cat(sprintf("No simulations reached %d ties\n", target_tie))
    }
}

# Fix the RNG seed so the simulated results are reproducible, then run the
# full analysis. The two statements must be on separate lines to parse as R.
set.seed(42)
analyze_rps_tournament(
    n_players = 1000,
    n_sims = 1000,
    target_tie = 250,
    n_trajectories = 100
)

farrow90
  • 636

1 Answers1

3

A tie occurs with 1/3 probability. There is a 2/3 chance of a winner per game, so each match consists of an average of 1.5 games. There are 999 matches in total, since one person gets eliminated per match and a total of 999 people must be eliminated. Thus, there are on average 1498.5 games, and 499.5 ties. The 1000th tie is very unlikely to occur at all in the tournament.

  • thank you for your answer! it seems using your logic my simulation is incorrect? – farrow90 Nov 10 '24 at 22:34
  • This answer is correct given that the games are "best of 1", but the question was edited to specify that "game" means "best of 3". Given that, the simulations look reasonable to me. – Ravi Fernando Nov 19 '24 at 05:42