kilometer’s

a junk space

[研究関連]  [R関連] [書評] [その他]

home


Cauchy Distribution

概念

平均が収束しない理由: ゼロに近づくのと同じ程度、∞に近づくから

ライブラリ読み込み

library(tidyverse)
library(patchwork)
theme_set(theme_classic())

サンプリング

set.seed(71)

N <- 5000

df_cauchy <-
  tibble(N = 1:N, theta = runif(N, 0, 2 * pi)) %>%
  mutate(val = 1 / tan(theta / 2)) %>% 
  mutate(mean = cumsum(val) / N) %>% 
  mutate(x = sin(theta),
         y = cos(theta))

単位円描画用のデータフレーム(θの取り方の問題でx,yはあえて逆にしてある)

en <-
  tibble(theta = seq(0, 2 * pi, length = 1000)) %>% 
  mutate(y = cos(theta), x = sin(theta))

描画

.alpha <- 0.05  # あれこれの透明度
.l <- 6         # 軸範囲(±)

i <- 5000    # 描画するサンプル番号

# フィルタ
dat_i <-
  df_cauchy %>% 
  filter(N <= i)

dat_tail <-
  df_cauchy %>% 
  filter(N == i)

# 単位円と3点の描画
g_plot <-
  ggplot(data = dat_i) +
  aes(x = x, y = y) +
  geom_hline(yintercept = 0, color = "darkgrey") +
  geom_vline(xintercept = 0, color = "darkgrey") +
  # 赤破線
  geom_vline(data = dat_tail, 
             aes(xintercept = val), 
             color = "red", linetype = "dotted") +
  # 単位円
  geom_path(data = en, color = "grey") +
  # 該当する角度まで青の円弧
  geom_path(data = en %>% filter(theta <= dat_tail$theta),
            color = "blue", size = 0.5) +
  # 原点-青点の線分
  geom_segment(data = dat_tail,
               xend = 0, yend = 0, color = "skyblue") +
  # 青点-黒点
  geom_segment(data = dat_tail,
               xend = 0, yend = 1, color = "pink") +
  # 赤点-黒点
  geom_segment(data = dat_tail,
               aes(x = val),
               y = 0, xend = 0, yend = 1, color = "pink") +
  # 赤線(赤点がx範囲外の時):yがy範囲外なら描画されない
  geom_segment(data = dat_tail,
               aes(x = x / abs(x) * .l, y = 1 - (1 - y) / abs(x) * .l),
               xend = 0, yend = 1, color = "pink") +
  # 黒点
  geom_point(x = 0, y = 1) +
  # 青点
  geom_point(data = dat_tail,
             color = "blue") +
  # これまでの赤点の重ね書き
  geom_point(aes(x = val, y = 0), 
             color = "red", alpha = .alpha, size = 0.5) +
  # 赤点
  geom_point(data = dat_tail,
             aes(x = val, y = 0), 
             color = "red", size = 1.5) +
  # ラベル
  geom_text(data = dat_tail,
            aes(label = str_c("N=", N)),
            x = -.l, y = 1, hjust = 0, vjust = 1) +
  scale_x_continuous(limits = c(-.l, .l)) +
  scale_y_continuous(limits = c(-1, 1)) +
  coord_fixed()

# thetaの確率密度分布
g_dens_u <-
  ggplot(data = dat_i) +
  aes(x = theta) +
  geom_density(color = "blue", fill = "skyblue") +
  geom_vline(data = dat_tail,
             aes(xintercept = theta),
             color = "blue", linetype = "dotted") +
  geom_point(y = 0, color = "blue", alpha = .alpha, size = 0.5) +
  geom_point(data = dat_tail,
             y = 0, color = "blue", size = 1) +
  scale_x_continuous(limits = c(0, 2 * pi), 
                     breaks = c(0, pi, 2 * pi), 
                     labels = c("0", "pi", "2pi")) +
  scale_y_continuous(limits = c(0, NA), expand = c(0.1, 0)) +
  xlab("theta") +
  ylab("density")

# xの確率密度分布
g_dens <-
  ggplot(data = dat_i) +
  aes(x = val) +
  geom_density(color = "red", fill = "pink") +
  geom_vline(xintercept = 0, color = "darkgrey") +
  geom_point(y = 0, color = "red", alpha = .alpha) +
  geom_vline(data = dat_tail,
             aes(xintercept = val),
             color = "red", linetype = "dotted") +
  geom_vline(data = dat_tail,
             aes(xintercept = mean),
             color = "red", size = 1) +
  scale_x_continuous(limits = c(-.l, .l)) +
  scale_y_continuous(limits = c(0, NA), expand = c(0.1, 0)) +
  theme(axis.title.x = element_blank()) +
  ylab("density")

# xの平均の推移
g_mean <-
  ggplot(data = dat_i) +
  aes(x = mean, 
      y = N) +
  geom_vline(xintercept = 0, color = "darkgrey") +
  geom_path(color = "red", size = 1) +
  scale_x_continuous(limits = c(-.l, .l))

図の合成

g <-
  wrap_plots(g_dens_u, g_plot, g_dens, g_mean,
             heights = c(0.5, 0.7, 1, 1))

保存

ggsave("fig/cauchydist.png", g, width = 5, height = 6)

補足: 確率密度関数

補足: 正規分布の平均はすぐに収束

補足: 動作環境

R version 4.0.3 (2020-10-10)
Platform: x86_64-apple-darwin17.0 (64-bit)
Running under: macOS Big Sur 10.16

 [1] patchwork_1.1.0
 [2] forcats_0.5.0  
 [3] stringr_1.4.0  
 [4] dplyr_1.0.2    
 [5] purrr_0.3.4    
 [6] readr_1.4.0    
 [7] tidyr_1.1.2    
 [8] tibble_3.0.4   
 [9] ggplot2_3.3.5  
[10] tidyverse_1.3.0