import numpy as np
from scipy.stats import binom
import pandas as pd
from scipy.stats import norm
from statsmodels.formula.api import ols
import matplotlib.pyplot as plt第5章のPythonコード
第5章 推測統計の基礎
モジュールのインポート
5.1 統計的仮説検定の考え方
5.1.3 コイン投げの例
p40 = binom.cdf(k = 40, n = 100, p = 0.5)
p60 = binom.cdf(k = 60, n = 100, p = 0.5)
print(p60 - p40)0.9539559330706573
5.2 平均値の検定
5.2.5 Rによる例題演習
simdata = pd.read_csv('distributions.csv')
sample_mean = simdata.mean(numeric_only = True)[0]
sample_var = simdata.var(numeric_only = True)[0]
print(
"Mean of A: ", sample_mean, "\n",
"Varicane of A: ", sample_var, "\n",
"t-value: ", (100 ** 0.5) * (sample_var ** -0.5) * (sample_mean)
)Mean of A: 2.0423230835000004
Varicane of A: 0.37278460765476673
t-value: 33.44994900706078
5.2.6 \(p\)値
1 - norm.cdf(x = 1.250113) + norm.cdf(x = -1.250113)0.21125827155567078
5.3 回帰係数の検定
5.3.2 \(\hat{\beta}_1\)の分布のシミュレーション
np.random.seed(2022)
X = np.random.normal(loc = 0, scale = 1, size = 1000)
Y = 1 + 5 * X + np.random.normal(loc = 0, scale = 1, size = 1000)
mydata = pd.DataFrame(np.array([X, Y]).transpose(), columns = ['X', 'Y'])
beta1 = ols('Y ~ X', data = mydata).fit().params[1]S = 10000
beta1 = np.zeros(S) # 結果の保存
for i in range(S): # 繰り返し開始
X = np.random.normal(loc = 0, scale = 1, size = 1000)
Y = 1 + 5 * X + np.random.normal(loc = 0, scale = 1, size = 1000)
mydata = pd.DataFrame(np.array([X, Y]).transpose(), columns = ['X', 'Y'])
beta1[i] = ols('Y ~ X', data = mydata).fit().params[1]
# 繰り返し終了pd.DataFrame(beta1).describe()| 0 | |
|---|---|
| count | 10000.000000 |
| mean | 5.000060 |
| std | 0.031486 |
| min | 4.887715 |
| 25% | 4.978288 |
| 50% | 4.999855 |
| 75% | 5.021525 |
| max | 5.120464 |
plt.hist(beta1, bins = 20)
plt.show()
5.3.6 Rによる分析例
wagedata = pd.read_csv('wage.csv')
wagedata['log_wage'] = np.log(wagedata['wage'])
result = ols('log_wage ~ educ + exper', data = wagedata).fit()
print(result.summary()) OLS Regression Results
==============================================================================
Dep. Variable: log_wage R-squared: 0.181
Model: OLS Adj. R-squared: 0.181
Method: Least Squares F-statistic: 333.0
Date: Wed, 05 Jun 2024 Prob (F-statistic): 2.32e-131
Time: 18:21:20 Log-Likelihood: -1524.1
No. Observations: 3010 AIC: 3054.
Df Residuals: 3007 BIC: 3072.
Df Model: 2
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Intercept 4.6660 0.064 73.147 0.000 4.541 4.791
educ 0.0932 0.004 25.796 0.000 0.086 0.100
exper 0.0407 0.002 17.417 0.000 0.036 0.045
==============================================================================
Omnibus: 37.191 Durbin-Watson: 1.739
Prob(Omnibus): 0.000 Jarque-Bera (JB): 41.243
Skew: -0.228 Prob(JB): 1.11e-09
Kurtosis: 3.348 Cond. No. 140.
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
print(0.0932 / 0.004)23.3
5.4 信頼区間
5.4.2 Rによる分析例
result = ols('log_wage ~ educ + exper', data = wagedata).fit()
print(result.summary()) OLS Regression Results
==============================================================================
Dep. Variable: log_wage R-squared: 0.181
Model: OLS Adj. R-squared: 0.181
Method: Least Squares F-statistic: 333.0
Date: Wed, 05 Jun 2024 Prob (F-statistic): 2.32e-131
Time: 18:21:20 Log-Likelihood: -1524.1
No. Observations: 3010 AIC: 3054.
Df Residuals: 3007 BIC: 3072.
Df Model: 2
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Intercept 4.6660 0.064 73.147 0.000 4.541 4.791
educ 0.0932 0.004 25.796 0.000 0.086 0.100
exper 0.0407 0.002 17.417 0.000 0.036 0.045
==============================================================================
Omnibus: 37.191 Durbin-Watson: 1.739
Prob(Omnibus): 0.000 Jarque-Bera (JB): 41.243
Skew: -0.228 Prob(JB): 1.11e-09
Kurtosis: 3.348 Cond. No. 140.
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
betahat = result.params
sigma = result.bse
lower = betahat - 1.96 * sigma
upper = betahat + 1.96 * sigma
pd.concat([lower, upper], axis = 1)| 0 | 1 | |
|---|---|---|
| Intercept | 4.541006 | 4.791063 |
| educ | 0.086089 | 0.100247 |
| exper | 0.036082 | 0.045233 |
result.conf_int(alpha = 0.05)| 0 | 1 | |
|---|---|---|
| Intercept | 4.540958 | 4.791111 |
| educ | 0.086086 | 0.100250 |
| exper | 0.036080 | 0.045235 |
result.conf_int(alpha = 0.01)| 0 | 1 | |
|---|---|---|
| Intercept | 4.501618 | 4.830451 |
| educ | 0.083859 | 0.102477 |
| exper | 0.034641 | 0.046674 |