import numpy as np
import pandas as pd
from statsmodels.formula.api import ols
from linearmodels.panel import PanelOLS, compare
第12章のPythonコード
第12章 操作変数法
モジュールのインポート
12.2 パネルデータ回帰分析
12.2.3 Rによる固定効果モデルの推定
= pd.read_csv('prefecture.csv')
prefdata
prefdata.head()
pref | year | suicide | unemp | |
---|---|---|---|---|
0 | 北海道 | 1997 | 19.6 | 3.7 |
1 | 青森 | 1997 | 26.5 | 3.9 |
2 | 岩手 | 1997 | 25.8 | 2.4 |
3 | 宮城 | 1997 | 18.6 | 3.1 |
4 | 秋田 | 1997 | 30.7 | 3.2 |
= ols('suicide ~ -1 + unemp + C(pref)', data = prefdata).fit()
result
str.contains('unemp')] result.params[result.params.index.
unemp 3.066965
dtype: float64
= prefdata['suicide'].mean()
suicidebar = prefdata['suicide'].mean() unempbar
= prefdata.groupby('pref').mean()[['suicide', 'unemp']].rename(columns = {'suicide' : 'suicidebar', 'unemp' : 'unempbar'})
prefmean = prefdata.set_index('pref').join(prefmean, on = 'pref')
prefdata2 = prefdata2.assign(suicide2 = prefdata2['suicide'] - prefdata2['suicidebar'])
prefdata2 = prefdata2.assign(unemp2 = prefdata2['unemp'] - prefdata2['unempbar']) prefdata2
= ols('suicide2 ~ unemp2', data = prefdata2).fit()
result print(result.summary().tables[1])
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Intercept 1.717e-16 0.078 2.2e-15 1.000 -0.153 0.153
unemp2 3.0670 0.084 36.370 0.000 2.901 3.232
==============================================================================
= prefdata.set_index(['pref', 'year'], drop=False) prefdata
= PanelOLS.from_formula('suicide ~ unemp + EntityEffects + TimeEffects', data = prefdata).fit()
result print(result)
PanelOLS Estimation Summary
================================================================================
Dep. Variable: suicide R-squared: 0.0228
Estimator: PanelOLS R-squared (Between): 0.2383
No. Observations: 1081 R-squared (Within): 0.2421
Date: Wed, Jun 05 2024 R-squared (Overall): 0.2385
Time: 18:19:53 Log-likelihood -2145.6
Cov. Estimator: Unadjusted
F-statistic: 23.537
Entities: 47 P-value 0.0000
Avg Obs: 23.000 Distribution: F(1,1011)
Min Obs: 23.000
Max Obs: 23.000 F-statistic (robust): 23.537
P-value 0.0000
Time periods: 23 Distribution: F(1,1011)
Avg Obs: 47.000
Min Obs: 47.000
Max Obs: 47.000
Parameter Estimates
==============================================================================
Parameter Std. Err. T-stat P-value Lower CI Upper CI
------------------------------------------------------------------------------
unemp 0.7709 0.1589 4.8515 0.0000 0.4591 1.0827
==============================================================================
F-test for Poolability: 72.439
P-value: 0.0000
Distribution: F(68,1011)
Included effects: Entity, Time
= ols('suicide ~ -1 + unemp + C(pref) + C(year)', data = prefdata).fit()
result print(result.summary())
OLS Regression Results
==============================================================================
Dep. Variable: suicide R-squared: 0.871
Model: OLS Adj. R-squared: 0.862
Method: Least Squares F-statistic: 98.93
Date: Wed, 05 Jun 2024 Prob (F-statistic): 0.00
Time: 18:19:53 Log-Likelihood: -2145.6
No. Observations: 1081 AIC: 4431.
Df Residuals: 1011 BIC: 4780.
Df Model: 69
Covariance Type: nonrobust
===================================================================================
coef std err t P>|t| [0.025 0.975]
-----------------------------------------------------------------------------------
C(pref)[三重] 15.4540 0.576 26.811 0.000 14.323 16.585
C(pref)[京都] 14.1302 0.739 19.127 0.000 12.681 15.580
C(pref)[佐賀] 17.1120 0.627 27.271 0.000 15.881 18.343
C(pref)[兵庫] 15.4590 0.762 20.296 0.000 13.964 16.954
C(pref)[北海道] 17.4921 0.785 22.284 0.000 15.952 19.033
C(pref)[千葉] 14.9005 0.660 22.582 0.000 13.606 16.195
C(pref)[和歌山] 19.0912 0.612 31.208 0.000 17.891 20.292
C(pref)[埼玉] 15.0413 0.712 21.124 0.000 13.644 16.438
C(pref)[大分] 17.0649 0.647 26.392 0.000 15.796 18.334
C(pref)[大阪] 15.8475 0.875 18.120 0.000 14.131 17.564
C(pref)[奈良] 13.1192 0.682 19.223 0.000 11.780 14.458
C(pref)[宮城] 16.3442 0.766 21.350 0.000 14.842 17.846
C(pref)[宮崎] 22.1573 0.664 33.354 0.000 20.854 23.461
C(pref)[富山] 20.0469 0.579 34.629 0.000 18.911 21.183
C(pref)[山口] 18.3655 0.610 30.111 0.000 17.169 19.562
C(pref)[山形] 20.7947 0.600 34.680 0.000 19.618 21.971
C(pref)[山梨] 18.3306 0.594 30.842 0.000 17.164 19.497
C(pref)[岐阜] 17.0414 0.569 29.928 0.000 15.924 18.159
C(pref)[岡山] 14.0439 0.643 21.851 0.000 12.783 15.305
C(pref)[岩手] 24.7182 0.664 37.209 0.000 23.415 26.022
C(pref)[島根] 22.2763 0.546 40.826 0.000 21.206 23.347
C(pref)[広島] 15.5406 0.637 24.398 0.000 14.291 16.791
C(pref)[徳島] 13.8827 0.668 20.773 0.000 12.571 15.194
C(pref)[愛媛] 17.4488 0.651 26.784 0.000 16.170 18.727
C(pref)[愛知] 14.0708 0.610 23.052 0.000 12.873 15.269
C(pref)[新潟] 23.0738 0.634 36.388 0.000 21.829 24.318
C(pref)[東京] 14.7798 0.728 20.302 0.000 13.351 16.208
C(pref)[栃木] 18.1480 0.638 28.428 0.000 16.895 19.401
C(pref)[沖縄] 15.4815 1.032 14.997 0.000 13.456 17.507
C(pref)[滋賀] 14.9267 0.608 24.546 0.000 13.733 16.120
C(pref)[熊本] 16.9430 0.708 23.919 0.000 15.553 18.333
C(pref)[石川] 15.6403 0.595 26.296 0.000 14.473 16.807
C(pref)[神奈川] 13.6703 0.689 19.852 0.000 12.319 15.022
C(pref)[福井] 16.3820 0.545 30.085 0.000 15.313 17.451
C(pref)[福岡] 16.2986 0.833 19.560 0.000 14.663 17.934
C(pref)[福島] 18.8978 0.687 27.505 0.000 17.550 20.246
C(pref)[秋田] 27.8666 0.716 38.905 0.000 26.461 29.272
C(pref)[群馬] 18.7780 0.624 30.106 0.000 17.554 20.002
C(pref)[茨城] 16.9063 0.652 25.932 0.000 15.627 18.186
C(pref)[長崎] 16.8341 0.697 24.157 0.000 15.467 18.202
C(pref)[長野] 17.4964 0.581 30.092 0.000 16.355 18.637
C(pref)[青森] 22.4524 0.829 27.076 0.000 20.825 24.080
C(pref)[静岡] 15.3146 0.593 25.824 0.000 14.151 16.478
C(pref)[香川] 14.5512 0.621 23.417 0.000 13.332 15.771
C(pref)[高知] 19.2860 0.717 26.886 0.000 17.878 20.694
C(pref)[鳥取] 17.4246 0.613 28.420 0.000 16.221 18.628
C(pref)[鹿児島] 18.5798 0.685 27.143 0.000 17.237 19.923
C(year)[T.1998] 5.5834 0.389 14.344 0.000 4.820 6.347
C(year)[T.1999] 4.5654 0.416 10.982 0.000 3.750 5.381
C(year)[T.2000] 3.7649 0.425 8.862 0.000 2.931 4.599
C(year)[T.2001] 2.9585 0.459 6.449 0.000 2.058 3.859
C(year)[T.2002] 3.5622 0.491 7.256 0.000 2.599 4.526
C(year)[T.2003] 5.5267 0.480 11.514 0.000 4.585 6.469
C(year)[T.2004] 4.3137 0.439 9.819 0.000 3.452 5.176
C(year)[T.2005] 4.7456 0.416 11.403 0.000 3.929 5.562
C(year)[T.2006] 4.7956 0.400 11.978 0.000 4.010 5.581
C(year)[T.2007] 5.2002 0.390 13.347 0.000 4.436 5.965
C(year)[T.2008] 4.6308 0.400 11.573 0.000 3.846 5.416
C(year)[T.2009] 4.2218 0.471 8.969 0.000 3.298 5.145
C(year)[T.2010] 2.8489 0.466 6.113 0.000 1.934 3.763
C(year)[T.2011] 2.4818 0.429 5.785 0.000 1.640 3.324
C(year)[T.2012] 0.8309 0.407 2.041 0.042 0.032 1.630
C(year)[T.2013] 0.8858 0.390 2.273 0.023 0.121 1.651
C(year)[T.2014] -0.2013 0.378 -0.532 0.595 -0.943 0.540
C(year)[T.2015] -0.9518 0.376 -2.534 0.011 -1.689 -0.215
C(year)[T.2016] -2.1755 0.378 -5.751 0.000 -2.918 -1.433
C(year)[T.2017] -2.3758 0.388 -6.130 0.000 -3.136 -1.615
C(year)[T.2018] -2.6838 0.402 -6.673 0.000 -3.473 -1.895
C(year)[T.2019] -2.8346 0.404 -7.012 0.000 -3.628 -2.041
unemp 0.7709 0.159 4.851 0.000 0.459 1.083
==============================================================================
Omnibus: 34.857 Durbin-Watson: 1.656
Prob(Omnibus): 0.000 Jarque-Bera (JB): 63.171
Skew: 0.230 Prob(JB): 1.92e-14
Kurtosis: 4.091 Cond. No. 279.
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
= PanelOLS.from_formula('suicide ~ unemp + TimeEffects', data = prefdata).fit()
POLS = PanelOLS.from_formula('suicide ~ unemp + EntityEffects + TimeEffects', data = prefdata).fit()
TWFE
print(compare({'OLS' : POLS, '2SLS' : TWFE}, precision = 'std-errors', stars = True))
Model Comparison
=====================================================
OLS 2SLS
-----------------------------------------------------
Dep. Variable suicide suicide
Estimator PanelOLS PanelOLS
No. Observations 1081 1081
Cov. Est. Unadjusted Unadjusted
R-squared 0.0146 0.0228
R-Squared (Within) 0.1689 0.2421
R-Squared (Between) 0.1625 0.2383
R-Squared (Overall) 0.1627 0.2385
F-statistic 15.676 23.537
P-value (F-stat) 0.0001 0.0000
===================== ============ ============
unemp 0.5133*** 0.7709***
(0.1296) (0.1589)
======================= ============== ==============
Effects Time Entity
Time
-----------------------------------------------------
Std. Errors reported in parentheses
Rによるデータ演習
= pd.read_csv('minimum_wage.csv')
mwdata
mwdata.head()
store_id | post | emp | price | nj | |
---|---|---|---|---|---|
0 | 6 | 0 | 5.0 | 4.72 | 1 |
1 | 14 | 0 | 16.0 | 4.40 | 1 |
2 | 26 | 0 | 41.5 | 2.95 | 1 |
3 | 27 | 0 | 13.0 | 4.25 | 1 |
4 | 30 | 0 | 10.0 | 4.65 | 1 |
= mwdata.set_index(['store_id', 'post'], drop = False)
mwdata
= PanelOLS.from_formula('emp ~ post:nj + EntityEffects + TimeEffects', data = mwdata).fit()
result print(result.summary)
PanelOLS Estimation Summary
================================================================================
Dep. Variable: emp R-squared: 0.0106
Estimator: PanelOLS R-squared (Between): 0.0741
No. Observations: 714 R-squared (Within): -0.0469
Date: Wed, Jun 05 2024 R-squared (Overall): 0.0696
Time: 18:19:53 Log-likelihood -2068.2
Cov. Estimator: Unadjusted
F-statistic: 3.8098
Entities: 357 P-value 0.0517
Avg Obs: 2.0000 Distribution: F(1,355)
Min Obs: 2.0000
Max Obs: 2.0000 F-statistic (robust): 3.8098
P-value 0.0517
Time periods: 2 Distribution: F(1,355)
Avg Obs: 357.00
Min Obs: 357.00
Max Obs: 357.00
Parameter Estimates
==============================================================================
Parameter Std. Err. T-stat P-value Lower CI Upper CI
------------------------------------------------------------------------------
nj:post 2.3258 1.1916 1.9519 0.0517 -0.0176 4.6693
==============================================================================
F-test for Poolability: 3.6760
P-value: 0.0000
Distribution: F(357,355)
Included effects: Entity, Time
= mwdata.query('post == 0 & nj == 0')['emp'].mean()
mean_pre_pa = mwdata.query('post == 1 & nj == 0')['emp'].mean()
mean_post_pa = mwdata.query('post == 0 & nj == 1')['emp'].mean()
mean_pre_nj = mwdata.query('post == 1 & nj == 1')['emp'].mean()
mean_post_nj
= (mean_post_pa - mean_pre_pa)
diff_pa = (mean_post_nj - mean_pre_nj)
diff_nj
= diff_nj - diff_pa
did print(did)
2.3258311888831678
'lprice'] = np.log(mwdata['price'])
mwdata[
= PanelOLS.from_formula('lprice ~ post:nj + EntityEffects + TimeEffects', data = mwdata).fit()
result print(result.summary)
PanelOLS Estimation Summary
================================================================================
Dep. Variable: lprice R-squared: 0.0159
Estimator: PanelOLS R-squared (Between): 0.0219
No. Observations: 672 R-squared (Within): 0.0355
Date: Wed, Jun 05 2024 R-squared (Overall): 0.0219
Time: 18:19:53 Log-likelihood 1073.7
Cov. Estimator: Unadjusted
F-statistic: 5.0947
Entities: 355 P-value 0.0247
Avg Obs: 1.8930 Distribution: F(1,315)
Min Obs: 1.0000
Max Obs: 2.0000 F-statistic (robust): 5.0947
P-value 0.0247
Time periods: 2 Distribution: F(1,315)
Avg Obs: 336.00
Min Obs: 335.00
Max Obs: 337.00
Parameter Estimates
==============================================================================
Parameter Std. Err. T-stat P-value Lower CI Upper CI
------------------------------------------------------------------------------
nj:post 0.0325 0.0144 2.2571 0.0247 0.0042 0.0609
==============================================================================
F-test for Poolability: 11.365
P-value: 0.0000
Distribution: F(355,315)
Included effects: Entity, Time
/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/linearmodels/panel/model.py:1183: MissingValueWarning:
Inputs contain missing values. Dropping rows with missing observations.