title
output
always_allow_html
Introduction to expandr
html_document
github_document
default
true
suppressWarnings(suppressMessages({
library(knitr )
library(kableExtra )
library(htmltools )
library(tidyverse )
library(scales )
library(ExPanDaR )
}))
knitr :: opts_chunk $ set(fig.align = ' center' )
dat <- read_csv(" https://raw.githubusercontent.com/quarcs-lab/mendez2020-convergence-clubs-code-data/master/assets/dat.csv" )
## Parsed with column specification:
## cols(
## .default = col_double(),
## country = col_character(),
## region = col_character(),
## hi1990 = col_character(),
## isocode = col_character()
## )
## See spec(...) for full column specifications.
## Rows: 2,700
## Columns: 29
## $ id <dbl> 62, 62, 62, 62, 62, 62, 62, 13, 13, 13, 13, 62, 13, 13,…
## $ country <chr> "Mozambique", "Mozambique", "Mozambique", "Mozambique",…
## $ year <dbl> 1990, 1991, 1992, 1993, 1994, 1995, 1996, 2004, 2003, 2…
## $ Y <dbl> 7034.000, 7742.999, 6792.000, 7223.000, 8194.000, 7671.…
## $ K <dbl> 6262, 6462, 6592, 6859, 7246, 7734, 8121, 10047, 9311, …
## $ pop <dbl> 13.371971, 13.719853, 14.203987, 14.775877, 15.363065, …
## $ L <dbl> 5.413710, 5.593190, 5.844729, 6.187860, 6.513672, 6.804…
## $ s <dbl> 0.9964694, 0.9829382, 0.9694070, 0.9558758, 0.9423445, …
## $ alpha_it <dbl> 0.5737705, 0.5737705, 0.5737705, 0.5737705, 0.5737705, …
## $ GDPpc <dbl> 526.0256, 564.3646, 478.1756, 488.8373, 533.3571, 482.0…
## $ lp <dbl> 1299.294, 1384.362, 1162.073, 1167.286, 1257.969, 1127.…
## $ h <dbl> 1.347076, 1.343633, 1.340181, 1.336720, 1.333250, 1.329…
## $ kl <dbl> 1156.693, 1155.333, 1127.854, 1108.461, 1112.429, 1136.…
## $ kp <dbl> 1.1232833, 1.1982358, 1.0303398, 1.0530690, 1.1308308, …
## $ ky <dbl> 0.8902473, 0.8345603, 0.9705536, 0.9496054, 0.8843056, …
## $ TFP <dbl> 203.9550, 220.0026, 189.1149, 194.9619, 213.7372, 193.1…
## $ log_GDPpc_raw <dbl> 6.265350, 6.335700, 6.169978, 6.192030, 6.279191, 6.178…
## $ log_lp_raw <dbl> 7.169576, 7.232995, 7.057961, 7.062437, 7.137254, 7.027…
## $ log_ky_raw <dbl> -0.116255940, -0.180850310, -0.029888673, -0.051708743,…
## $ log_h_raw <dbl> 0.2979364, 0.2953772, 0.2928047, 0.2902188, 0.2876196, …
## $ log_tfp_raw <dbl> 5.317899, 5.393640, 5.242355, 5.272804, 5.364747, 5.263…
## $ log_GDPpc <dbl> 6.163751, 6.195724, 6.227951, 6.261036, 6.295438, 6.331…
## $ log_lp <dbl> 7.050233, 7.075745, 7.101554, 7.128354, 7.156726, 7.187…
## $ log_ky <dbl> -0.1290631, -0.1301618, -0.1312285, -0.1323578, -0.1333…
## $ log_h <dbl> 0.2770405, 0.2796887, 0.2823892, 0.2852334, 0.2883388, …
## $ log_tfp <dbl> 5.257494, 5.286922, 5.316501, 5.346648, 5.377597, 5.409…
## $ region <chr> "Africa", "Africa", "Africa", "Africa", "Africa", "Afri…
## $ hi1990 <chr> "no", "no", "no", "no", "no", "no", "no", "no", "no", "…
## $ isocode <chr> "MOZ", "MOZ", "MOZ", "MOZ", "MOZ", "MOZ", "MOZ", "BDI",…
# Import data definitions
dat_def <- read_csv(" https://raw.githubusercontent.com/quarcs-lab/mendez2020-convergence-clubs-code-data/master/assets/dat-definitions.csv" )
## Parsed with column specification:
## cols(
## var_name = col_character(),
## var_def = col_character(),
## type = col_character()
## )
dat_def %> %
print(n = Inf )
## # A tibble: 28 x 3
## var_name var_def type
## <chr> <chr> <chr>
## 1 country Standardized country name (from PWT) cs_id
## 2 year Year ts_id
## 3 Y GDP numer…
## 4 K Physical Capital numer…
## 5 pop Population numer…
## 6 L Labor Force numer…
## 7 s Years of Schooling numer…
## 8 alpha_it Variable Capital Share numer…
## 9 GDPpc GDP per capita numer…
## 10 lp Labor Productivity numer…
## 11 h Human Capital Index numer…
## 12 kl Capital per Worker numer…
## 13 kp Capital Productivity numer…
## 14 ky Capital-Output Ratio numer…
## 15 TFP Aggregate Efficiency numer…
## 16 log_GDPpc_raw Log of GDP per capita numer…
## 17 log_lp_raw Log of Labor Productivity numer…
## 18 log_ky_raw Log of Capital-Output Ratio numer…
## 19 log_h_raw Log of Human Capital numer…
## 20 log_tfp_raw Log of Total Factor Productivity numer…
## 21 log_GDPpc Trend (HP400) of log of Labor Productivity numer…
## 22 log_lp Trend (HP400) of log of GDP per capita numer…
## 23 log_ky Trend (HP400) of log of Capital-Output Ratio numer…
## 24 log_h Trend (HP400) of log of Human Capital numer…
## 25 log_tfp Trend (HP400) of log of Aggregate Efficiency numer…
## 26 region Regional group (Classification of the UN) factor
## 27 hi1990 High income country (as of 1990, World Bank classificat… factor
## 28 isocode ISO code from the PWT9.0 factor
df <- dat
df $ year <- as.factor(df $ year )
df $ hi1990 <- as.factor(df $ hi1990 )
p <- ggplot(df , aes(x = year )) +
geom_bar(aes(fill = hi1990 ), position = " fill" ) +
labs(x = " year" , fill = " hi1990" , y = " Percent" ) +
scale_y_continuous(labels = percent_format())
p <- p + scale_x_discrete(breaks = pretty(as.numeric(as.character(df $ year )), n = 10 ))
p
df <- dat
prepare_missing_values_graph(df , " year" )
df <- dat [df $ year == " 1990" , ]
t <- prepare_descriptive_table(df )
t $ kable_ret %> %
kable_styling(" condensed" , full_width = F , position = " center" )
Descriptive Statistics
N
Mean
Std. dev.
Min.
25 %
Median
75 %
Max.
id
108
54.500
31.321
1.000
27.750
54.500
81.250
108.000
year
108
1,990.000
0.000
1,990.000
1,990.000
1,990.000
1,990.000
1,990.000
Y
108
364,598.139
1,030,271.047
3,067.000
19,377.750
76,730.500
234,608.000
9,259,567.000
K
108
962,050.796
2,873,937.935
2,004.000
31,131.000
162,679.000
713,896.250
26,453,210.000
pop
108
45.410
140.672
1.565
5.106
10.354
34.444
1,154.606
L
108
19.507
69.002
0.703
2.056
4.352
12.218
637.075
s
108
6.499
2.905
0.893
4.164
6.982
8.792
12.199
alpha_it
90
0.433
0.113
0.148
0.355
0.432
0.493
0.768
GDPpc
108
9,784.000
9,475.931
526.026
2,103.651
6,126.897
16,453.113
37,503.441
lp
108
23,223.794
20,088.835
1,299.294
6,294.126
16,984.627
38,834.154
75,036.344
h
108
2.682
0.734
1.320
2.085
2.767
3.246
4.252
kl
108
63,403.227
68,027.748
725.042
9,651.041
35,519.549
102,058.916
255,639.410
kp
108
0.614
0.610
0.209
0.329
0.458
0.625
5.070
ky
108
2.279
0.987
0.197
1.599
2.184
3.039
4.775
TFP
108
824.208
652.602
139.966
360.481
702.477
1,053.205
4,164.725
log_GDPpc_raw
108
8.606
1.182
6.265
7.651
8.720
9.708
10.532
log_lp_raw
108
9.564
1.101
7.170
8.747
9.740
10.567
11.226
log_ky_raw
108
0.699
0.563
-1.623
0.470
0.781
1.112
1.563
log_h_raw
108
0.946
0.291
0.278
0.735
1.018
1.177
1.447
log_tfp_raw
108
6.441
0.756
4.941
5.887
6.555
6.960
8.334
log_GDPpc
108
8.542
1.189
6.164
7.568
8.676
9.604
10.492
log_lp
108
9.519
1.123
7.050
8.633
9.698
10.386
11.225
log_ky
108
0.740
0.585
-1.807
0.478
0.846
1.130
1.574
log_h
108
0.948
0.293
0.266
0.728
1.031
1.176
1.453
log_tfp
108
6.391
0.787
4.913
5.713
6.543
6.948
8.336
t <- prepare_descriptive_table(df )
# Create a function to round the decimals of a df
round_df <- function (x , digits ) {
# round all numeric variables
# x: data frame
# digits: number of digits to round
numeric_columns <- sapply(x , mode ) == ' numeric'
x [numeric_columns ] <- round(x [numeric_columns ], digits )
x
}
round_df(t $ df , 2 )
## N Mean Std. dev. Min. 25 % Median 75 %
## id 108 54.50 31.32 1.00 27.75 54.50 81.25
## year 108 1990.00 0.00 1990.00 1990.00 1990.00 1990.00
## Y 108 364598.14 1030271.05 3067.00 19377.75 76730.50 234608.00
## K 108 962050.80 2873937.94 2004.00 31131.00 162679.00 713896.25
## pop 108 45.41 140.67 1.57 5.11 10.35 34.44
## L 108 19.51 69.00 0.70 2.06 4.35 12.22
## s 108 6.50 2.90 0.89 4.16 6.98 8.79
## alpha_it 90 0.43 0.11 0.15 0.35 0.43 0.49
## GDPpc 108 9784.00 9475.93 526.03 2103.65 6126.90 16453.11
## lp 108 23223.79 20088.83 1299.29 6294.13 16984.63 38834.15
## h 108 2.68 0.73 1.32 2.08 2.77 3.25
## kl 108 63403.23 68027.75 725.04 9651.04 35519.55 102058.92
## kp 108 0.61 0.61 0.21 0.33 0.46 0.63
## ky 108 2.28 0.99 0.20 1.60 2.18 3.04
## TFP 108 824.21 652.60 139.97 360.48 702.48 1053.21
## log_GDPpc_raw 108 8.61 1.18 6.27 7.65 8.72 9.71
## log_lp_raw 108 9.56 1.10 7.17 8.75 9.74 10.57
## log_ky_raw 108 0.70 0.56 -1.62 0.47 0.78 1.11
## log_h_raw 108 0.95 0.29 0.28 0.73 1.02 1.18
## log_tfp_raw 108 6.44 0.76 4.94 5.89 6.55 6.96
## log_GDPpc 108 8.54 1.19 6.16 7.57 8.68 9.60
## log_lp 108 9.52 1.12 7.05 8.63 9.70 10.39
## log_ky 108 0.74 0.59 -1.81 0.48 0.85 1.13
## log_h 108 0.95 0.29 0.27 0.73 1.03 1.18
## log_tfp 108 6.39 0.79 4.91 5.71 6.54 6.95
## Max.
## id 108.00
## year 1990.00
## Y 9259567.00
## K 26453210.00
## pop 1154.61
## L 637.07
## s 12.20
## alpha_it 0.77
## GDPpc 37503.44
## lp 75036.34
## h 4.25
## kl 255639.41
## kp 5.07
## ky 4.78
## TFP 4164.72
## log_GDPpc_raw 10.53
## log_lp_raw 11.23
## log_ky_raw 1.56
## log_h_raw 1.45
## log_tfp_raw 8.33
## log_GDPpc 10.49
## log_lp 11.23
## log_ky 1.57
## log_h 1.45
## log_tfp 8.34
var <- as.numeric(dat $ log_lp [dat $ year == " 1990" ])
hist(var , main = " " , xlab = " log_lp" , col = " red" , right = FALSE , breaks = 10 )
t <- prepare_ext_obs_table(dat , n = 10 ,
cs_id = " country" ,
ts_id = " year" ,
var = " log_lp" )
t $ df
## country year log_lp
## 2700 Norway 2014 11.984427
## 2699 Norway 2013 11.958503
## 2698 Norway 2012 11.932280
## 2697 Norway 2011 11.905351
## 2696 Norway 2010 11.877324
## 2695 Saudi Arabia 2014 11.871549
## 2694 Norway 2009 11.847876
## 2693 Saudi Arabia 2013 11.820003
## 2692 Norway 2008 11.816551
## 2691 Ireland 2014 11.797161
## 10 Burundi 2005 7.249489
## 9 Burundi 2003 7.248836
## 8 Burundi 2004 7.247464
## 7 Mozambique 1996 7.219810
## 6 Mozambique 1995 7.187088
## 5 Mozambique 1994 7.156726
## 4 Mozambique 1993 7.128354
## 3 Mozambique 1992 7.101554
## 2 Mozambique 1991 7.075745
## 1 Mozambique 1990 7.050233
df <- dat
df <- df [df $ year == " 1990" , ]
prepare_by_group_bar_graph(df , " hi1990" , " lp" , mean , TRUE )$ plot +
ylab(" mean lp" )
df <- dat
prepare_by_group_violin_graph(df , " region" , " log_lp" , TRUE )
df <- dat
prepare_trend_graph(df , " year" , c(" lp" ))$ plot
df <- dat
prepare_quantile_trend_graph(df , " year" , c(0.05 , 0.25 , 0.5 , 0.75 , 0.95 ), " lp" , points = FALSE )$ plot
Custimized quantile trend graph
log_lp_raw <- prepare_quantile_trend_graph(dat , " year" , c(0.05 , 0.25 , 0.5 , 0.75 , 0.95 ), " log_lp_raw" , points = FALSE )$ plot
log_lp_raw <- log_lp_raw +
theme_minimal() +
guides(color = guide_legend(reverse = TRUE )) +
scale_color_discrete(name = " Quantile" ) +
labs(x = " " ,
y = " Log of Labor Productivity" )
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
# ggsave("figs/quintiles_all_log_lp_raw.pdf", width = 6, height = 4)
log_lp_raw
df <- dat
ret <- prepare_correlation_graph(df )
ret2 <- prepare_correlation_graph(df [, c(10 , 11 , 12 , 13 , 14 , 15 , 16 )])
df <- dat
df <- df [, c(" country" , " year" , " log_lp" , " log_GDPpc" , " region" , " pop" )]
df <- df [complete.cases(df ), ]
df $ region <- as.factor(df $ region )
set.seed(42 )
df <- sample_n(df , 1000 )
prepare_scatter_plot(df , " log_lp" , " log_GDPpc" , color = " region" , size = " pop" , loess = 1 )
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
df <- dat
df <- df [, c(" log_lp" , " log_ky" , " log_h" , " log_tfp" , " country" , " year" , " hi1990" )]
df <- df [complete.cases(df ), ]
df $ hi1990 <- as.factor(df $ hi1990 )
df <- droplevels(df )
t <- prepare_regression_table(df , dvs = " log_lp" , idvs = c(" log_ky" , " log_h" , " log_tfp" ), feffects = c(" country" , " year" ), clusters = c(" country" , " year" ), byvar = " hi1990" , models = " ols" )
HTML(t $ table )
Dependent variable: log_lp Full Sample no yes (1) (2) (3) log_ky 0.472*** 0.483*** 0.469*** (0.031) (0.032) (0.060) log_h 0.226* 0.183 0.435*** (0.132) (0.168) (0.135) log_tfp 1.447*** 1.503*** 1.180*** (0.042) (0.048) (0.064) Estimator ols ols ols Fixed effects country, year country, year country, year Std. errors clustered country, year country, year country, year Observations 2,700 2,050 650 R2 0.888 0.893 0.895 Adjusted R2 0.883 0.887 0.886 Note: * p<0.1; ** p<0.05; *** p<0.01
df <- dat
df <- df [, c(" log_lp" , " log_ky" , " log_h" , " log_tfp" , " country" , " year" , " hi1990" )]
df <- df [complete.cases(df ), ]
df $ hi1990 <- as.factor(df $ hi1990 )
df <- droplevels(df )
t <- prepare_regression_table(df , dvs = " log_lp" , idvs = c(" log_ky" , " log_h" , " log_tfp" ), feffects = c(" country" , " year" ), clusters = c(" country" , " year" ), byvar = " hi1990" , models = " ols" , format = " text" )
t
## $models
## $models[[1]]
## $models[[1]]$model
##
## Model Formula: log_lp ~ log_ky + log_h + log_tfp
## <environment: 0x7f7ed4df3178>
##
## Coefficients:
## log_ky log_h log_tfp
## 0.47182 0.22569 1.44690
##
##
## $models[[1]]$type_str
## [1] "ols"
##
## $models[[1]]$fe_str
## [1] "country, year"
##
## $models[[1]]$cl_str
## [1] "country, year"
##
## $models[[1]]$p
## log_ky log_h log_tfp
## 2.524332e-51 8.698143e-02 6.546745e-212
##
## $models[[1]]$se
## log_ky log_h log_tfp
## 0.03061389 0.13181026 0.04226128
##
## $models[[1]]$omit_vars
## NULL
##
## $models[[1]]$byvalue
## [1] "Full Sample"
##
##
## $models[[2]]
## $models[[2]]$model
##
## Model Formula: log_lp ~ log_ky + log_h + log_tfp
## <environment: 0x7f7edad8c7b8>
##
## Coefficients:
## log_ky log_h log_tfp
## 0.48272 0.18284 1.50339
##
##
## $models[[2]]$type_str
## [1] "ols"
##
## $models[[2]]$fe_str
## [1] "country, year"
##
## $models[[2]]$cl_str
## [1] "country, year"
##
## $models[[2]]$p
## log_ky log_h log_tfp
## 7.936025e-48 2.756112e-01 3.362078e-177
##
## $models[[2]]$se
## log_ky log_h log_tfp
## 0.03232132 0.16765703 0.04757119
##
## $models[[2]]$omit_vars
## NULL
##
## $models[[2]]$byvalue
## [1] "no"
##
##
## $models[[3]]
## $models[[3]]$model
##
## Model Formula: log_lp ~ log_ky + log_h + log_tfp
## <environment: 0x7f7ed3207ce8>
##
## Coefficients:
## log_ky log_h log_tfp
## 0.46857 0.43509 1.18037
##
##
## $models[[3]]$type_str
## [1] "ols"
##
## $models[[3]]$fe_str
## [1] "country, year"
##
## $models[[3]]$cl_str
## [1] "country, year"
##
## $models[[3]]$p
## log_ky log_h log_tfp
## 2.890378e-14 1.350437e-03 6.453452e-60
##
## $models[[3]]$se
## log_ky log_h log_tfp
## 0.06011667 0.13511032 0.06436493
##
## $models[[3]]$omit_vars
## NULL
##
## $models[[3]]$byvalue
## [1] "yes"
##
##
##
## $table
## [1] ""
## [2] "==============================================================="
## [3] " Dependent variable: "
## [4] " -----------------------------------------"
## [5] " log_lp "
## [6] " Full Sample no yes "
## [7] " (1) (2) (3) "
## [8] "---------------------------------------------------------------"
## [9] "log_ky 0.472*** 0.483*** 0.469*** "
## [10] " (0.031) (0.032) (0.060) "
## [11] " "
## [12] "log_h 0.226* 0.183 0.435*** "
## [13] " (0.132) (0.168) (0.135) "
## [14] " "
## [15] "log_tfp 1.447*** 1.503*** 1.180*** "
## [16] " (0.042) (0.048) (0.064) "
## [17] " "
## [18] "---------------------------------------------------------------"
## [19] "Estimator ols ols ols "
## [20] "Fixed effects country, year country, year country, year"
## [21] "Std. errors clustered country, year country, year country, year"
## [22] "Observations 2,700 2,050 650 "
## [23] "R2 0.888 0.893 0.895 "
## [24] "Adjusted R2 0.883 0.887 0.886 "
## [25] "==============================================================="
## [26] "Note: *p<0.1; **p<0.05; ***p<0.01"