********************************************************************
* This do-file produces the results in Andrew Leigh, 'Returns to Education in Australia', Economic Papers.
* Feel free to use or adapt the do-file, but please cite the above paper if you do so.
* This do-file uses data from the HILDA dataset, release 5.1 
* To obtain the data, go to http://melbourneinstitute.com/hilda/
* To contact me, email andrew_leigh@ksg02.harvard.edu.
********************************************************************

version 10.0
clear
set mem 25m
* Change this next line to the directory where you have the HILDA files
cd "C:\Users\Andrew\Datasets\HILDA\"
set more off

* Build one extract per wave. Wave letters a-e are paired with years 2001-2005.
* Each extract keeps the listed variables, strips the wave-letter prefix so
* names line up across waves, tags the rows with the year, and is saved as
* temp_<year>. xwaveid carries no wave prefix. lnwte is requested for every
* wave except the first.
local lead "caept caeft ehtjb wscei wsfei"
local tail "hhwte hhrhid jbhruc mrcurr wsce edagels hgage hgsex hhstate edhists edhigh tifefp tifefn tifdip tifdin"
local yr = 2001
foreach w in a b c d e {
    local wanted ""
    foreach stub of local lead {
        local wanted "`wanted' `w'`stub'"
    }
    local wanted "`wanted' xwaveid"
    if "`w'" != "a" {
        local wanted "`wanted' `w'lnwte"
    }
    foreach stub of local tail {
        local wanted "`wanted' `w'`stub'"
    }
    use `wanted' using "Combined_`w'51c.dta", clear
    renpfix `w'
    gen year = `yr'
    save temp_`yr', replace
    local ++yr
}

* The 2005 extract is still in memory; stack the earlier years underneath it,
* newest first.
forvalues yr = 2004(-1)2001 {
    append using temp_`yr'
}

* Sample selection - keep only 25-64 year olds, not enrolled in PT or FT education
* (rows with missing hgage fail the upper bound and are dropped as well)
keep if caept~=1 & caeft~=1 & hgage>=25 & hgage<=64

* Non-positive values of lnwte are set to zero
replace lnwte = 0 if lnwte <= 0

* Sex indicator: 2 becomes 1 and 1 becomes 0; any other value is left alone
ren hgsex female
replace female = female - 1 if inlist(female, 1, 2)

* Marital status indicator: codes 1-2 become 1, codes 3-6 become 0;
* values outside 1-6 are left as they are
gen married = mrcurr
replace married = 1 if inrange(married, 1, 2)
replace married = 0 if inrange(married, 3, 6)
gen fem_marr = female*married

* Hours: values from -6 to 0 are treated as missing.
* parttime/fulltime are 0/1 flags; rows with missing hours get 0 on parttime.
ren jbhruc hours
replace hours = . if inrange(hours, -6, 0)
gen parttime = (hours<35 & hours>0)
gen fulltime = (hours>=35 & hours~=.)

* Age and its square (scaled by 100)
ren hgage age
gen age2 = age^2/100

* Weights: weight is used as an analytic weight, fweight is its integer part
ren hhwte weight
gen fweight = int(weight)

* Whole years of ehtjb; values from -10 to -1 are treated as missing
gen exper = int(ehtjb)
replace exper = . if inrange(exper, -10, -1)

* Cell identifier for each female x exper combination.
* NOTE(review): the variable is called fem_age but is grouped on exper, not age.
egen fem_age = group(female exper) if female~=. & exper~=.

***** Coding education in years *****
* Coding up high school years of attainment
* edhists 1-4 map to 12, 11, 10 and 9 years; 5-9 map to 8; anything else is missing
gen schoolyears = .
replace schoolyears = 12 if edhists == 1
replace schoolyears = 11 if edhists == 2
replace schoolyears = 10 if edhists == 3
replace schoolyears = 9 if edhists == 4
replace schoolyears = 8 if inrange(edhists, 5, 9)
* Coding up college education
* Start from school years, then overwrite by edhigh: 1 -> 17, 2 -> 16, 3 -> 15,
* and 4, 5 or 6 -> 12. Other edhigh values keep the school-years figure.
gen edyears = schoolyears
replace edyears = 17 if edhigh == 1
replace edyears = 16 if edhigh == 2
replace edyears = 15 if edhigh == 3
replace edyears = 12 if inlist(edhigh, 4, 5, 6)

* Logging income
* Five income measures are set up in levels first (the hourly figure is weekly
* income divided by hours), then every variable whose name starts with
* "income" is replaced by its natural log. Non-positive levels become missing.
gen income = tifefp
gen income_weekly = wscei
ren tifdip income_annual_disp
gen income_annual = wsfei
gen income_hourly = income_weekly/hours
foreach v of varlist income* {
    replace `v' = ln(`v')
}
* Created after the logging step so it stays a 0/1 flag:
* 1 if wsfei is positive, 0 if wsfei is exactly zero, missing otherwise
gen income_positive = (wsfei>0) if wsfei>=0 & wsfei~=.

* Generating year dummies
* year2002-year2005; 2001 has no dummy of its own
forvalues y = 2002/2005 {
    gen year`y' = (year == `y')
}

**************************************
* Years of high school
**************************************
* Indicators for the school-year groups, restricted to respondents with
* edhigh==9 (school9-school11) or edhigh==8 (school12).
gen school9  = 1 if edhigh==9 & edhists==4
gen school10 = 1 if edhigh==9 & edhists==3
gen school11 = 1 if edhigh==9 & edhists==2
gen school12 = 1 if edhigh==8 & edhists==1
* Anyone flagged on at least one indicator gets 0 on the others; everyone else
* stays missing on all four and so drops out of the estimations below.
foreach v of varlist school9 school10 school11 school12 {
    replace `v' = 0 if `v'==. & (school9~=. | school10~=. | school11~=. | school12~=.)
}
* Pathways
tabulate edhigh [aw=weight] if schoolyears==12
tabulate edhigh [aw=weight] if schoolyears<12
* Summary statistics
* Weighted means and counts for three samples: hourly income observed,
* annual income observed, and all rows with a fem_age cell.
local cells "fem_age~=. & age>24 & age<65"
local tsopts "col(stat) stat(mean n) format(%9.2f)"
log using sumstats, replace
tabstat school9-school12 if income_hourly~=. & `cells' [aw=weight], `tsopts'
tabstat school9-school12 if income_annual~=. & `cells' [aw=weight], `tsopts'
tabstat school9-school12 if `cells' [aw=weight], `tsopts'
log close

* Pieces shared by the three specifications below. The reg1 global is rebuilt
* before each estimator so it always holds the spec currently in use.
local rhs "school10 school11 school12 year2002-year2005"
local insample "if age>24 & age<65"
local tblfmt "coefastr nocons bracket 3aster bdec(3) se"

* Log hourly and log annual income, absorbing fem_age cells, clustered on
* xwaveid. The first column starts a fresh ols_returns1.doc; later ones append.
global reg1 "`rhs' `insample' [aw=weight], r a(fem_age) cl(xwaveid)"
local mode "replace"
foreach dep in hourly annual {
    xi: areg income_`dep' $reg1
    outreg using ols_returns1.doc, `tblfmt' `mode' ct("HSchool-`dep'")
    test school11-school10=0
    local mode "append"
}

* Probit (marginal effects) for positive income, with fem_age entered as
* dummies and integer frequency weights. The statistic labelled N in the
* table is the number of clusters.
global reg1 "`rhs' i.fem_age `insample' [fw=fweight], r cl(xwaveid)"
xi: dprobit income_positive $reg1
outreg using ols_returns1.doc, `tblfmt' append ct("HSchool-pos_earn") addstat("Pseudo R2",e(r2_p),N,e(N_clust)) adec(3)
test school11-school10=0

* Interquantile-range regressions (25th to 75th percentile), unweighted
global reg1 "`rhs' i.fem_age `insample', q(.25 .75)"
foreach dep in hourly annual {
    xi: iqreg income_`dep' $reg1
    outreg using ols_returns1.doc, `tblfmt' append ct("HSchool-IQReg-`dep'")
}

* All respondents, but controlling for post-school quals
* postschool copies edhigh with code 9 folded into code 8
gen postschool = cond(edhigh==9, 8, edhigh)
* Rebuild the school-year indicators without the edhigh restriction
drop school9 school10 school11 school12
gen school9  = 1 if edhists==4
gen school10 = 1 if edhists==3
gen school11 = 1 if edhists==2
gen school12 = 1 if edhists==1
* Anyone flagged on at least one indicator gets 0 on the others
foreach v of varlist school9 school10 school11 school12 {
    replace `v' = 0 if `v'==. & (school9~=. | school10~=. | school11~=. | school12~=.)
}

* Pieces shared by the specifications below. The reg1 global is rebuilt before
* each estimator so it always holds the spec currently in use.
local rhs "school10 school11 school12 i.postschool year2002-year2005"
local insample "if age>24 & age<65"
local tblfmt "coefastr nocons bracket 3aster append bdec(3) se"

* Log hourly and log annual income, absorbing fem_age cells, clustered on xwaveid
global reg1 "`rhs' `insample' [aw=weight], r a(fem_age) cl(xwaveid)"
foreach dep in hourly annual {
    xi: areg income_`dep' $reg1
    outreg using ols_returns1.doc, `tblfmt' ct("HSchool-`dep'-postschool control")
}

* Probit (marginal effects) for positive income. The statistic labelled N in
* the table is the number of clusters.
global reg1 "`rhs' i.fem_age `insample' [fw=fweight], r cl(xwaveid)"
xi: dprobit income_positive $reg1
outreg using ols_returns1.doc, `tblfmt' ct("HSchool-pos_earn-postschool control") addstat("Pseudo R2",e(r2_p),N,e(N_clust)) adec(3)

**************************************
* Trade and uni versus year 12
**************************************
* Every qualification indicator below is built from edhigh and, apart from
* highschool, restricted to edhists==1 (the value coded as 12 school years
* earlier in this file). Each one is 1 for the named edhigh code(s), 0 for
* edhigh==8, and missing for everything else.
gen highschool=edhigh
recode highschool 8=1 9=0 *=.
gen trade=edhigh if edhists==1
recode trade 4/7=1 8=0 *=.
gen trade_diploma=edhigh if edhists==1 
recode trade_diploma 4=1 8=0 *=.
gen trade_cert34=edhigh if edhists==1
recode trade_cert34 5=1 8=0 *=.
gen trade_cert12=edhigh if edhists==1
recode trade_cert12 6=1 8=0 *=.
gen trade_cert_undef=edhigh if edhists==1
recode trade_cert_undef 7=1 8=0 *=.
gen bachelor=edhigh if edhists==1
recode bachelor 3=1 8=0 *=. 
gen graddip=edhigh if edhists==1
recode graddip 2=1 8=0 *=. 
gen maphd=edhigh if edhists==1
recode maphd 1=1 8=0 *=. 
* Comparison sample: anyone non-missing on at least one of the six indicators
* used as regressors. Within that sample the remaining missings become 0, so
* the indicators are mutually exclusive and the all-zero rows are the omitted
* group. trade_cert_undef is deliberately not part of this set, so rows with
* edhigh==7 stay missing on all six and are excluded from the estimations.
for var trade_diploma trade_cert34 trade_cert12 bachelor graddip maphd: recode X .=0 if trade_diploma~=. | trade_cert34~=. | trade_cert12~=. | bachelor~=. | graddip~=. | maphd~=.
* Summary statistics
* yr12 marks the omitted group: zero on ALL six regressors. trade_cert12 has
* to be part of this test, otherwise rows with trade_cert12==1 are counted as
* the omitted group even though they have their own regressor below. It is
* also listed in the tabstat calls so the reported shares cover the full
* comparison sample.
gen yr12=0 if trade_cert12~=. | trade_cert34~=. | trade_diploma~=. | bachelor~=. | graddip~=. | maphd~=.
replace yr12=1 if trade_cert12==0 & trade_cert34==0 & trade_diploma==0 & bachelor==0 & graddip==0 & maphd==0 
log using sumstats, append
tabstat yr12 trade_cert12 trade_cert34 trade_diploma bachelor graddip maphd if income_hourly~=. & fem_age~=. & age>24 & age<65 [aw=weight],col(stat) stat(mean n) format(%9.2f)
tabstat yr12 trade_cert12 trade_cert34 trade_diploma bachelor graddip maphd if income_annual~=. & fem_age~=. & age>24 & age<65 [aw=weight],col(stat) stat(mean n) format(%9.2f)
tabstat yr12 trade_cert12 trade_cert34 trade_diploma bachelor graddip maphd if fem_age~=. & age>24 & age<65 [aw=weight],col(stat) stat(mean n) format(%9.2f)
log close 
* Log hourly and log annual income, absorbing fem_age cells, clustered on
* xwaveid. The first column starts a fresh ols_returns2.doc; later ones append.
global reg1 "trade_cert12 trade_cert34 trade_diploma bachelor graddip maphd year2002-year2005 if age>24 & age<65 [aw=weight], r a(fem_age) cl(xwaveid)"
xi: areg income_hourly $reg1
outreg using ols_returns2.doc, coefastr nocons bracket 3aster replace bdec(3) se ct("VsYr12-hourly")
xi: areg income_annual $reg1
outreg using ols_returns2.doc, coefastr nocons bracket 3aster append bdec(3) se ct("VsYr12-annual")
* Probit (marginal effects) for positive income, with integer frequency
* weights. The statistic labelled N in the table is the number of clusters.
global reg1 "trade_diploma trade_cert34 trade_cert12 bachelor graddip maphd year2002-year2005 i.fem_age if age>24 & age<65 [fw=fweight], r cl(xwaveid)"
xi: dprobit income_positive $reg1
outreg using ols_returns2.doc, coefastr nocons bracket 3aster append bdec(3) se ct("VsYr12-pos_earn") addstat("Pseudo R2",e(r2_p),N,e(N_clust)) adec(3) 
* Interquantile-range regressions (25th to 75th percentile), unweighted
global reg1 "trade_diploma trade_cert34 trade_cert12 bachelor graddip maphd year2002-year2005 i.fem_age if age>24 & age<65, q(.25 .75)"
xi: iqreg income_hourly $reg1
outreg using ols_returns2.doc, coefastr nocons bracket 3aster append bdec(3) se ct("VsYr12-IQReg-hourly")
xi: iqreg income_annual $reg1
outreg using ols_returns2.doc, coefastr nocons bracket 3aster append bdec(3) se ct("VsYr12-IQReg-annual")

**************************************
* Comparing trade qualifications with Year 11 or below
**************************************
* Rebuild the trade indicators for respondents with edhists between 2 and 8.
* Each one is 1 for the named edhigh code(s), 0 for edhigh==9, and missing
* for everything else.
drop trade*
gen trade=edhigh if edhists>=2 & edhists<=8
recode trade 4/7=1 9=0 *=.
gen trade_diploma=edhigh if edhists>=2 & edhists<=8
recode trade_diploma 4=1 9=0 *=.
gen trade_cert34=edhigh if edhists>=2 & edhists<=8
recode trade_cert34 5=1 9=0 *=.
gen trade_cert12=edhigh if edhists>=2 & edhists<=8
recode trade_cert12 6=1 9=0 *=.
gen trade_cert_undef=edhigh if edhists>=2 & edhists<=8
recode trade_cert_undef 7=1 9=0 *=.
* Comparison sample: anyone non-missing on at least one of the three
* indicators used as regressors. Within that sample the remaining missings
* become 0, so the all-zero rows are the omitted group.
for var trade_diploma trade_cert34 trade_cert12: recode X .=0 if trade_diploma~=. | trade_cert34~=. | trade_cert12~=.
* Summary statistics
* yr11 marks the omitted group: zero on all three regressors
gen yr11=0 if trade_diploma~=. | trade_cert34~=. | trade_cert12~=.
replace yr11=1 if trade_cert12==0 & trade_cert34==0 & trade_diploma==0
log using sumstats, append
tabstat yr11 trade_cert12 trade_cert34 trade_diploma if income_hourly~=. & fem_age~=. & age>24 & age<65 [aw=weight],col(stat) stat(mean n) format(%9.2f)
tabstat yr11 trade_cert12 trade_cert34 trade_diploma if income_annual~=. & fem_age~=. & age>24 & age<65 [aw=weight],col(stat) stat(mean n) format(%9.2f)
tabstat yr11 trade_cert12 trade_cert34 trade_diploma if fem_age~=. & age>24 & age<65 [aw=weight],col(stat) stat(mean n) format(%9.2f)
log close
* Log hourly and log annual income with edhists dummies, absorbing fem_age
* cells, clustered on xwaveid. The omit characteristic makes xi use edhists==2
* as the base category. The first column starts a fresh ols_returns3.doc;
* later ones append.
global reg1 "trade_cert12 trade_cert34 trade_diploma i.edhists year2002-year2005 if age>24 & age<65 [aw=weight], r a(fem_age) cl(xwaveid)"
char edhists[omit] 2
xi: areg income_hourly $reg1
outreg using ols_returns3.doc, coefastr nocons bracket 3aster replace bdec(3) se ct("VsYr11-hourly")
xi: areg income_annual $reg1
outreg using ols_returns3.doc, coefastr nocons bracket 3aster append bdec(3) se ct("VsYr11-annual")
* Probit (marginal effects) for positive income, with integer frequency
* weights. The statistic labelled N in the table is the number of clusters.
global reg1 "trade_diploma trade_cert34 trade_cert12 i.edhists year2002-year2005 i.fem_age if age>24 & age<65 [fw=fweight], r cl(xwaveid)"
xi: dprobit income_positive $reg1
outreg using ols_returns3.doc, coefastr nocons bracket 3aster append bdec(3) se ct("VsYr11-pos_earn") addstat("Pseudo R2",e(r2_p),N,e(N_clust)) adec(3) 
* Interquantile-range regressions (25th to 75th percentile), unweighted
global reg1 "trade_diploma trade_cert34 trade_cert12 i.edhists year2002-year2005 i.fem_age if age>24 & age<65, q(.25 .75)"
xi: iqreg income_hourly $reg1
outreg using ols_returns3.doc, coefastr nocons bracket 3aster append bdec(3) se ct("VsYr11-IQReg-hourly")
xi: iqreg income_annual $reg1
outreg using ols_returns3.doc, coefastr nocons bracket 3aster append bdec(3) se ct("VsYr11-IQReg-annual")

