**************************************************************************************
* This do-file was designed to be used in analysing the 
* unconfidentialised version of the HILDA survey.
* The unconfidentialised dataset is only available under strict conditions, 
* and must be used in a secure data environment.
* For more details and to apply for the dataset, see http://melbourneinstitute.com/hilda/
* Note that we use version 3.0-U of HILDA.
* Other researchers should feel free to use or adapt this do-file, but should cite
* Andrew Leigh and Chris Ryan, 2007, "Estimating Returns to Education Using
* Different Natural Experiment Techniques", Economics of Education Review.
* Questions should be directed to andrew_leigh@ksg02.harvard.edu.
**************************************************************************************

* ---- Session set-up ----
clear
set mem 50m
set matsize 800
set more off
cd "<<Insert directory name here>>"

* ---- Load wave c, then attach the wave a and wave b variables by xwaveid ----
use xwaveid clnwte chhwte chhid chhpid cmrcurr cwsce cjbhruc cedagels cedageln chgdob chgsex chhstate cedhists cedcly cedhigh ctifefp ctifefn ctifdip ctifdin using combined_c30u.dta, clear
foreach w in a b {
    * The master data must be sorted on the key before each old-style merge
    sort xwaveid
    merge xwaveid using combined_`w'30u.dta, nokeep keep(`w'hhwte `w'hhid `w'hhpid `w'mrcurr `w'wsce `w'edagels `w'hgdob `w'hgsex `w'hhstate `w'edhists `w'edhigh `w'tifefp `w'tifefn `w'tifdip `w'tifdin)
    drop _merge
}
rename chhstate state
* Non-positive values of clnwte are set to zero
recode clnwte (min/0=0)

* ---- Birth date: parse the day-month-year string, then extract month and year ----
gen bdate = daily(chgdob, "dmy")
format bdate %d
gen bmonth=month(bdate)
gen byear=year(bdate)

* ---- Sex and marital status indicators ----
* Code 2 becomes 1 and code 1 becomes 0, giving a 0/1 variable named female
rename chgsex female
recode female (2=1) (1=0)
* Codes 1-2 of cmrcurr become 1, codes 3-6 become 0
gen married=cmrcurr
recode married (1/2=1) (3/6=0)
gen fem_marr=female*married

* ---- Hours worked and part-time / full-time flags ----
rename cjbhruc hours
* Values from -6 to 0 are treated as missing hours
recode hours (-6/0=.)
* Both flags are 0 (not missing) whenever their condition fails,
* including when hours is missing
gen parttime=(hours<35 & hours>0)
gen fulltime=(hours>=35 & hours~=.)

* ---- Age (reference year 2003) and analysis weight ----
gen age=2003-byear
rename chhwte weight

* Dropping those whose last year of school was done overseas
drop if cedcly>1101 & cedcly!=.

* Coding up high school years of attainment
* cedhists code -> years: 1->12, 2->11, 3->10, 4->9, 5 through 9->8;
* every other value (including missing) is left missing.
gen schoolyears=.
replace schoolyears=12 if cedhists==1
replace schoolyears=11 if cedhists==2
replace schoolyears=10 if cedhists==3
replace schoolyears=9 if cedhists==4
replace schoolyears=8 if cedhists>=5 & cedhists<=9

* Coding up college education
* Start from school years, then overwrite according to cedhigh:
* 1->17, 2->16, 3->15, and 4, 5 or 6->12.
gen edyears=schoolyears
replace edyears=17 if cedhigh==1
replace edyears=16 if cedhigh==2
replace edyears=15 if cedhigh==3
replace edyears=12 if cedhigh==4
replace edyears=12 if cedhigh==5
replace edyears=12 if cedhigh==6

* Logging income
* Build every income measure in levels first ...
gen income=ctifefp
gen income_weekly=cwsce
gen income_3yr=atifefp+btifefp+ctifefp
gen income_3yr_disp=atifdip+btifdip+ctifdip
rename ctifdip income_annual_disp
rename ctifefp income_annual
gen income_hourly=income_weekly/hours
* ... then replace each variable whose name starts with "income" by its natural log
foreach v of varlist income* {
    replace `v'=ln(`v')
}

* In the following states, being on the wrong side of the cutoff lowers compulsory schooling by 12 months
* beforecutoff3 records the birth month relative to the cutoff date, for births
* within three months either side of it: -3, -2, -1 for the three months up to
* the cutoff (-1 = the month ending at the cutoff) and 1, 2, 3 for the three
* months after it. Everyone else keeps a missing value.
* State codes used below: 3 = Qld, 5 = WA, 6 = Tas.
* The replace statements are order-dependent: a later line may overwrite an
* earlier one for the same observation.
* NOTE(review): in the Qld 1945-51 and WA blocks the Oct-Dec lines run one
* birth year later (1951, 1995) than the Jan-Mar lines (1950, 1994) - confirm
* that this asymmetry is intended.
gen beforecutoff3=.
* Qld 1945-51: 31 Dec
replace beforecutoff3=bmonth-13 if state==3 & byear>=1945 & byear<=1951 & (bmonth==10 | bmonth==11 | bmonth==12)
replace beforecutoff3=bmonth if state==3 & byear>=1945 & byear<=1950 & (bmonth==1 | bmonth==2 | bmonth==3)
* Qld 1952-80: 28 Feb
* (Dec, Jan, Feb -> -3, -2, -1; Mar, Apr, May -> 1, 2, 3)
replace beforecutoff3=-3 if state==3 & byear>=1952 & byear<=1980 & (bmonth==12)
replace beforecutoff3=bmonth-3 if state==3 & byear>=1952 & byear<=1980 & (bmonth==1 | bmonth==2)
replace beforecutoff3=bmonth-2 if state==3 & byear>=1952 & byear<=1980 & (bmonth==3 | bmonth==4 | bmonth==5)
* Qld 1981: 31 Jan
* (Nov and Dec 1980 -> -3, -2, which overwrites the -3 given to Dec 1980 above;
*  Jan 1981 -> -1; Feb, Mar, Apr 1981 -> 1, 2, 3)
replace beforecutoff3=bmonth-14 if state==3 & byear==1980 & (bmonth==11 | bmonth==12)
replace beforecutoff3=-1 if state==3 & byear==1981 & bmonth==1
replace beforecutoff3=bmonth-1 if state==3 & byear==1981 & (bmonth==2 | bmonth==3 | bmonth==4)
* Qld 1982 onwards: 31 Dec
replace beforecutoff3=bmonth-13 if state==3 & byear>=1982 & (bmonth==10 | bmonth==11 | bmonth==12)
replace beforecutoff3=bmonth if state==3 & byear>=1982 & (bmonth==1 | bmonth==2 | bmonth==3)
* WA 1945-95: 31 Dec
replace beforecutoff3=bmonth-13 if state==5 & byear>=1945 & byear<=1995 & (bmonth==10 | bmonth==11 | bmonth==12)
replace beforecutoff3=bmonth if state==5 & byear>=1945 & byear<=1994 & (bmonth==1 | bmonth==2 | bmonth==3)
* Tas 1975+: 1 Jan
replace beforecutoff3=bmonth-13 if state==6 & byear>=1975 & (bmonth==10 | bmonth==11 | bmonth==12)
replace beforecutoff3=bmonth if state==6 & byear>=1975 & (bmonth==1 | bmonth==2 | bmonth==3)

* relativeposition rescales the signed month offset onto [0,1] in steps of 1/11:
* it equals 11/11 for the month before the cutoff (-1), 10/11 and 9/11 for the
* two months before that, 0/11 for the month after the cutoff (1), then 1/11 and 2/11.
gen relativeposition=beforecutoff3
recode relativeposition -1=1 -2=.909 -3=.818 1=0 2=.0909 3=.1818
* Recoding the birth month variable itself onto non-negative codes: months after
* the cutoff (1, 2, 3) become 0, 1, 2 and months before it (-3, -2, -1) become 10, 11, 12.
* Of course, this is only relevant if the IV is beforecutoff3, rather than i.beforecutoff3
recode beforecutoff3 1=0 2=1 3=2 -1=12 -2=11 -3=10

* Creating a birthmonth*state interaction
* bc3, bc5 and bc6 equal beforecutoff3 inside the named state and 0 in other
* states; they stay missing where beforecutoff3 itself is missing.
foreach s of numlist 3 5 6 {
    gen bc`s'=beforecutoff3 if state==`s'
    replace bc`s'=0 if bc`s'==. & beforecutoff3~=.
}
* Creating a birthmonth*birthyear interaction
tab beforecutoff3
* bc is a single code combining birth year and cutoff position (byear*100 + position)
gen bc=.
forvalues y=1930/1980 {
    replace bc=(byear*100)+beforecutoff3 if byear==`y'
}
* bc1930 ... bc1980: beforecutoff3 for that birth year, 0 for everyone else
* (including observations whose beforecutoff3 is missing)
forvalues y=1930/1980 {
    gen bc`y'=beforecutoff3 if byear==`y'
    replace bc`y'=0 if bc`y'==.
}
 
* Creating a state*birthmonth*birthyear interaction (for clustering)
gen statebmonthbyear=(byear*1000)+(bmonth*10)+state

* Summary statistics
* Temporary 0/1 flag for fewer than 12 years of education
* (0 whenever edyears<12 is false, which includes missing edyears)
gen temp_lt12=(edyears<12)
* This regression is run only to define e(sample) for the tables below
xi: areg income_annual edyears female if age>24 & age<65 [w=weight], a(byear) r
tabstat income_annual income_annual_disp income_weekly income_hourly edyears byear female married fem_marr fulltime temp_lt12 [w=weight] if e(sample), stats(mean sd n) col(stat)
* The three-year income measures are tabulated with the clnwte weight
tabstat income_3* [w=clnwte] if e(sample), stats(mean sd n) col(stat)
drop temp_lt12

************************************
* OLS 
************************************
* Shared estimation sample and outreg formatting for every column of ols_returns.doc
local ols_if "age>24 & age<65"
local ols_fmt "coefastr nocons bracket 3aster bdec(3) se"

* Baseline specification: education and sex, absorbing birth-year effects.
* The first column replaces any existing ols_returns.doc; later columns append.
* Three-year income measures use the clnwte weight, the others use weight.
xi: areg income_3yr edyears female if `ols_if' [w=clnwte], a(byear) r
outreg using ols_returns.doc, `ols_fmt' replace ct("income_3yr")
xi: areg income_3yr_disp edyears female if `ols_if' [w=clnwte], a(byear) r
outreg using ols_returns.doc, `ols_fmt' append ct("income_3yr_disp")
foreach y of varlist income_annual income_annual_disp income_weekly income_hourly {
    xi: areg `y' edyears female if `ols_if' [w=weight], a(byear) r
    outreg using ols_returns.doc, `ols_fmt' append ct("`y'")
}

* Extended specification: adds marital status, its interaction with sex, and full-time status
foreach y of varlist income_3yr income_3yr_disp {
    xi: areg `y' edyears female married fem_marr fulltime if `ols_if' [w=clnwte], a(byear) r
    outreg using ols_returns.doc, `ols_fmt' append ct("`y'")
}
foreach y of varlist income_annual income_annual_disp income_weekly income_hourly {
    xi: areg `y' edyears female married fem_marr fulltime if `ols_if' [w=weight], a(byear) r
    outreg using ols_returns.doc, `ols_fmt' append ct("`y'")
}

* Extended specification restricted to the 1964-1971 birth cohorts (results not exported)
xi: areg income_annual edyears female married fem_marr fulltime if byear>=1964 & byear<=1971 [w=weight], a(byear) r 

************************************
* MONTH OF BIRTH IV
************************************
* Shared sample restriction and outreg formatting for regdisc_returns.doc
local rd_if "age>24 & age<65"
local rd_fmt "coefastr nocons bracket 3aster bdec(3) se"

* IV1: edyears instrumented by the cutoff-position dummies (i.beforecutoff3)
xi: ivreg income female relativeposition i.byear i.state (edyears=i.beforecutoff3) if `rd_if' [aw=weight], r first cl(statebmonthbyear)
outreg using regdisc_returns.doc, `rd_fmt' replace ct("IV1")

* IV2: edyears instrumented by the birth-year-by-cutoff-position dummies (i.bc)
xi: ivreg income (edyears=i.bc) female relativeposition i.state i.byear if `rd_if' [w=weight], r first cl(statebmonthbyear)
outreg using regdisc_returns.doc, `rd_fmt' append ct("IV2")

* OLS comparison, estimated on the sample used by the IV2 regression just above
xi: reg income edyears female i.state i.byear if e(sample) [w=weight], r 
outreg using regdisc_returns.doc, `rd_fmt' append ct("OLS")

* F-tests
* Joint significance of the month-of-birth instruments in the first stage.
* The dependent variable must be the endogenous regressor (edyears), with the
* instruments on the right-hand side, exactly as in the leaving-age F-tests
* further down. The earlier version had female as the dependent variable and
* edyears as a control, so testparm was testing whether the instruments
* predict sex rather than schooling.
* relativeposition is left out here, as it was before: it is an exact function
* of beforecutoff3, so it would be collinear with the dummies being tested.
* NOTE(review): add it back if the F-statistic should be conditional on it.

* Instruments: cutoff-position dummies
xi: reg edyears female i.byear i.state i.beforecutoff3 if age>24 & age<65 [w=weight], r cl(statebmonthbyear)
testparm _Ibefore*
* Instruments: birth-year-by-cutoff-position dummies
xi: reg edyears female i.byear i.state i.bc if age>24 & age<65 [w=weight], r cl(statebmonthbyear)
testparm _Ibc*

************************************
* CHANGES IN SCHOOL LEAVING LAWS
************************************
*Note - these 4 lines (and the graph lines below the compschool/leavingage bit)
*open up a new dataset, just for the purposes of creating the graphs. 
*They should be asterisked out except when creating the graphs
*(The use line takes the clear option; "use ..., replace" is not valid Stata syntax.)
*cd "<<Insert directory name here>>"
*use state_birthyear, clear
*ren birthyear byear
*label define statelabel 1 "NSW & ACT" 2 "Vic" 3 "Qld" 4 "SA & NT" 5 "WA" 6 "Tas"
*label values state statelabel

* School starting age: 6 by default, 7 for the earliest birth cohorts in some states.
* State codes 7 and 8 are grouped with 4 and 1 respectively throughout this block
* (presumably NT with SA and ACT with NSW, matching the labels above - confirm
* against the HILDA state coding).
gen startingage=6
la var startingage "School starting age"
*replace startingage=7 if byear<1940 & (state==1 | state==8)
replace startingage=7 if byear<1909 & (state==4 | state==7)
replace startingage=7 if byear<1906 & state==6

* School leaving age by state and birth year.
* Each line applies from a given birth year onwards, and later lines overwrite
* earlier ones, so within a state the lines must stay in ascending birth-year order.
* Birth years before a state's first threshold keep a missing leavingage.
gen leavingage=.
la var leavingage "School Leaving Age"
* States 1 and 8: 14, then 14.33, 14.66 and 15 for the 1927, 1928 and 1929+ cohorts
replace leavingage=14 if byear>=1903 & (state==1 | state==8)
replace leavingage=14.33 if byear>=1927 & (state==1 | state==8)
replace leavingage=14.66 if byear>=1928 & (state==1 | state==8)
replace leavingage=15 if byear>=1929 & (state==1 | state==8)
* State 2
replace leavingage=14 if byear>=1903 & state==2
replace leavingage=15 if byear>=1950 & state==2
* State 3
replace leavingage=12 if byear>=1890 & state==3
replace leavingage=14 if byear>=1896 & state==3
replace leavingage=15 if byear>=1951 & state==3
* States 4 and 7
replace leavingage=13 if byear>=1890 & (state==4 | state==7)
replace leavingage=14 if byear>=1901 & (state==4 | state==7)
replace leavingage=15 if byear>=1949 & (state==4 | state==7)
* State 5
replace leavingage=14 if byear>=1901 & state==5
replace leavingage=14.5 if byear>=1950 & state==5
replace leavingage=15.5 if byear>=1952 & state==5
* State 6
replace leavingage=13 if byear>=1885 & state==6
replace leavingage=14 if byear>=1898 & state==6
replace leavingage=16 if byear>=1932 & state==6
* Years of compulsory schooling: leaving age minus starting age, plus one
gen compschool=leavingage-startingage+1
la var compschool "Compulsory Years of School"

* Graphing compulsory school leaving
*for num 1/6 \ any "NSW & ACT" "Vic" "Qld" "SA & NT" "WA" "Tas": twoway (line leavingage byear) if state==X, xtitle("") ytitle("") title("Y") name(_X, replace) scheme(s2mono) nodraw  
*graph combine _1 _2 _3 _4 _5 _6, ycommon title(Figure 1: Compulsory School Leaving Age by Birth Year) scheme(s2mono)
*for num 1/6 \ any "NSW & ACT" "Vic" "Qld" "SA & NT" "WA" "Tas": twoway (line compschool byear) if state==X, xtitle("") ytitle("") title("Y") name(_X, replace) scheme(s2mono) nodraw  
*graph combine _1 _2 _3 _4 _5 _6, ycommon title(Figure 3: Years of Compulsory Schooling by Birth Year) scheme(s2mono)

/*
* Graphing age-income profiles
*gen income1517=(exp(income)) if fulltime==1 & age>=15 & age<=17 & edyears==9
recode cwsce min/0=0
gen income1517=cwsce*52 if age>=15 & age<=16 & edyears==9 & cedagels>2
*gen income1517=cwsce*52 if age>=15 & age<=17 & edyears==9 & cedagels>2 & fulltime==1
egen temp=mean(income1517)
replace income1517=temp
drop temp
sum income1517
for num 2/4: gen ageX=age^X
replace income=exp(income_annual)
for num 9/10: xi: reg income age age2-age4 if edyears==X & age>=15 & age<65 \ predict incomeX if age>=15 & age<65 & edyears==X \ bysort age: egen temp=mean(incomeX) \ replace incomeX=temp \ drop temp
tabstat income9 if edyears==9, by(age) stats(mean n)
* replace incomeX=(exp(incomeX)) \
collapse income9 income1517,by(age)
drop if age<15 | age>64
gen income10=income9*1.1
replace income10=. if age==15
for num 9/10: la var incomeX "X years of education"
* Figure 2 graph (Figure 1 in EER paper)
twoway (line income9 age) (line income10 age), xtitle("Age") ytitle("Income") title("Figure 1: Age-income profiles") scheme(s2mono)
for num 9/10: egen totearnX=sum(incomeX) \ sum totearnX

gen n=_n-1
* Calculating discounted present value of future earnings
for X in num 6 8 10: for Y in num 0 3 5 7 : gen gapX_Y=(income9*(X/100))/(((100+Y)/100)^n) if age>=16
for X in num 6 8 10: for Y in num 0 3 5 7 : egen totgapX_Y=sum(gapX_Y)
sum totgap*
drop totgap*
* Calculating discount rate necessary to justify dropping out
for X in num 6 8 10: for Y in num 10/25: gen gapX_Y=(income9*(X/100))/(((100+Y)/100)^n) if age>=16
for X in num 6 8 10: for Y in num 10/25: egen totgapX_Y=sum(gapX_Y)
sum totgap*
*/

* la and cs combine birth year with the leaving age / years of compulsory
* schooling in a single code (byear*100 + value), for the 1910-1980 cohorts only
gen la=.
gen cs=.
forvalues y=1910/1980 {
    replace la=(byear*100)+leavingage if byear==`y'
    replace cs=(byear*100)+compschool if byear==`y'
}
* la1910 ... la1980: leaving age for that birth year, 0 for everyone else.
* These are created consecutively so that the varlist range la1910-la1980
* picks up exactly this set.
forvalues y=1910/1980 {
    gen la`y'=leavingage if byear==`y'
    replace la`y'=0 if la`y'==.
}
* la1 ... la6: leaving age for that state, 0 for other states;
* left missing where leavingage itself is missing
forvalues s=1/6 {
    gen la`s'=leavingage if state==`s'
    replace la`s'=0 if la`s'==. & leavingage~=.
}
* State-by-birth-year identifier, used as the clustering variable
gen statebyear=state+(byear*10)

* Instrumenting with changes in school leaving ages
* Shared sample restriction and outreg formatting for dd_returns.doc
local dd_if "age>=25 & age<=64"
local dd_fmt "coefastr nocons bracket 3aster bdec(3) se"

* First stage with leaving age entered linearly (starts a fresh dd_returns.doc)
xi: reg edyears leavingage female i.state i.byear if `dd_if' [w=weight], r cl(statebyear)
outreg using dd_returns.doc, `dd_fmt' replace ct("first stage")

* Same first stage, restricted to observations with non-missing income
xi: reg edyears leavingage female i.state i.byear if `dd_if' & income~=. [w=weight], r cl(statebyear)
outreg using dd_returns.doc, `dd_fmt' append ct("first stage - inc nonmiss")

* OLS on the estimation sample of the regression just above
xi: reg income edyears female i.state i.byear if e(sample) [w=weight], r 
outreg using dd_returns.doc, `dd_fmt' append ct("OLS")

* la-IV1: instruments are dummies for each leaving-age value
xi: ivreg income (edyears=i.leavingage) female i.state i.byear if `dd_if' [w=weight], r first cl(statebyear)
outreg using dd_returns.doc, `dd_fmt' append ct("la-IV1")

* la-IV2: instruments are dummies for each birth-year-by-leaving-age code
xi: ivreg income (edyears=i.la) female i.state i.byear if `dd_if' [w=weight], r first cl(statebyear)
outreg using dd_returns.doc, `dd_fmt' append ct("la-IV2")

* la-IV3: instruments are the per-birth-year leaving-age variables la1910 through la1980
xi: ivreg income (edyears=la1910-la1980) female i.state i.byear if `dd_if' [w=weight], r first cl(statebyear)
outreg using dd_returns.doc, `dd_fmt' append ct("la-IV3")

* F-tests
* Joint significance of the instrument dummies in the first stage, once for
* the leaving-age dummies and once for the birth-year-by-leaving-age dummies.
* The first name is deliberately written as the stub "leavingag" so that the
* same text serves both as a variable abbreviation and as the _I* wildcard prefix.
foreach iv in leavingag la {
    xi: reg edyears i.`iv' female i.state i.byear if  age>=25 & age<=64  [w=weight], r cl(statebyear)
    testparm _I`iv'*
}

