************************************
* This Stata dofile sets up and runs regressions for the paper:
* Andrew Leigh, "Does Child Gender Affect Marital Status? Evidence from Australia", Journal of Population Economics, 2008
* The first part of the do-file takes each of the 5 census microdata files and standardises variable names.
* This takes quite a while, since the earlier files are in CSV format, sometimes with multiple files.
* The second part of the paper runs regressions.
* Feel free to adapt this do-file for your own use, but please cite the above paper.
* The census microdata files are known as "CURFs", and cannot be posted online. For details on how to obtain them, see www.abs.gov.au.
* For questions about this do-file, please contact andrew_leigh <at> ksg02.harvard.edu.
************************************

version 9
clear
set mem 250m
set more off

/*

************************************
* In each case, we recode marital status to the 1996/2001 codes
* 1	never married 2	widowed  3	divorced 4	separated 5	married 6	not applicable
* Additional variables: tisp=# of children ever born, cdcaf=count of dependent children temporarily absent, mdcp=defacto
* DeFacto: mdcp==2
*
* Note that in 1981 & 1991, rlhp identifies "issue child" and "other child". 
* For comparison to other years, we do not use this additional information
************************************

*1981 Census 
cd <directory name here>
insheet using fesh81new.csv, clear
ren v3 fst
ren v15 fno
recode fno 4/5=.
gen n=_n
gen hhid=_n if fst==1
tsset n
replace hhid=l.hhid if hhid==. & fst~=1
gen famid=hhid*10+fno
drop n
for any famid hhid: egen temp=group(X) \ replace X=temp \ drop temp
ren v4 age
ren v5 ageleftschool
ren v7 birthplace
ren v13 finf
ren v14 fmc
ren v17 hours
ren v18 income
ren v21 secondmarriage
ren v22 mstp
ren v25 qualific 
ren v29 rlhp
ren v33 sex
ren v34 lfs
ren v35 tisp
drop v*
* Recoding marital status to 2001 values
recode mstp 1=1 2=5 3=4 4=3 5=2 6=6
recode rlhp 0=. 2=1 
replace rlhp=2 if rlhp==1 & ((fmc>=4 & fmc<=8) | (fmc>=29 & fmc<=33))
gen child=1 if (rlhp==3 | rlhp==4) & age<=18
recode income 0=. 1=0 2=500 3=1500 4=2500 5=3500 6=5000 7=7000 8=9000 9=11000 10=13500 12=20000 13=24000 14=29990 15=.
recode finf 0=. 1=0 2=500 3=1500 4=2500 5=3500 6=5000 7=7000 8=9000 9=11000 10=13500 12=20000 13=24000 14=29990 15=.
recode hours 0=. 1=0 2=8 3=20 4=30 5=40 6=.
gen hwage=(income/52)/hours
gen female=sex
recode female 1=0 2=1
recode ageleftschool 0/1=. 8/12=8 13=. 
replace ageleftschool=ageleftschool+9
gen edyears=ageleftschool-5
recode edyears 12/max=12
replace edyears=17 if qualific<=108 & edyears<17
replace edyears=16 if qualific<=210 & edyears<16
replace edyears=15 if qualific<=310 & edyears<15
replace edyears=13 if qualific<=410 & edyears<13
replace edyears=11 if qualific<=510 & edyears<11
gen birthyear=1981-age
gen surveyyear=1981
recode tisp 15/16=0
recode secondmarriage 2=1 *=0
sort surveyyear
save temp1981, replace

* 1986 "Section of State"
insheet using 86hsfSOS.csv, clear
gen n=_n
gen hhid=_n if v1==1
gen famid=_n if v1==2
tsset n
replace hhid=l.hhid if hhid==. & v1>=1
replace famid=l.famid if famid==. & v1==3
drop n
gen cdcaf=v2 if v1==2
gen finf=v5 if v1==2
gen fmc=v6 if v1==2
gen mdcf=v8 if v1==2
gen state=.
for any finf fmc cdcaf mdcf: bysort famid: egen temp=max(X) \ replace X=temp \ drop temp
for any state: bysort hhid: egen temp=max(X) \ replace X=temp \ drop temp
la var finf "Family income"
la var cdcaf "Count of children temporarily absent"
keep if v1==3
for any famid hhid: egen temp=group(X) \ replace X=temp \ drop temp
ren v3 age
ren v5 ageleftschool
ren v8 birthplace
ren v14 hours
ren v16 income
ren v19 lfs
ren v20 secondmarriage
ren v21 mstp
ren v24 qualific 
ren v27 rlhp
ren v31 sex
ren v34 tisp
drop v*
* Recoding marital status to 2001 values
recode mstp 1=1 2=5 3=4 4=3 5=2 6=.
recode age 16=17 17=22 18=27 19=32 20=37 21=42 22=47 23=52 24=57 25=62 26=67 27=72 28=77
replace age=age-1 if age<16
* Recoding rlhp to 1991-2001 values
replace rlhp=rlhp+1 if rlhp>=2
replace rlhp=2 if fmc==1 & rlhp==1
gen child=1 if (rlhp==3 | rlhp==4) & age<=18
recode finf 1=2000 2=5000 3=7500 4=12000 5=18500 6=27000 7=36000 8=44000 9/10=.
recode income 1=2000 2=5000 3=7500 4=12000 5=18500 6=27000 7=36000 8=44000 9/11=.
recode hours 1=0 2=8 3=20 4=30 5=37 6=40 7=44 8=52 9/10=.
gen hwage=(income/52)/hours
gen female=sex
recode female 1=0 2=1
recode ageleftschool 12=1 11=. 13/14=.
replace ageleftschool=ageleftschool+11
recode ageleftschool 17/max=17
gen edyears=ageleftschool-5
recode edyears 12/max=12
for num 1/5 \ num 17 16 15 13 11: replace edyears=Y if qualific==X & edyears<Y
gen birthyear=1986-age
gen surveyyear=1986
* Total issue is topcoded to 5
recode tisp 7/8=1
replace tisp=tisp-1
recode secondmarriage 2=1 *=0
sort surveyyear
save temp1986, replace

* 1991
use "C:\Documents and Settings\Andrew Leigh\Datasets\AustCensus\hsf91.dta", clear
for any cdaf coaf: recode X 5=1 \ replace X=X-1
for any cdpf copf: recode X 10=1 \ replace X=X-1
gen tisp=cdaf+coaf+cdpf+copf
ren cdaf cdcaf
ren agep age
recode age 25=27 26=32 27=37 28=42 29=47 30=52 31=57 32=62 33=67 34=72 35=77 36=82 37=87
gen child=1 if (rlhp==3 | rlhp==4) & age<=18
recode rlhp 2=1 
replace rlhp=2 if fmtf==1
* Recoding marital status to 2001 values
recode mstp 1=1 2=5 3=4 4=3 5=2 6=6
gen famid=(numdp*10)+numfp
gen female=sex
recode female 1=0 2=1
ren als ageleftschool 
recode ageleftschool 1=. 2=12 3=14 4=15 5=16 6/8=17 9/10=.
gen edyears=ageleftschool-5
for num 1/6 \ num 17 16 15 13 11 11: replace edyears=Y if qllp==X & edyears<Y
ren lfsp lfs
recode lfs 1/3=1
ren incp income
recode income 1=1500 2=4000 3=6500 4=10000 5=14000 6=18000 7=22500 8=27500 9=32500 10=37500 11=45000 12=55000 13=65000 14=75000 15/16=.
recode finf 1=1500 2=4000 3=6500 4=10000 5=14000 6=18000 7=22500 8=27500 9=32500 10=37500 11=45000 12=55000 13=65000 14=75000 15=90000 16=110000 17=135000 18=172500 19/20=.
ren hrsp hours
recode hours 1=0 2=8 3=20 4=30 5=37 6=40 7=44 8=52 9/10=.
gen hwage=(income/52)/hours
gen birthyear=1991-age
gen surveyyear=1991
keep female age ageleftschool edyears lfs income hwage birthyear surveyyear mstp hours famid rlhp child finf cdcaf mdcf tisp
sort surveyyear
save temp1991, replace

* 1996 
insheet using 96hsf.csv, clear
gen n=_n
gen hhid=_n if v1==2
gen famid=_n if v1==1
tsset n
replace hhid=l.hhid if hhid==. & v1<=1
replace famid=l.famid if famid==. & v1==0
gen finf=v3 if v1==1
gen cdcaf=v4 if v1==1
for any finf cdcaf: bysort famid: egen temp=max(X) \ replace X=temp \ drop temp
la var finf "Family income"
la var cdcaf "Count of children <15 temporarily absent"
keep if v1==0
for any famid hhid: egen temp=group(X) \ replace X=temp \ drop temp
ren v2 state 
ren v3 mdcp
ren v6 age
ren v7 ageleftschool
ren v9 birthplace
ren v14 hours
ren v16 income
ren v18 lfs
* mstp already follows 2001 coding
ren v19 mstp
ren v21 qualific 
ren v24 rlhp
ren v27 female
ren v32 tisp
drop v*
recode female 1=0 2=1
replace age=age-1
recode age 25=27 26=32 27=37 28=42 29=47 30=52 31=57 32=62 33=67 34=72 35=77 36=82 37=87
recode ageleftschool 1=. 2=12 3=14 4=15 5=16 6/8=17 9/11=.
gen edyears=ageleftschool-5
for num 1/6 \ num 17 16 15 13 11 11: replace edyears=Y if qualific==X & edyears<Y
recode state 1/13=1 14/23=2 24/31=3 32/35=4 36/39=5 40=6 *=.
recode lfs 1/3=1
recode income 1/2=0 3=20 4=60 5=100 6=140 7=180 8=250 9=350 10=450 11=550 12=650 13=750 14=900 15=1250 16=1750 *=.
recode finf 1/2=0 3=60 4=140 5=180 6=250 7=350 8=450 9=550 10=650 11=750 12=900 13=1100 14=1350 15=1750 16=2300 17/19=.
recode hours 1=0 2=8 3=20 4=30 5=37 6=40 7=44 8=52 *=.
replace finf=finf*52
replace income=income*52
gen hwage=(income/52)/hours
gen birthyear=1996-age
gen surveyyear=1996
* Total issue is topcoded to 5
recode tisp 6/8=1
replace tisp=tisp-1
* In 1996, rlhp=3 is a dependent child under 15
gen child=1 if (rlhp==3 | rlhp==4) & age<=18
keep female age ageleftschool edyears state lfs income hwage birthyear surveyyear mstp mdcp hours child rlhp finf cdcaf tisp famid hhid
sort surveyyear
save temp1996, replace

* 2001
use hsf01fam.dta, clear
sort abshid absfid
save, replace
use hsf01psn.dta, clear
sort abshid absfid
merge abshid absfid using hsf01fam.dta, keep(finf cdcaf)
egen tag=tag(abshid)
gen temp1=_n if tag
bysort abshid: egen temp2=max(temp1)
egen temp3=group(temp2)
gen famid=temp3*10+absfid
drop temp1 temp2 temp3
ren agep age
replace age=age-1
recode age 25=27 26=32 27=37 28=42 29=47 30=52 31=57 32=62 33=67 34=72 35=77 36=82 37=87
gen edyears=hscp
recode edyears 1=. 2=10 3=11 4=12 5/7=.
for num 1/5 \ num 17 16 15 13 11: replace edyears=Y if qallp==X & edyears<Y
ren lfsp lfs
recode lfs 1/3=1
ren incp income
recode income 1/2=0 3=20 4=60 5=100 6=140 7=180 8=250 9=350 10=450 11=550 12=650 13=750 14=900 15=1250 16=1750 *=.
recode finf 1/2=0 3=60 4=140 5=180 6=250 7=350 8=450 9=550 10=650 11=750 12=900 13=1100 14=1350 15=1725 16/18=.
ren hrsp hours
recode hours 1=0 2=8 3=20 4=30 5=37 6=40 7=44 8=52 *=.
replace income=income*52
replace finf=finf*52
gen hwage=(income/52)/hours
ren sexp female
recode female 1=0 2=1
gen child=1 if (rlhp==3 | rlhp==4) & age<=18
gen surveyyear=2001
gen birthyear=surveyyear-age
keep famid abshid edyears age female lfs income child surveyyear birthyear mstp mdcp rlhp mdcp finf cdcaf
sort surveyyear
save temp2001, replace

*/

**************************
* Running the regressions
**************************
clear
set more off
set mem 250m
cd <directory name here>
use temp2001, clear
* Censuses
for num 1981 1986 1991 1996: sort surveyyear \ merge surveyyear using tempX \ drop _merge

egen temp=group(famid surveyyear)
replace famid=temp
drop temp*

* How many families? 
codebook famid

* Creating a defacto variable (not available for 1981 --> so drop that year when doing the analysis)
* Defacto for 1996 & 2001
gen defacto=1 if mdcp==2 & mstp~=5 & (surveyyear==1996 | surveyyear==2001)
* Defacto for 1991 (social marital status is measured at the family, not indiv level)
replace defacto=1 if (mdcf==2 | mdcf==3) & rlhp<=2 & mstp<=4 & surveyyear==1991
* Defacto for 1986 (social marital status is measured at the family, not indiv level)
replace defacto=1 if mdcf==2 & rlhp==1 & mstp<=4 & surveyyear==1986

* Creating family income quintiles
gen qfinf=.
for num 1981(5)2001: xtile temp=finf if surveyyear==X, nq(5) \ replace qfinf=temp if surveyyear==X \ drop temp

gen agesq=age^2/100

bysort surveyyear: sum age if child==1

*This approach codes up only children<=18 (child==1)
bysort famid: egen rank=rank(birthyear) if child==1, unique
gen temp1=female if rank==1
bysort famid: egen firstgirl=max(temp1)
tab firstgirl
gen temp2=female if rank==2
bysort famid: egen secondgirl=max(temp2)
tab firstgirl secondgirl
gen temp3=female if rank==3
bysort famid: egen thirdgirl=max(temp3)
bysort famid: egen numkids=max(rank)
bysort famid: egen temp4=min(age) if child==1
bysort famid: egen youngestchildage=max(temp4)
drop temp*

* Checking tisp and numkids
sum numkids if tisp==numkids & surveyyear<2001
bysort survey: sum numkids if tisp==numkids & numkids>0 & surveyyear<2001
bysort survey: sum numkids if tisp~=numkids & numkids>0 & surveyyear<2001

gen gg=1 if firstgirl==1 & secondgirl==1
replace gg=0 if gg==. & firstgirl~=. & secondgirl~=.
gen bb=1 if firstgirl==0 & secondgirl==0
replace bb=0 if bb==. & firstgirl~=. & secondgirl~=.
gen gb=1 if firstgirl==1 & secondgirl==0
replace gb=0 if gb==. & firstgirl~=. & secondgirl~=.
gen bg=1 if firstgirl==0 & secondgirl==1
replace bg=0 if bg==. & firstgirl~=. & secondgirl~=.
gen ggg=1 if firstgirl==1 & secondgirl==1 & thirdgirl==1
replace ggg=0 if ggg==. & firstgirl~=. & secondgirl~=. & thirdgirl~=.
gen bbb=1 if firstgirl==0 & secondgirl==0 & thirdgirl==0
replace bbb=0 if bbb==. & firstgirl~=. & secondgirl~=. & thirdgirl~=.
gen bgg=1 if firstgirl==0 & secondgirl==1 & thirdgirl==1
replace bgg=0 if bgg==. & firstgirl~=. & secondgirl~=. & thirdgirl~=.
gen bgb=1 if firstgirl==0 & secondgirl==1 & thirdgirl==0
replace bgb=0 if bgb==. & firstgirl~=. & secondgirl~=. & thirdgirl~=.
gen bbg=1 if firstgirl==0 & secondgirl==0 & thirdgirl==1
replace bbg=0 if bbg==. & firstgirl~=. & secondgirl~=. & thirdgirl~=.
gen gbb=1 if firstgirl==1 & secondgirl==0 & thirdgirl==0
replace gbb=0 if gbb==. & firstgirl~=. & secondgirl~=. & thirdgirl~=.
gen gbg=1 if firstgirl==1 & secondgirl==0 & thirdgirl==1
replace gbg=0 if gbg==. & firstgirl~=. & secondgirl~=. & thirdgirl~=.
gen ggb=1 if firstgirl==1 & secondgirl==1 & thirdgirl==0
replace ggb=0 if ggb==. & firstgirl~=. & secondgirl~=. & thirdgirl~=.

gen pctgirls=firstgirl if numkids==1
for any gg bb bg gb \ num 1 0 .5 .5: replace pctgirls=Y if X==1 & numkids==2
for any ggg bbg bgb gbb bgg gbg ggb bbb \ num 1 .3333 .3333 .3333 .6666 .6666 .6666 0: replace pctgirls=Y if X==1 & numkids>=3

* Lone dads
sum firstgirl if rlhp==2 & female==0

* Tagging one observation per household, where rlhp is 1 or 2
bysort famid: egen tag=rank(rlhp), unique
replace tag=. if rlhp>2
bysort survey: tab rlhp if tag==1

* Excluding reference persons aged over 40, or where youngest child is aged>12
replace tag=. if age<18 | age>40 | youngestchildage>12

sum age if tag==1 & numkids>0 & numkids~=., d

sum firstgirl gg gb bg bb ggg bbg bgb gbb bgg gbg ggb bbb if tag==1 & numkids>0 & numkids~=.

gen married_notmarried=mstp
recode married_notmarried 1=1 2=. 3=1 4=1 5=0 6=.

gen married_divorced=mstp if surveyyear~=1999 & surveyyear~=2000 & surveyyear~=2002
recode married_divorced 1=. 2=. 3=1 4=1 5=0 6=.

gen married_nevermarried=mstp if surveyyear~=1999 & surveyyear~=2000 & surveyyear~=2002
recode married_nevermarried 1=1 2=. 3=. 4=. 5=0 6=.

gen mardivnev=1 if married_notmarried==0
replace mardivnev=2 if married_divorced==1
replace mardivnev=3 if married_nevermarried==1

* Checking data
bysort surveyyear: sum firstgirl secondgirl thirdgirl married* 

* Parity progression and the gender of existing children
gen samesex=0 if numkids==2 | numkids==3
replace samesex=1 if (bb==1 | gg==1) & numkids==2
replace samesex=1 if (bbb==1 | ggg==1) & numkids==3

* Summary Statistics
dprobit married_notmarried firstgirl if tag==1
log using sumstats_census.smcl, replace
tabstat married_notmarried married_divorced married_nevermarried numkids pctgirls samesex firstgirl bb gg bg gb bbb ggg bbg bgb gbb bgg gbg ggb if e(sample), columns(statistics) statistics(mean sd n)
tab numkids if e(sample)
tab mardivnev if e(sample)
log close

* Calculating baselines for comparison with Dahl & Moretti
sum married_divorced if firstgirl==0. & tag==1 & numkids==1
sum married_divorced if firstgirl==0 & tag==1
sum married_divorced if bb==1 & tag==1 & numkids==2
sum married_divorced if bb==1 & tag==1
sum married_divorced if bbb==1 & tag==1 & numkids==3
sum married_divorced if bbb==1 & tag==1

* Regressions
* Setup outreg
for any married_notmarried: dprobit X firstgirl if tag==1 \ outreg using census.doc, coefastr se nocons bracket 3aster replace ct(X) bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4)

* MAIN SPECIFICATIONS IN PAPER
* Percent girls
for any married_notmarried married_divorced married_nevermarried: dprobit X pctgirls if tag==1 & numkids>=1 \ outreg using census.doc, coefastr se nocons bracket 3aster append ct(X-AnyK) bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4)
for any married_notmarried married_divorced married_nevermarried: dprobit X pctgirls if tag==1 & numkids==1 \ outreg using census.doc, coefastr se nocons bracket 3aster append ct(X-1K) bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4)
for any married_notmarried married_divorced married_nevermarried: dprobit X pctgirls if tag==1 & numkids==2 \ outreg using census.doc, coefastr se nocons bracket 3aster append ct(X-2K) bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4)
for any married_notmarried married_divorced married_nevermarried: dprobit X pctgirls if tag==1 & numkids>=3 \ outreg using census.doc, coefastr se nocons bracket 3aster append ct(X-3K) bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4)
* Same sex 
for any married_notmarried married_divorced married_nevermarried: dprobit X samesex if tag==1 & numkids==2 \ outreg using census.doc, coefastr se nocons bracket 3aster append ct(X-2K) bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4)
for any married_notmarried married_divorced married_nevermarried: dprobit X samesex if tag==1 & numkids==3 \ outreg using census.doc, coefastr se nocons bracket 3aster append ct(X-3K) bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4)
* With controls
global controls = "i.qfinf edyears i.survey age agesq"
for any married_notmarried married_divorced married_nevermarried: xi: dprobit X pctgirls $controls if tag==1 & numkids>=1 \ outreg using census.doc, coefastr se nocons bracket 3aster append ct(X-1K) bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4)
for any married_notmarried married_divorced married_nevermarried: xi: dprobit X pctgirls $controls if tag==1 & numkids==1 \ outreg using census.doc, coefastr se nocons bracket 3aster append ct(X-1K) bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4)
for any married_notmarried married_divorced married_nevermarried: xi: dprobit X pctgirls $controls if tag==1 & numkids==2 \ outreg using census.doc, coefastr se nocons bracket 3aster append ct(X-2K) bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4)
for any married_notmarried married_divorced married_nevermarried: xi: dprobit X pctgirls $controls if tag==1 & numkids>=3 \ outreg using census.doc, coefastr se nocons bracket 3aster append ct(X-3K) bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4)
for any married_notmarried married_divorced married_nevermarried: xi: dprobit X samesex $controls if tag==1 & numkids==2 \ outreg using census.doc, coefastr se nocons bracket 3aster append ct(X-2K) bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4)
* Year by year
for num 1981(5)2001: dprobit married_notmarried pctgirls if tag==1 & surveyyear==X \ outreg using census.doc, coefastr se nocons bracket 3aster append ct("married_notmarried-X") bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4) 
for num 1981(5)2001: dprobit married_notmarried samesex if tag==1 & numkids==2 & surveyyear==X \ outreg using census.doc, coefastr se nocons bracket 3aster append ct(marr_notmarr-2K-X) bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4)

* MORE FLEXIBLE SPECIFICATIONS (firstgirl, bb/bg/gb, ggg/bgb etc)
* Without controls, all years
for any married_notmarried married_divorced married_nevermarried: dprobit X firstgirl if tag==1 & numkids==1 \ outreg using census.doc, coefastr se nocons bracket 3aster append ct(X-1K) bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4)
for any married_notmarried married_divorced married_nevermarried: dprobit X firstgirl if tag==1 \ outreg using census.doc, coefastr se nocons bracket 3aster append ct(X) bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4)
for any married_notmarried married_divorced married_nevermarried: dprobit X gg bg gb if tag==1 & numkids==2 \ outreg using census.doc, coefastr se nocons bracket 3aster append ct(X-2K) bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4)
for any married_notmarried married_divorced married_nevermarried: dprobit X gg bg gb if tag==1 \ outreg using census.doc, coefastr se nocons bracket 3aster append ct(X) bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4)
for any married_notmarried married_divorced married_nevermarried: dprobit X ggg bbg bgb gbb bgg gbg ggb if tag==1 & numkids==3, constant \ outreg using census.doc, coefastr se nocons bracket 3aster append ct(X-3K) bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4)
for any married_notmarried married_divorced married_nevermarried: dprobit X ggg bbg bgb gbb bgg gbg ggb if tag==1, constant \ outreg using census.doc, coefastr se nocons bracket 3aster append ct(X) bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4)
* With controls, all years
global controls = "i.qfinf edyears i.survey age agesq"
for any married_notmarried married_divorced married_nevermarried: xi: dprobit X firstgirl $controls if tag==1 & numkids==1 \ outreg using census.doc, coefastr se nocons bracket 3aster append ct(X-1c) bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4)
for any married_notmarried married_divorced married_nevermarried: xi: dprobit X firstgirl $controls if tag==1 \ outreg using census.doc, coefastr se nocons bracket 3aster append ct(X) bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4)
for any married_notmarried married_divorced married_nevermarried: xi: dprobit X gg bg gb $controls if tag==1 & numkids==2 \ outreg using census.doc, coefastr se nocons bracket 3aster append ct(X-2c) bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4)
for any married_notmarried married_divorced married_nevermarried: xi: dprobit X gg bg gb $controls if tag==1 \ outreg using census.doc, coefastr se nocons bracket 3aster append ct(X) bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4)
for any married_notmarried married_divorced married_nevermarried: xi: dprobit X ggg bbg bgb gbb bgg gbg ggb $controls if tag==1 & numkids==3, constant \ outreg using census.doc, coefastr se nocons bracket 3aster append ct(X-3c) bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4)
for any married_notmarried married_divorced married_nevermarried: xi: dprobit X ggg bbg bgb gbb bgg gbg ggb $controls if tag==1, constant \ outreg using census.doc, coefastr se nocons bracket 3aster append ct(X) bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4)
* Without controls, year-by-year
for num 1981(5)2001: dprobit married_notmarried firstgirl if tag==1 & numkids==1 & surveyyear==X \ outreg using census.doc, coefastr se nocons bracket 3aster append ct("married_notmarried-1K-X") bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4) \ dprobit married_notmarried firstgirl if tag==1 & surveyyear==X \ outreg using census.doc, coefastr se nocons bracket 3aster append ct("married_notmarried-X") bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4) \ dprobit married_notmarried gg bg gb if tag==1 & numkids==2 & surveyyear==X \ outreg using census.doc, coefastr se nocons bracket 3aster append ct("married_notmarried-2K-X") bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4) \ dprobit married_notmarried gg bg gb if tag==1 & surveyyear==X \ outreg using census.doc, coefastr se nocons bracket 3aster append ct("married_notmarried-X") bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4) \ dprobit married_notmarried ggg bbg bgb gbb bgg gbg ggb if tag==1 & numkids==3 & surveyyear==X, constant \ outreg using census.doc, coefastr se nocons bracket 3aster append ct("married_notmarried-3K-X") bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4) \ dprobit married_notmarried ggg bbg bgb gbb bgg gbg ggb if tag==1 & surveyyear==X, constant \ outreg using census.doc, coefastr se nocons bracket 3aster append ct("married_notmarried-X") bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4) 

* Ever divorced vs currently divorced (1981 & 1986 only)
gen married_divorced_ever=married_divorced
recode married_divorced_ever 0=1 if secondmarriage==1 
sum secondmarriage married_divorced* if surveyyear<=1986
for any married_divorced married_divorced_ever: dprobit X pctgirls if tag==1 & surveyyear<=1986 \ outreg using census.doc, coefastr se nocons bracket 3aster append ct("81/86-X") bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4) 
for any married_divorced married_divorced_ever: dprobit X pctgirls samesex if tag==1 & numkids==2 & surveyyear<=1986 \ outreg using census.doc, coefastr se nocons bracket 3aster append ct("81/86-X") bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4) 

* Percent girls
for any married_notmarried married_divorced married_nevermarried: dprobit X pctgirls if tag==1 & numkids>=1 \ outreg using census.doc, coefastr se nocons bracket 3aster append ct(X-AnyK) bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4)
for any married_notmarried married_divorced married_nevermarried: dprobit X pctgirls if tag==1 & numkids==1 \ outreg using census.doc, coefastr se nocons bracket 3aster append ct(X-1K) bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4)
for any married_notmarried married_divorced married_nevermarried: dprobit X pctgirls if tag==1 & numkids==2 \ outreg using census.doc, coefastr se nocons bracket 3aster append ct(X-2K) bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4)
for any married_notmarried married_divorced married_nevermarried: dprobit X pctgirls if tag==1 & numkids>=3 \ outreg using census.doc, coefastr se nocons bracket 3aster append ct(X-3K) bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4)
for any married_notmarried married_divorced married_nevermarried: xi: dprobit X pctgirls $controls if tag==1 & numkids>=1 \ outreg using census.doc, coefastr se nocons bracket 3aster append ct(X-1K) bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4)
for any married_notmarried married_divorced married_nevermarried: xi: dprobit X pctgirls $controls if tag==1 & numkids==1 \ outreg using census.doc, coefastr se nocons bracket 3aster append ct(X-1K) bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4)
for any married_notmarried married_divorced married_nevermarried: xi: dprobit X pctgirls $controls if tag==1 & numkids==2 \ outreg using census.doc, coefastr se nocons bracket 3aster append ct(X-2K) bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4)
for any married_notmarried married_divorced married_nevermarried: xi: dprobit X pctgirls $controls if tag==1 & numkids>=3 \ outreg using census.doc, coefastr se nocons bracket 3aster append ct(X-3K) bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4)
* Pctgirls & same sex
for any married_notmarried married_divorced married_nevermarried: dprobit X pctgirls samesex if tag==1 & numkids==2 \ outreg using census.doc, coefastr se nocons bracket 3aster append ct(X-2K) bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4)
for any married_notmarried married_divorced married_nevermarried: xi: dprobit X pctgirls samesex $controls if tag==1 & numkids==2 \ outreg using census.doc, coefastr se nocons bracket 3aster append ct(X-2K) bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4)
for any married_notmarried married_divorced married_nevermarried: dprobit X pctgirls samesex if tag==1 & numkids>=3 \ outreg using census.doc, coefastr se nocons bracket 3aster append ct(X-3K) bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4)
for any married_notmarried married_divorced married_nevermarried: xi: dprobit X pctgirls samesex $controls if tag==1 & numkids>=3 \ outreg using census.doc, coefastr se nocons bracket 3aster append ct(X-3K) bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4)
* Same sex only (as used in paper)
for any married_notmarried married_divorced married_nevermarried: dprobit X samesex if tag==1 & numkids==2 \ outreg using census.doc, coefastr se nocons bracket 3aster append ct(X-2K) bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4)
for any married_notmarried married_divorced married_nevermarried: dprobit X samesex if tag==1 & numkids==3 \ outreg using census.doc, coefastr se nocons bracket 3aster append ct(X-3K) bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4)
for num 1981(5)2001: dprobit married_notmarried samesex if tag==1 & numkids==2 & surveyyear==X \ outreg using census.doc, coefastr se nocons bracket 3aster append ct(marr_notmarr-2K-X) bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4)
for any married_notmarried married_divorced married_nevermarried: xi: dprobit X pctgirls $controls if tag==1 \ outreg using census.doc, coefastr se nocons bracket 3aster append ct(X-AnyK) bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4)
for any married_notmarried married_divorced married_nevermarried: xi: dprobit X samesex $controls if tag==1 & numkids==2 \ outreg using census.doc, coefastr se nocons bracket 3aster append ct(X-2K) bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4)

* ROBUSTNESS CHECKS

* Lundberg theory
gen f2samesex=1 if gg==1 | bb==1
replace f2samesex=0 if bg==1 | gb==1
for any married_notmarried married_divorced married_nevermarried: dprobit X f2samesex if tag==1 & numkids>=2 
for any married_notmarried married_divorced married_nevermarried: dprobit X f2samesex if tag==1 & numkids==3 

* Young, low-educ
for any married_notmarried married_divorced married_nevermarried: dprobit X firstgirl if tag==1 & age<30 \ outreg using census.doc, coefastr se nocons bracket 3aster append ct("X-lt30") bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4)
for any married_notmarried married_divorced married_nevermarried: dprobit X firstgirl if tag==1 & edy<12 \ outreg using census.doc, coefastr se nocons bracket 3aster append ct("X-lowed") bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4)
for any married_notmarried married_divorced married_nevermarried: dprobit X gg bg gb if tag==1 & age<30 \ outreg using census.doc, coefastr se nocons bracket 3aster append ct("X-lt30") bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4)
for any married_notmarried married_divorced married_nevermarried: dprobit X gg bg gb if tag==1 & edy<12 \ outreg using census.doc, coefastr se nocons bracket 3aster append ct("X-lowed") bdec(4) addstat("Pseudo R2", e(r2_p), "Observed prob.", e(pbar)) adec(4)

* One-child families
for any married_notmarried married_divorced married_nevermarried: dprobit X firstgirl if tag==1 & gg==.

* Excl families where dependent children are absent
gen depchildabsent=1 if cdcaf>=1 & cdcaf<=3 & surveyyear>=1986 & surveyyear<=1991
replace depchildabsent=1 if cdcaf==2 & surveyyear>=1996
for any married_notmarried married_divorced married_nevermarried: dprobit X firstgirl if tag==1 & surveyyear>=1986 & depchildabsent~=1
for any married_notmarried married_divorced married_nevermarried: dprobit X gg bg gb if tag==1 & surveyyear>=1986 & depchildabsent~=1
for any married_notmarried married_divorced married_nevermarried: dprobit X ggg bbg bgb gbb bgg gbg ggb if tag==1 & surveyyear>=1986 & depchildabsent~=1, constant

* Using de-factos (unavailable for 1981)
for var married_*: replace X=1 if defacto==1
for any married_notmarried married_divorced married_nevermarried: dprobit X firstgirl if tag==1 & surveyyear>=1986
for any married_notmarried married_divorced married_nevermarried: dprobit X gg bg gb if tag==1 & surveyyear>=1986 
for any married_notmarried married_divorced married_nevermarried: dprobit X ggg bbg bgb gbb bgg gbg ggb if tag==1 & surveyyear>=1986, constant
