* Run the project set-up do-file before anything else.
* NOTE(review): presumably this is where the global macro work (used for all
* output paths below) is defined - confirm in first.do.
do "D:\Data\workdata\704665\daycare\dofiles\first.do"

* Close a log file if one is still open; capture suppresses the error when none is open.
capture log close

*************************************************************
* Finds all individuals born 1930-1960 (later constrained to 1957).
* They are taken from the yearly GRUND data sets (1980-2013).
* Builds the underlying population file that includes all individuals who
* are ever in our data, together with their birthdates.
* Output: pop3060brutto.dta, pop3060.dta, pop3060_lbcomparison.dta
*************************************************************

* Step 1: reduce every yearly GRUND file to persons born 1930-1960 and store
* the reduced copy in the work folder as grund<year>.dta.
* inrange() is false for a missing birth year, so records without a birth
* date are not kept.

* 1980-2006: the birth date is named foed_dag and fodreg is read as well.
forvalues yr = 1980/2006 {
    use pnr fodreg foed_dag koen using  "D:\Data\workdata\704665\rawdata\GRUND`yr'.dta", clear
    keep if inrange(year(foed_dag), 1930, 1960)
    save "$work\grund`yr'.dta",replace
}

* 2007-2013: the birth date is named FOED_DAG and is renamed to match the
* earlier years; fodreg is not read from these files.
forvalues yr = 2007/2013 {
    use pnr  FOED_DAG koen using  "D:\Data\workdata\704665\rawdata\GRUND`yr'.dta",clear
    rename FOED_DAG foed_dag
    keep if inrange(year(foed_dag), 1930, 1960)
    save "$work\grund`yr'.dta",replace
}


* Step 2: stack the yearly extracts into one file holding the population born
* 1930-1960 with their fodreg code and birthdate.
* The 1980 extract is the starting data set, so the loop appends 1981-2013
* only; appending 1980 again would store every 1980 record twice.
use "$work\grund1980.dta",clear
forvalues num = 1981/2013 {
    append using "$work\grund`num'.dta"
}

* Sort by person id: the later by-pnr steps rely on this stored sort order.
sort pnr
compress
save "$work\pop3060brutto.dta", replace

* The yearly extracts are temporary; remove them once the stacked file is saved.
forvalues num = 1980/2013 {
    erase "$work\grund`num'.dta"
}


* Step 3: reload the stacked file (it was saved sorted by pnr, which the
* by-pnr commands below depend on) and reduce it to one record per person.
use "$work\pop3060brutto.dta", clear

* Records without a parish code cannot be merged to a municipality of birth,
* so they are removed before the person-level code is computed.
keep if fodreg!=.

* Give every record the highest fodreg observed for that person and let this
* person-level value replace the original variable.
by pnr: egen int fodreg_max=max(fodreg)
drop fodreg
rename fodreg_max fodreg

* Keep one spell per person.
by pnr: drop if _n>1

* Total population observed after 1980, living in DK, born 1930-1960.
display _N


* Step 4: classify every fodreg code into a group (fodgroup) so that valid
* parish codes can be separated from all other kinds of codes.
* Documentation of fodreg: dst.dk under Population/Foedreg_kode.
* The rules are applied in order, so where ranges overlap the later rule
* wins (integer codes 9302-9309 end up in group 14, not 13).
capture drop fodgroup
gen fodgroup=.

replace fodgroup=1 if inrange(fodreg,5100,5902) /*other countries*/
replace fodgroup=1 if fodreg==5999 /*unknown but abroad*/

replace fodgroup=2 if inrange(fodreg,9501,9599) /*Greenland*/
replace fodgroup=2 if fodreg==3999 /*Greenland*/

* The variable name is written out in full; an abbreviated name only works
* while variable abbreviation is switched on.
replace fodgroup=3 if inrange(fodreg,2401,2599) /*unknown place DK*/
replace fodgroup=3 if inlist(fodreg,4998,4999) /*unknown*/

replace fodgroup=4 if inrange(fodreg,4301,4499) /*weird...*/

replace fodgroup=5 if fodreg<1000 /*new post 1970 municipal codes*/

replace fodgroup=6 if inrange(fodreg,4501,4599) /*county without documentation for the codes used...*/

replace fodgroup=7 if inrange(fodreg,4601,4799) /*other religious groups and old names (no documentation for old names) */

replace fodgroup=8 if inrange(fodreg,4801,4989) /*catholic groups*/

replace fodgroup=9 if inlist(fodreg,9993,9995,1455) /*Values 9993 and 9995 are not documented and have few observations.*/

* Records with missing fodreg were already dropped earlier in this file, so
* this rule is only a safeguard.
replace fodgroup=10 if fodreg==. /* few observations have missings */

replace fodgroup=11 if fodreg==9999 /* 9999 is unknown */

replace fodgroup=12 if fodreg==1308 /* Fyns Amt */

replace fodgroup=13 if inrange(fodreg,7001,9348) /* Parish in Denmark */

* NOTE(review): the lower bound is exclusive, so code 9301 stays in group 13.
* This matches the second classification later in this file. If the hospital
* codes start at 9301 the bound should be >= instead - confirm against the
* code documentation before changing it, because it alters the kept sample.
replace fodgroup=14 if fodreg>9301 & fodreg<=9309 /*hosp births*/



* Check the distribution over groups and list any codes left unclassified.
tab fodgroup,miss
tab fodreg if fodgroup==.

* Shares reported for the groups:
*   80% have a valid code (group 13) that we can use
*   the largest groups that we omit: 8% born abroad,
*   4% new municipal code as fodreg code,
*   5.6% hospital births

* Step 5: keep the valid parish codes only; the grouping variable is then
* no longer needed.
drop if fodgroup!=13
drop fodgroup

gen fodyear=year(foed_dag)

label variable foed_dag "Date of birth"
label variable fodyear "Year of birth"
label variable fodreg "Parish of birth"

*******************************
compress
save "$work\pop3060.dta", replace
*******************************
*******************************



*******************************
* population to compare to birth statistics
*******************************


* Start again from the stacked file (saved sorted by pnr, which the by-pnr
* commands below depend on) and reduce it to one record per person.
use "$work\pop3060brutto.dta", clear

* Records without a fodreg code are removed before the person-level code is
* computed.
keep if fodreg!=.

* Give every record the highest fodreg observed for that person and let this
* person-level value replace the original variable.
by pnr: egen int fodreg_max=max(fodreg)
drop fodreg
rename fodreg_max fodreg

* Keep one spell per person.
by pnr: drop if _n>1

* Classification for the comparison population: codes for births abroad and
* in Greenland are dropped outright, all remaining codes are grouped.
* Documentation of fodreg: dst.dk under Population/Foedreg_kode.
* The rules are applied in order, so where ranges overlap the later rule
* wins (integer codes 9302-9309 end up in group 14, not 13).
capture drop fodgroup
gen fodgroup=.

drop if inrange(fodreg,5100,5902) /*other countries*/
drop if fodreg==5999 /*unknown but abroad*/

drop if inrange(fodreg,9501,9599) /*Greenland*/
drop if fodreg==3999 /*Greenland*/

* The variable name is written out in full; an abbreviated name only works
* while variable abbreviation is switched on.
replace fodgroup=3 if inrange(fodreg,2401,2599) /*unknown place DK*/
replace fodgroup=3 if inlist(fodreg,4998,4999) /*unknown*/

replace fodgroup=4 if inrange(fodreg,4301,4499) /*weird...*/

replace fodgroup=5 if fodreg<1000 /*new post 1970 municipal codes*/

replace fodgroup=6 if inrange(fodreg,4501,4599) /*county without documentation for the codes used...*/

replace fodgroup=7 if inrange(fodreg,4601,4799) /*other religious groups and old names (no documentation for old names) */

replace fodgroup=8 if inrange(fodreg,4801,4989) /*catholic groups*/

replace fodgroup=9 if inlist(fodreg,9993,9995,1455) /*Values 9993 and 9995 are not documented and have few observations.*/

* Records with missing fodreg were already dropped earlier in this file, so
* this rule is only a safeguard.
replace fodgroup=10 if fodreg==. /* few observations have missings */

replace fodgroup=11 if fodreg==9999 /* 9999 is unknown */

replace fodgroup=12 if fodreg==1308 /* Fyns Amt */

replace fodgroup=13 if inrange(fodreg,7001,9348) /* Parish in Denmark */

* The lower bound is exclusive, so code 9301 stays in group 13.
replace fodgroup=14 if fodreg>9301 & fodreg<=9309 /*hosp births*/

* Check the distribution over groups and list any codes left unclassified.
tab fodgroup,miss

tab fodreg if fodgroup==.

* Indicator variables derived from the grouping:
*   validparish = 1 for a valid Danish parish code (group 13), 0 otherwise
*   borndk      = 1 for groups 3, 5, 6, 7, 8, 12, 13 and 14, 0 otherwise
gen validparish=(fodgroup==13)
gen borndk=inlist(fodgroup,3,5,6,7,8,12,13,14)

gen fodyear=year(foed_dag)

* The grouping variable itself is not stored in the output file.
drop fodgroup

label variable foed_dag "Date of birth"
label variable fodyear "Year of birth"
label variable fodreg "Parish of birth"

*******************************
compress
save "$work\pop3060_lbcomparison.dta", replace
*******************************
