set scheme s1mono

/* This do-file produces the figures in Appendix C1, as well as the figures and
   tables related to Section 3.3 and Appendix B3. */

/* Close any log left open by an earlier, interrupted run; otherwise the
   -log using- below stops with a "log file already open" error. */
capture log close
log using alm_appendix, text replace

/* ---- Appendix C1: yearly averages of ad length and of words/length ---- */

use summary_by_year_comp, clear
keep if year>=1950
merge m:1 description_new using soc_codes_corresp, keep(1 3)
/* drop rows flagged as having a missing title, and rows without a usable SOC code */
drop if description_new_miss==1 | soc_new==. | soc_new==999999

/* Weighted yearly means of -length- and -words-.
   NOTE(review): the weight here is -ct-, while the ad shares built later in this
   file use -ct2-; confirm that -ct- is the intended weight. */
collapse (mean) length words  [aw=ct], by(year)

/* ratio of the two yearly means (labelled below as the dictionary-word fraction) */
gen spell=words/length

line length year , xtitle("Year") ytitle("Length") color(black) ylab(0(30)90)
graph export "appendix_length.eps", replace
line spell year , xtitle("Year") ytitle("Fraction of Words Which Appear" "in the English Dictionary") color(black)  ylab(0(.2)1)
graph export "appendix_spell.eps", replace

/* First, compute the smoothed task measures for 1977 and 1991 from our newspaper data.
   Output: data_by_soc4_year.dta, one row per (4-digit SOC code, year). */

use summary_by_year_comp, clear
keep if year>=1950
merge m:1 description_new using soc_codes_corresp, keep(1 3)
drop if description_new_miss==1 | soc_new==. | soc_new==999999

/* share = this row's ct2 as a fraction of the year's total ct2 */
bys year : egen ads=total(ct2)
gen share=ct2/ads

drop _merge
ren soc_new onet_soc 
keep spitz*C year ct2 onet_soc words share
/* drop the last two digits of the SOC code */
gen soc_code=floor(onet_soc/100)

 /* Need to group some SOC codes which do not appear in the decennial census
    (their non-appearance in that data occurs because of an imperfect match
    between SOC codes and occ codes). */

  replace soc_code=1130 if soc_code==1131
  replace soc_code=2510 if soc_code==2511
  replace soc_code=2520 if soc_code==2530 
  replace soc_code=2911 if soc_code==2990
  replace soc_code=3190 if soc_code==3120 
  replace soc_code=3520 if soc_code==3510
  replace soc_code=3990 if soc_code==3970
  replace soc_code=3990 if soc_code==3940
  replace soc_code=4520 if soc_code==4510 

/* total ad share of each (year, soc_code) cell; constant within the cell, so the
   mean taken by -collapse- below returns it unchanged */
bys year soc_code: egen share_=total(share)

/* The weight is written out as ct2. After the -keep- above, ct2 is the only
   variable that the former "ct" could refer to, so results are unchanged, but the
   command no longer depends on variable-name abbreviation being switched on. */
collapse (mean) spitz*  words share_ [aw=ct2], by(year soc_code)
ren share_ share

/* share-weighted mean of every task measure over all remaining rows, stored in
   the local <measure>_loc */
foreach var of varlist spitz*C  {
   sum `var' [aw=share]
   local `var'_loc=r(mean)
}

/* _sh: measure relative to its overall weighted mean
   _W : mentions per 1,000 words */
foreach var of varlist spitz*C  {
   gen `var'_sh=`var'/``var'_loc'
   gen `var'_W=`var'/words*1000
}

/* _sh_: each normalised measure as a fraction of the sum of the five */
foreach var of varlist spitz*_C_sh {
   gen `var'_=`var'/(spitz_nr_analytic_C_sh+spitz_nr_inter_C_sh+spitz_nr_manual_C_sh+spitz_r_manual_C_sh+spitz_r_cognitive_C_sh)
}

keep if year==1977 | year==1991
save data_by_soc4_year, replace

/* From David Autor's replication files, retrieve the DOT scores which are used
   in Table 6 of ALM (2003). The plots produced here become the final figure of
   Appendix B3. */

quietly do "centiles-gen-8497_table6.do"
keep if sex==0
xtset occ8090 year

/* Unweighted and wt84-weighted correlation of every DOT measure with its own
   14-period lag (titled 1977 vs. 1991 in the plots below). */
foreach dot of varlist ehf-math {
    correlate `dot' L14.`dot'
    correlate `dot' L14.`dot' [aweight=wt84]
}

/* Axis label and output-file suffix for each plotted DOT measure */
local math_lbl   "Math"
local dcp_lbl    "Direction, Planning, Control"
local finger_lbl "Finger Dexterity"
local math_tag   "nr_a"
local dcp_tag    "nr_i"
local finger_tag "r_m"

/* One scatter per measure: current value against its 14-period lag, points drawn
   as occ8090 labels, with the weighted correlation printed in the note. */
foreach dot in math dcp finger {
    correlate `dot' L14.`dot' [aweight=wt84]
    local rho : display %3.2f r(rho)
    graph twoway (scatter `dot' L14.`dot', mlabel(occ8090) mlabsize(vsmall) mlabpos(0) msym(i)) , ///
        xtitle("``dot'_lbl': 1977") ytitle("``dot'_lbl': 1991") note("Correlation: `rho'")
    graph export "alm_77_91_``dot'_tag'.eps", replace
}

/* Again, retrieve the DOT scores which are used in ALM, and now map them to SOC
   codes. The file saved at the end (alm_table_6_changes) is what the second and
   third sets of figures in Appendix B3 are computed from. */

quietly do "centiles-gen-8497_table6.do"
replace year = 1900 + year

/* total weight of each occ8090 code, summed over all of its rows */
bysort occ8090: egen wt_alm = total(wt8497gen)
collapse (mean) ehf-math dq* wt_alm [aweight=wt8497gen], by(occ8090 year)

/* There are some merge=2 observations which did not have DOT measures; they are
   dropped. occ90 is the file from David Autor's replication files (expanded to
   have two observations per occ8090, one for year=1977 and one for year=1991). */
merge 1:m occ8090 year using occ90, keep(match) nogenerate
rename occ90 occ1990

/* denom = number of rows sharing this (occ8090, year) after the 1:m merge, so
   wt_alm/denom below splits the occ8090 weight equally across those rows.
   NOTE(review): the original comment described this as "multiple occ8090 codes
   per occ90 code"; the merge direction suggests the reverse - confirm. */
bysort occ8090 year: gen denom = _N

/* No keep() here: unmatched rows from either side stay in and form their own
   (occsoc, year) cells in the collapse below. */
merge m:1 occ1990 using occ1990_occsoc_correspondence.dta
bysort occsoc: egen wt_alm_by_occsoc = total(wt_alm/denom)

/* unweighted means within each (occsoc, year) cell */
collapse (mean) dq* ehf-math wt_alm_by_occsoc, by(occsoc year)
save alm_table_6_changes, replace

/* Compare the newspaper task measures with the DOT measures, by 4-digit SOC code:
   first in levels for 1977, then in 1977-1991 changes. */

use data_by_soc4_year, clear
ren soc_code occsoc

/* the merge=1 codes are for the military; the merge=2 codes are those 
    for which the dot isn't measuring (I think) the relevant measures 
   for these occupations (merge=2 rows are dropped by keep(1 3)) */

merge m:1 occsoc year using alm_table_6_changes, keep(1 3)
sort occsoc year

/* _G: 1977-1991 change divided by the average of the two years' values, defined on
   the 1991 row only when the preceding row belongs to the same occsoc */
foreach var of varlist spitz*C_W {
  gen `var'_G=(`var'-`var'[_n-1])/(`var'/2+`var'[_n-1]/2) if year==1991 & occsoc==occsoc[_n-1]
}

/* ---- 1977 levels ---- */

cor spitz_nr_analytic_C_W math if year==1977 [aw=share]
local l2: di %3.2f `=r(rho)' 
scatter  spitz_nr_analytic_C_W math if year==1977, mlabel(occsoc) msym(i) mlabpos(0) xtitle("DOT: Math") ytitle("Newspaper: Nonroutine Analytic") note("Correlation: `l2'") xlab(0(2)10)
graph export "alm_compare_77_nr_a.eps", replace

cor spitz_nr_inter_C_W dcp if year==1977 [aw=share]
local l2: di %3.2f `=r(rho)' 
scatter  spitz_nr_inter_C_W dcp if year==1977, mlabel(occsoc) msym(i) mlabpos(0) xtitle("DOT: Direction, Planning, Control") ytitle("Newspaper: Nonroutine Interactive") note("Correlation: `l2'") xlab(0(2)10)
graph export "alm_compare_77_nr_i.eps", replace

cor spitz_r_manual_C_W finger if year==1977 [aw=share]
local l2: di %3.2f `=r(rho)' 
graph twoway (scatter spitz_r_manual_C_W finger if year==1977 , mlabel(occsoc)  mlabpos(0) msym(i)) , xtitle("DOT: Finger Dexterity") ytitle("Newspaper: Routine Manual") note("Correlation: `l2'") xlab(0(2)10)
graph export "alm_compare_77_r_m.eps", replace

/* correlations reported without a plot */
cor spitz_nr_manual_C_W ehf if year==1977 [aw=share]
cor spitz_r_cognitive_C_W sts if year==1977 [aw=share]

/* ---- 1977-1991 changes ---- */

sort occsoc year
/* Weight for the change correlations: the sum of the 1977 and 1991 ad shares.
   Requiring the same occsoc on the previous row mirrors the definition of the _G
   variables; rows failing it have missing _G and were never used anyway. */
gen count7791=share+share[_n-1] if year==1991 & occsoc==occsoc[_n-1]

cor dqmath spitz_nr_analytic_C_W_G [aw=count7791]
local l2: di %3.2f `=r(rho)' 
scatter  spitz_nr_analytic_C_W_G dqmath , mlabel(occsoc) msym(i) mlabpos(0) xtitle("DOT: Math") ytitle("Newspaper: Nonroutine Analytic") note("Correlation: `l2'") 
graph export "alm_compare_77_91_nr_a.eps", replace
cor dqdcp spitz_nr_inter_C_W_G  [aw=count7791]
local l2: di %3.2f `=r(rho)' 
scatter  spitz_nr_inter_C_W_G dqdcp , mlabel(occsoc) msym(i) mlabpos(0) xtitle("DOT: Direction, Planning, Control") ytitle("Newspaper: Nonroutine Interactive") note("Correlation: `l2'") 
graph export "alm_compare_77_91_nr_i.eps", replace

/* The note on the routine-manual plot must report the correlation of the two
   plotted variables (dqfinger and spitz_r_manual_C_W_G). Previously the
   dqehf / nonroutine-manual correlation was computed immediately before this plot,
   so its value was the one printed in the note. */
cor dqfinger spitz_r_manual_C_W_G  [aw=count7791]
local l2: di %3.2f `=r(rho)' 
graph twoway (scatter spitz_r_manual_C_W_G dqfinger , mlabel(occsoc)  mlabpos(0) msym(i)) , xtitle("DOT: Finger Dexterity") ytitle("Newspaper: Routine Manual") note("Correlation: `l2'")
graph export "alm_compare_77_91_r_m.eps", replace

/* correlations reported without a plot */
cor dqehf spitz_nr_manual_C_W_G  [aw=count7791]
cor dqsts spitz_r_cognitive_C_W_G  [aw=count7791]
/* Build, from the census extract usa_00034, the size of every
   (occsoc, industry, education group, sex) cell in each year, expressed as a share
   of that year's total. Saved in wide form (one occsize<year> column per year) and
   in long form. */

use ind* sex occsoc educd year perwt occ* incwage wkswork2 age labf qocc qeduc classwkr qsex qage marst  using  usa_00034, clear

/* sample restrictions */
keep if wkswork2>=4 & incwage~=0 & incwage~=.
keep if age>=16 & age<65 & labf==2 & qocc~=4 & qeduc~=4 & classwkr ==2 & qsex~=4 & qage~=4 & marst~=.
keep if year>=1960 & year<=2000

/* education-group indicators built from ranges of educd */
gen byte edu1=(educd>=20 &  educd<=61 & educd~=60) | (educd<=17 & educd>=2)
gen byte edu2=((educd>=62 & educd<=64) | educd==60)
gen byte edu3=(educd>=65 & educd<=90)
gen byte edu4=(educd==100 | educd==101)
gen byte edu5=(educd>=110 & educd<.)

/* eduG = 0..4; group 0 is the default, i.e. every observation not in edu2-edu5
   (edu1 itself is not referenced) */
gen eduG=0
replace eduG=1 if edu2==1
replace eduG=2 if edu3==1
replace eduG=3 if edu4==1
replace eduG=4 if edu5==1

/* Cell size = sum of the person weights in the cell. This replaces summing a
   constant 1 under [aw=perwt]: -collapse- rescales aweights before computing
   (sum), so that form may return unweighted head counts rather than weighted
   totals. Summing perwt directly is unambiguous.
   NOTE(review): if the rescaling is applied within by-groups, results change for
   any year whose perwt is not constant - confirm against the published figure. */
collapse (sum) occsize=perwt, by(occ1990 year  ind1990 eduG sex)

joinby occ1990 using occ1990_occsoc_correspondence
collapse (sum) occsize, by(occsoc year ind1990 eduG sex)

/* Balance the panel: every combination of the five variables gets a row, with
   size 0 where no one was observed. Variable names are written out in full so the
   command does not depend on abbreviation (previously "ind" and "edu"). */
fillin sex ind1990 eduG occsoc year
replace occsize=0 if occsize==.
drop _fillin

/* convert sizes to shares of the year's total, one year-column at a time */
reshape wide occsize, i(occsoc eduG ind1990 sex) j(year)
foreach var of varlist occsize* {
   replace `var'=0 if `var'==.
   sum `var'
   replace `var'=`var'/r(sum)
}
save ipums_occupation_shares_edu_ind_sex_2, replace

/* same data in long form, used by the Section 3.3 calculation */
use  ipums_occupation_shares_edu_ind_sex_2.dta , clear
reshape long occsize , i(sex eduG occsoc ind1990) j(year)
save ipums_occupation_shares_edu_ind_sex_2_long.dta, replace

 /* Section 3.3 figure: hold each occupation's task content fixed at its 1977
    newspaper values and trace how the mean task percentile (relative to the 1960
    distribution) moves as the sizes of the sex x education x industry cells change. */

use summary_by_year_comp , clear
drop if description_new_miss==1
drop if year<1950
/* keep all newspaper rows, matched or not; unmatched titles get the placeholder
   code 999999 and are dropped below once it has been truncated to 9999 */
merge m:1 description_new using soc_codes_corresp, keep(1 3) nogenerate
replace soc_new=999999 if soc_new==.
gen occsoc=floor(soc_new/100)

/* same SOC regrouping as used when building data_by_soc4_year */
replace occsoc=1130 if occsoc==1131
replace occsoc=2510 if occsoc==2511
replace occsoc=2520 if occsoc==2530 
replace occsoc=2911 if occsoc==2990
replace occsoc=3190 if occsoc==3120 
replace occsoc=3520 if occsoc==3510
replace occsoc=3990 if occsoc==3970
replace occsoc=3990 if occsoc==3940
replace occsoc=4520 if occsoc==4510

drop if occsoc==9999 | occsoc==.
bys year : egen ads=total(ct2)
gen share=ct2/ads

/* share-weighted mean of every task measure (all years from 1950 on), stored in
   the local <measure>_loc */
foreach var of varlist spitz*C  {
   sum `var' [aw=share]
   local `var'_loc=r(mean)
}

/* _sh: measure relative to its overall weighted mean; _W: mentions per 1,000 words */
foreach var of varlist spitz*C  {
   gen `var'_sh=`var'/``var'_loc'
   gen `var'_W=`var'/words*1000
}
/* _sh_: each normalised measure as a fraction of the sum of the five */
foreach var of varlist spitz*_C_sh  {
   gen `var'_=`var'/(spitz_nr_analytic_C_sh+spitz_nr_inter_C_sh+spitz_nr_manual_C_sh+spitz_r_manual_C_sh+spitz_r_cognitive_C_sh)
}

/* occupation-level task content, 1977 only */
keep if year==1977
collapse (mean) *sh_  *W [aw=share] , by(occsoc)

joinby  occsoc using ipums_occupation_shares_edu_ind_sex_2_long.dta, unmatched(master)
/* the merge=1 observations are those for military occupations, not included in our census sample */ 
drop if _merge==1
drop _merge

/* group = running id of each sex x eduG x ind1990 cell */
sort sex eduG ind1990
by sex eduG ind1990: gen group=1 if _n==1
replace group=sum(group)

/* s2 = size of the cell in the year; task measures = size-weighted cell means */
bys group year: egen s2=sum(occsize)
collapse (mean) spitz* s2 [aw=occsize], by(group year)

sort year group

/* Rank of every cell in the 1960 distribution of each per-1,000-word measure:
   cumulative 1960 size of all cells up to and including this one, copied to the
   cell's rows in every year. The wildcard selects all five _C_W measures regardless
   of column order; the earlier first-last variable range only covered all five
   (which the graph below requires) if nr_analytic happened to be stored first and
   nr_manual last. */
foreach var of varlist spitz*_C_W {
 sort year `var'
 gen t1=sum(s2) if year==1960
 bys group: egen `var'Rk=min(t1)
 drop t1
}

/* size-weighted mean rank by year */
collapse (mean) *k [aw=s2], by(year)

/* Re-centre every series by the first row's nonroutine-analytic value (the data
   are sorted by year after the collapse, so that row is the earliest year) and
   express the result in percentile points around 50. The baseline is held in a
   scalar so it cannot be lost if another r-class command is inserted later. */
sum spitz_nr_analytic_C_WRk if _n==1
tempname base
scalar `base'=r(mean)
foreach var of varlist spitz* {
  replace `var'=(`var'-`base'+.5)*100
}

graph twoway  ( line spitz_nr_analytic_C_WRk year,  lwidth(thick) lpattern(dash) color(green)) ///
    ( line spitz_nr_inter_C_WRk year,  color(black) ) ///
    ( line spitz_r_cognitive_C_WRk year, color(red) lpattern(longdash) lwidth(medthick) ) ///
    ( line spitz_r_manual_C_WRk year,  color(blue) lpattern(shortdash) lwidth(thick)) ///
    ( line spitz_nr_manual_C_WRk year, color(gray)  lwidth(thick))   , ///
    ytitle("Mean Task Input in Percentiles" "of 1960 Task Distribution") ///
    legend(lab(1 "NR: Analytic") lab(2 "NR: Interactive") lab(3 "R: Cognitive") lab(4 "R: Manual") lab(5 "NR: Manual") row(2)) ///
    xtitle("Year")  xlab(1960(10)2000) ylab(38(4)62)
graph export "alm_between_C_ind.eps", replace
outsheet using "alm_between_C_ind.csv", replace delim(,)
/* This is the figure that appears within Section 3.3 of our paper */

log close