clear all
set more off

*<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><
*																				
*		epp-prepare-data.do
*		see AEJPol_2017_0709_data_read_me.pdf
*
*		Builds three datasets, each saved to the working directory:
*		  [A] new-epp-analysis : student-teacher-year-subject test-score file
*		  [B] epp-obsv-pre     : pre-treatment observation scores
*		  [C] epp-obsv-post    : post-treatment observation scores
*
*		Uses the commands -center- and -renvars-, which are not part of
*		official Stata (NOTE(review): presumably installed from SSC or the
*		Stata Journal archive before running; confirm).
*
*<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><

* Placeholder: point this at the folder holding the raw input files. All file
* names below are relative to this directory.
cd "your\path\here"

*===============================================================================
*	[A] Main analysis file
*===============================================================================
*<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><
*	[A.1] Student test-score data
*<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><
*-------------------------------------------------------------------------------
*	Stack the 2015, 2014 and 2013 achievement files
*-------------------------------------------------------------------------------

* 2015 is the base file; rows whose year is still missing after each append
* are labelled with the year of the file just appended
use student_ach_data_2015 , clear
generate year = 2015

append using student_ach_data_2014
replace year = 2014 if missing( year )

* harmonize storage types first, to avoid an append conflict with 2013
destring grade , replace
foreach lvl of varlist performance_level_rla performance_level_ma performance_level_sc {
	tostring `lvl' , replace
}

append using student_ach_data_2013
replace year = 2013 if missing( year )

rename unique_student_id studentid

*-------------------------------------------------------------------------------
*	One row per student-year-subject
*-------------------------------------------------------------------------------

* order is a within student-year row counter, needed only so that i() is unique
by studentid year , sort : generate order = _n
reshape long scale_score_@ , i( studentid year order ) j( subject ) string
drop order

replace subject = "ma" if subject == "math"

* rows without a score carry no information
keep if !missing( scale_score_ )

* < 0.25% of cases have more than one test; all copies of those cases are dropped
duplicates tag studentid year subject , generate( tag )
keep if tag == 0
drop tag

*-------------------------------------------------------------------------------
*	Standardize scale scores within subject-year-grade cells
*-------------------------------------------------------------------------------

by subject year grade , sort : center scale_score_ , gen( stdscore ) standardize

*-------------------------------------------------------------------------------
*	Demographic variables
*-------------------------------------------------------------------------------

* gender is read from the 2013-2014 rows only; the student-level mode is then
* applied to every row of that student (egen mode() leaves ties missing)
generate female = ( student_gender == "F" ) if inrange( year , 2013 , 2014 )
egen _fem_mode = mode( female ) , by( studentid )
replace female = _fem_mode if !missing( _fem_mode )
drop student_gender _fem_mode

* missing-indicator approach: flag, then zero-fill
generate MI_female = missing( female )
replace female = 0 if missing( female )

* collapse the reported race codes into five categories
generate raceth = .
replace raceth = 1 if race_reported == 5
replace raceth = 2 if race_reported == 2
replace raceth = 3 if race_reported == 3
replace raceth = 4 if race_reported == 1
replace raceth = 5 if inlist( race_reported , 0 , 4 , 6 )
label define raceth 1 "White" 2 "Black" 3 "Latino" 4 "Asian" 5 "Other"
label values raceth raceth

generate sped = ( special_education == 1 )
drop special_education

* ELL = any of the three source fields is non-missing
generate ell = !( missing( ell_excluded ) ///
		& missing( esl_services_ell ) ///
		& missing( esl_services_t1_t2 ) )
drop ell_excluded esl_services_ell esl_services_t1_t2

*-------------------------------------------------------------------------------
*	Own-subject lagged score and retention flag
*-------------------------------------------------------------------------------

* panel unit = student x subject, time = year; L. then refers to year - 1
egen _id = group( studentid subject )
tsset _id year

generate stdscore_tm1 = L.stdscore

* 1 when the grade equals the grade recorded one year earlier; a missing lag
* compared with a non-missing grade evaluates to 0
generate retained = ( L.grade == grade )
drop _id

*-------------------------------------------------------------------------------
*	Lagged scores from every subject, attached to each student-year row
*-------------------------------------------------------------------------------

preserve

	keep studentid year subject *_tm1

	* trailing underscore so that reshape can append the subject as a suffix
	rename stdscore_tm1 stdscore_tm1_

	reshape wide stdscore_tm1_ , i( studentid year ) j( subject ) string

	* average of the math and RLA lags (rowmean uses whichever are non-missing)
	egen stdscore_tm1_avg = rowmean( stdscore_tm1_ma stdscore_tm1_rla )

	tempfile lagscores
	save `lagscores' , replace

restore

* every row must find its student-year record, otherwise merge aborts
merge m:1 studentid year using `lagscores' , nogenerate assert( match )

*-------------------------------------------------------------------------------
*	Finish up
*-------------------------------------------------------------------------------

* school id as recorded with the test score: "system-school"; blank when either
* component is missing
egen test_schoolid = concat( system_number school_number ) , punct( "-" )
replace test_schoolid = "" if missing( system_number , school_number )
drop school_name school_number system_name system_number

* variables retained for the analysis file, in their final column order
local keepvars studentid year grade subject test_schoolid ///
		 stdscore stdscore_tm1 stdscore_tm1_avg ///
		 female MI_female raceth sped ell retained
keep `keepvars'
order `keepvars'

* 2013 rows were needed only to build lags; science is not analysed
keep if inrange( year , 2014 , 2015 )
keep if inlist( subject , "rla" , "ma" )

tempfile testscores
save `testscores' , replace

*<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><
*	[A.2] Student-Teacher Links 
*<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><
*-------------------------------------------------------------------------------
*	Stack the 2015 and 2014 link files
*-------------------------------------------------------------------------------

* --- 2015: only a teacher school id is built from this file
use teacher_link_2015 , clear

* discard the file's own year variable and rebuild it as a numeric constant
drop year
generate year = 2015

egen tch_schoolid = concat( system_no school_no ) , punct( "-" )
replace tch_schoolid = "" if missing( system_no , school_no )

tempfile link2015
save `link2015' , replace

* --- 2014: teacher and student school ids come from different column pairs
use teacher_link_2014 , clear
generate year = 2014

egen tch_schoolid = concat( districtnumber schoolnumber ) , punct( "-" )
replace tch_schoolid = "" if missing( districtnumber , schoolnumber )
egen stu_schoolid = concat( district_no school_no ) , punct( "-" )
replace stu_schoolid = "" if missing( district_no , school_no )

append using `link2015'

rename student_id studentid
rename teacher_id teacherid

keep teacherid studentid year *_schoolid stud_link_pct_*

*-------------------------------------------------------------------------------
*	One row per student-teacher-year-subject
*-------------------------------------------------------------------------------

* order is a row counter within student-teacher-year so that i() is unique
by studentid teacherid year , sort : generate order = _n
reshape long stud_link_pct_@ , i( studentid teacherid year order ) j( subject ) string
drop order

* subject codes consistent with the test-score file
replace subject = "ma" if subject == "math"
replace subject = "sc" if subject == "sci"

* remove exact duplicate rows, then add up link percentages over what remains
duplicates drop

collapse (sum) stud_link_pct_ , by( studentid teacherid year subject tch_schoolid stu_schoolid )
isid studentid teacherid year subject

*-------------------------------------------------------------------------------
*	Student-teacher weight: each teacher's share of the student's total link
*	percentage within year and subject
*-------------------------------------------------------------------------------

generate _share = stud_link_pct_ / 100
egen _denom = sum( _share ) , by( studentid year subject )
generate wt_stu_tch = _share / _denom
drop stud_link_pct_ _share _denom

*-------------------------------------------------------------------------------
*	Finish Up
*-------------------------------------------------------------------------------

tempfile links
save `links' , replace

*<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><
*	[A.3] Combine test scores and student-teacher links
*<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><

* every test-score row is kept; link rows without a test score are discarded
use `testscores' , clear
merge 1:m studentid year subject using `links' , keep( master match ) nogenerate

*-------------------------------------------------------------------------------
*	schoolid: fill in missing values; not perfect but reasonable
*-------------------------------------------------------------------------------

* student school: fall back on the school recorded with the test score
replace stu_schoolid = test_schoolid if missing( stu_schoolid )
drop test_schoolid

* teacher school, first fallback: modal student school over the teacher's rows
egen _modal_school = mode( stu_schoolid ) , by( teacherid )
replace tch_schoolid = _modal_school if !missing( teacherid ) & missing( tch_schoolid )
drop _modal_school

* teacher school, second fallback: the student's school on the same row
replace tch_schoolid = stu_schoolid if !missing( teacherid ) & missing( tch_schoolid )

*-------------------------------------------------------------------------------
*	schoolid: some teachers in the experiment move schools between 2014
*			  and 2015; use 2014 experiment-year school for those teachers
*-------------------------------------------------------------------------------

preserve

	* 2014 rows whose teacher school id starts with "570"
	keep if year == 2014
	keep if substr( tch_schoolid , 1 , 3 ) == "570"

	* one school per teacher; isid aborts if a teacher has more than one
	keep teacherid tch_schoolid
	rename tch_schoolid tch_schoolid_2014
	duplicates drop
	isid teacherid

	tempfile school2014
	save `school2014' , replace

restore

merge m:1 teacherid using `school2014' , nogenerate

* overwrite the 2015 school with the 2014 school wherever one was found
replace tch_schoolid = tch_schoolid_2014 if year == 2015 & !missing( tch_schoolid_2014 )
drop tch_schoolid_2014

*-------------------------------------------------------------------------------
*	Random-assignment details
*-------------------------------------------------------------------------------

* use teacher's schoolid
* (ET 12/17/15--using student school instead does not change the results substantively)
rename tch_schoolid schoolid
drop stu_schoolid

merge m:1 schoolid using epp-treat-assign , nogenerate keep( master match ) keepusing( treat rblock )

*-------------------------------------------------------------------------------
*	Reduce to analysis sample
*-------------------------------------------------------------------------------

* study schools sample only
drop if missing( treat )

* keep observations with a usable, non-zero weight
keep if wt_stu_tch != 0 & !missing( wt_stu_tch )

* drop 3rd graders
drop if grade == 3

* odd case, does not affect results
drop if teacherid == 15547

* for 2015 analysis, only teachers in experiment: in2014 is 1 when the teacher
* has at least one remaining 2014 row
egen in2014 = max( year == 2014 ) , by( teacherid )
drop if year == 2015 & in2014 == 0

*<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><
*	[A.4] Additional teacher and pair characteristics
*<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><
*-------------------------------------------------------------------------------
*	EPP teacher matching details
*-------------------------------------------------------------------------------

merge m:1 teacherid using epp-matches , nogenerate keep( master match )

* skill-level need/have flags are not used in the test-score file
drop t_need_* tm_have_*

* complete role indicators: teachers absent from epp-matches get zeros
foreach role of varlist t_targ t_ment t_targ_tot t_ment_tot {
	replace `role' = 0 if missing( `role' )
}
generate t_norole = ( t_targ == 0 & t_ment == 0 )

* proportion matches "high" indicator: in [.5, 1] and a target teacher
generate t_pmatches_high = inrange( t_pmatches , .5 , 1 ) & t_targ != 0

* unstandardized copy, here for use in Table 2 covariate balance
generate t_pmatches_nat = t_pmatches

* standardize proportion matches and needs using one row per teacher-year;
* only 2014 rows are rescaled, 2015 rows keep their original values
egen _first = tag( teacherid year )
foreach m of varlist t_needs t_pmatches {
	summarize `m' if _first == 1 & year == 2014
	replace `m' = ( `m' - r(mean) ) / r(sd) if year == 2014
}
drop _first

* non-target teachers get zero on both measures
replace t_pmatches = 0 if t_targ == 0
replace t_needs    = 0 if t_targ == 0

* same subject indicator, picked according to the row's own subject
generate tm_ssubj = cond( subject == "ma"  , tm_ssubj_math , ///
				   cond( subject == "rla" , tm_ssubj_rla  , . ) )
drop tm_ssubj_*

*-------------------------------------------------------------------------------
*	Teacher value-added from TVAAS
*-------------------------------------------------------------------------------

preserve

	use tvaas_2013 , clear
	duplicates drop

	rename teacher_id teacherid
	egen schoolid = concat( districtnumber schoolnumber ) , punct( - )

	* year is stored as a string in this file
	keep if inlist( year , "2012" , "2013" )

	keep teacherid schoolid test subject year grade index

	* standardize by test subject year grade
	by test subject year grade , sort : center index , gen( t_va ) standardize

	* subject-specific versions (math-type and reading/language-type)
	generate t_vam = t_va if regexm( subject , "Algebra|Math" )
	generate t_var = t_va if regexm( subject , "English|Reading|Language" )

	* average TVAAS: successive means, removing one level at a time, so each
	* level contributes equally regardless of how many rows sit beneath it
	foreach level in "teacherid year subject grade" ///
					 "teacherid year subject" ///
					 "teacherid year" ///
					 "teacherid" {
		collapse (mean) t_va* , by( `level' )
	}

	egen t_va_avg = rowmean( t_vam t_var )

	tempfile valadd
	save `valadd' , replace

restore

merge m:1 teacherid using `valadd' , nogenerate keep( master match )

* teacher VA times tm_va (tm_va is not created in this file)
generate txtm_va = t_va * tm_va

*-------------------------------------------------------------------------------
*	Teacher experience
*-------------------------------------------------------------------------------

preserve

	use teacher_exp_masked_oct14 , clear

	rename teacher_id teacherid
	rename yrs_exp t_expr
	keep teacherid t_expr

	* quadratic term
	generate t_expr2 = t_expr ^ 2

	* ten or fewer years; a missing t_expr evaluates to 0 here
	generate t_expr_young = ( t_expr <= 10 )

	tempfile exper
	save `exper' , replace

restore

merge m:1 teacherid using `exper' , nogenerate keep( master match )

*<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><
*	[A.5] Finish Up
*<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><
*-------------------------------------------------------------------------------
*	Allow missing values
*	For each listed covariate: MI_<name> flags rows where it was missing, and
*	the covariate itself is then set to zero on those rows.
*-------------------------------------------------------------------------------

foreach v in t_expr tm_expr t_va tm_va txtm_va tm_sgrade tm_ssubj {
	gen MI_`v' = mi( `v' ) 
	replace `v' = 0 if mi( `v' )
}

* The squared term must be zero-filled on the same rows as t_expr. The loop
* above has already replaced missing t_expr with 0, so testing mi( t_expr )
* here would never be true; use the flag recorded before the fill instead.
replace t_expr2 = 0 if MI_t_expr == 1

* NOTE(review): t_expr_young is not zero-filled here, so it stays missing for
* teachers who were not found in the experience file -- confirm this is intended.

*-------------------------------------------------------------------------------
*	Interaction terms
*	(Make explicitly because epp_wcbt and epp_frt are easier to write)
*-------------------------------------------------------------------------------

* teacher role and pair characteristics

* role x treatment
gen targXt = t_targ * treat
gen mentXt = t_ment * treat
gen noroleXt = t_norole * treat

* target x characteristic, and target x treatment x characteristic
foreach v in t_pmatches t_pmatches_high t_needs t_va tm_va txtm_va ///
			 tm_sgrade tm_ssubj t_expr t_expr2 t_expr_young tm_expr {
	gen targX`v' = t_targ * `v'
	gen targXtX`v' = t_targ * treat * `v'
}

* student test score function

* flag and zero-fill the average lagged score
gen MI_stdscore_tm1_avg = mi( stdscore_tm1_avg )
replace stdscore_tm1_avg = 0 if mi( stdscore_tm1_avg )

* one dummy (Xgys1, Xgys2, ...) per grade-year-subject cell
egen gys = group( grade year subject )
tab gys , gen( Xgys )
drop gys

* lagged score interacted with every grade-year-subject dummy
foreach g of varlist Xgys* {
	gen stdscore_tm1_avg`g' = stdscore_tm1_avg * `g'
}


compress
save new-epp-analysis , replace


*===============================================================================
*	[B] Observation scores, pre-treatment
*===============================================================================

* the 19 rubric skills, grouped into instruction / planning / environment
local inst so ms pic ls act qu feed grp tck tks th ps
local plan ip sw as
local envi ex msb env rc
local skills `inst' `plan' `envi'

*-------------------------------------------------------------------------------
*	Observation data
*-------------------------------------------------------------------------------

* teacher -> school lookup, restricted to district "570"
use "12-13 Evaluation User File" , clear
rename teacher_id teacherid

keep if districtno == "570"
destring districtno schoolno , replace
egen schoolid = concat( districtno schoolno ) , punct( - )

keep teacherid schoolid

tempfile userfile
save `userfile' , replace

* observation-level scores, averaged to one row per teacher
use obs_level_eval_data_2013 , clear

renvars * , lower
rename teacher_id teacherid

keep if rubric == "General Educator"

collapse (mean) `skills' , by( teacherid )

merge 1:1 teacherid using `userfile' , nogenerate keep( master match )

*-------------------------------------------------------------------------------
*	Overall and group average scores
*-------------------------------------------------------------------------------

egen obsvmean = rowmean( `skills' )

foreach grp in inst plan envi {
	egen obsvmean_`grp' = rowmean( ``grp'' )
}

*-------------------------------------------------------------------------------
*	"needs" (score < 3) & "haves" (score >= 4)
*-------------------------------------------------------------------------------

* comparison applied for each kind of flag
local rule_need <  3
local rule_have >= 4

foreach kind in need have {

	* 19 skills individually; flag is missing where the score is missing
	foreach skill of local skills {
		generate `kind'_`skill' = `skill' `rule_`kind'' if !missing( `skill' )
	}

	* number of flagged skills; computed before the average-score flags
	* below exist, so the wildcard picks up the skill flags only
	egen N`kind's = rowtotal( `kind'_* )

	generate any_`kind's = N`kind's >= 1 if !missing( N`kind's )

	* same rule applied to the overall and group average scores
	foreach avg of varlist obsvmean* {
		generate `kind'_`avg' = `avg' `rule_`kind'' if !missing( `avg' )
	}

}

*-------------------------------------------------------------------------------
*	Finish up
*-------------------------------------------------------------------------------

* attach assignment information and keep teachers in schools that have it
merge m:1 schoolid using epp-treat-assign , nogenerate assert( master match )
drop if missing( treat )

compress
save epp-obsv-pre , replace

*===============================================================================
*	[C] Observation scores, post-treatment
*===============================================================================

* the 19 rubric skills, grouped into instruction / planning / environment
local inst so ms pic ls act qu feed grp tck tks th ps
local plan ip sw as
local envi ex msb env rc
local skills `inst' `plan' `envi'

*-------------------------------------------------------------------------------
*	Observation data
*-------------------------------------------------------------------------------

* 2014 first; the character X is stripped from schoolno before conversion
use obs_level_eval_data_2014 , clear
destring schoolno , replace ignore( X )
generate year = 2014

* rows still missing year after the append are labelled 2015
append using obs_level_eval_data_2015
replace year = 2015 if missing( year )

rename teacher_id teacherid
keep if !missing( teacherid )

egen schoolid = concat( districtno schoolno ) , punct( - )

* mean score per skill for each teacher-school-year
collapse (mean) `skills' , by( teacherid schoolid year )

*-------------------------------------------------------------------------------
*	Experiment details, keep experiment schools
*-------------------------------------------------------------------------------

merge m:1 schoolid using epp-treat-assign , nogenerate keep( match )
drop if rblock == 108 // high schools

*-------------------------------------------------------------------------------
*	Standardize individual skill scores 
*	(mean and sd taken from treat == 0 rows of the same year, applied to all
*	rows of that year)
*-------------------------------------------------------------------------------

foreach skill of local skills {
	forvalues yr = 2014/2015 {
		summarize `skill' if treat == 0 & year == `yr'
		replace `skill' = ( `skill' - r(mean) ) / r(sd) if year == `yr'
	}
}

*-------------------------------------------------------------------------------
*	Simple average of 19 skills; standardized
*-------------------------------------------------------------------------------

* average of the already-standardized skills, then re-standardized the same way
egen obsvmean = rowmean( `skills' )
forvalues yr = 2014/2015 {
	summarize obsvmean if treat == 0 & year == `yr'
	replace obsvmean = ( obsvmean - r(mean) ) / r(sd) if year == `yr'
}

*-------------------------------------------------------------------------------
*	Averages for the subset of skills matched, not matched, and where
*	no attempt was made to match; standardized
*-------------------------------------------------------------------------------

preserve

	local allskills `inst' `plan' `envi'

	merge m:1 teacherid using epp-matches , nogenerate keep( master match )

	* target teachers only
	keep if t_targ == 1

	* 0/1 indicators per skill
	foreach skill of local allskills {

		* M: skill matched (teacher needs it and tm_have flag is set)
		generate  M_`skill' = ( t_need_`skill' == 1 & tm_have_`skill' == 1 )

		* XM: skill not matched (complement of M)
		generate XM_`skill' = !M_`skill'

		* XN: skill where no attempt was made to match (not a need)
		generate XN_`skill' = ( t_need_`skill' == 0 )

	}

	foreach set in M XM XN {

		* copy of each score that is non-missing only where the skill belongs
		* to the set, so that rowmean averages over the set's skills alone
		foreach skill of local allskills {
			generate S_`set'_`skill' = `skill' if `set'_`skill' == 1
		}

		egen obsvmean_`set' = rowmean( S_`set'_* )
		drop S_`set'_*

		* standardize against treat == 0 rows of the same year
		forvalues yr = 2014/2015 {
			summarize obsvmean_`set' if treat == 0 & year == `yr'
			replace obsvmean_`set' = ( obsvmean_`set' - r(mean) ) / r(sd) if year == `yr'
		}

	}

	rename obsvmean_M  obsvmean_match
	rename obsvmean_XM obsvmean_notmatch
	rename obsvmean_XN obsvmean_notneed

	keep teacherid year obsvmean_match obsvmean_notmatch obsvmean_notneed
	isid teacherid year

	tempfile ybymatch
	save `ybymatch'

restore

* attach to the full teacher-year file; rows for non-target teachers stay missing
merge 1:1 teacherid year using `ybymatch' , nogenerate assert( master match )

*-------------------------------------------------------------------------------
*	Teacher details, keep teachers who are in the test score analysis
*-------------------------------------------------------------------------------

preserve

use new-epp-analysis , clear

* t_expr in new-epp-analysis is already zero-filled where experience was
* unknown, so the original missingness is only recoverable from MI_t_expr;
* carry that flag along with the teacher-level variables
keep teacherid treat rblock t_targ t_ment t_norole t_expr MI_t_expr
duplicates drop
isid teacherid

* sanity check on the number of teachers in the test-score analysis
count
assert r(N) == 136

* renamed so they do not collide with treat / rblock already in the master data
rename treat Ntreat
rename rblock Nrblock

tempfile testteachers
save `testteachers' , replace

restore

* every kept row (matched or using-only) carries the teacher-level variables
merge m:1 teacherid using `testteachers' , nogen keep( match using )

*-------------------------------------------------------------------------------
*	Finish up
*-------------------------------------------------------------------------------

* Missing-experience flag. Testing mi( t_expr ) here would always give 0
* because t_expr arrives zero-filled; use the flag built before the fill.
gen MI_expr = MI_t_expr
drop MI_t_expr

* safeguard only: t_expr should already be non-missing at this point
replace t_expr = 0 if mi( t_expr )

* experience bands; 9 = experience unknown (overrides band 1, which the
* zero-filled value would otherwise fall into)
	gen expr_group = 1 if inrange( t_expr , 0  , 4  )
replace expr_group = 2 if inrange( t_expr , 5  , 9  )
replace expr_group = 3 if inrange( t_expr , 10 , 24 )
replace expr_group = 4 if inrange( t_expr , 25 , 50 )
replace expr_group = 9 if MI_expr == 1

* individual skill scores are not kept in the output file
drop `inst' `plan' `envi' 
drop if mi( obsvmean ) // cases with no scores at all

compress
save epp-obsv-post , replace

*===============================================================================
*===============================================================================
