/*
Replication file, May 20 2019

Outline 

Main text tables:
Table 1: Heterogeneity in Initial Prescription of a New Pharmaceutical Drug		
Table 2: Regression Summary Statistics		
Table 3: Descriptive Statistics		
Table 4: Time to First Prescription of New Drug, U.S. Physicians, 2000—2010		
Table 5: Prescription Diversity, U.S. Physicians, 2000—2010				
	  Prescription Propensity, U.S. Physicians, 2000-2010				

Main text figures:
Figure 1: Database diffusion curves, by doctor characteristic
Figure 2: Drug diffusion curves, by drug
Figure 3: Drug diffusion maps, Crestor  
Figures 4, 5, and 6: Event study graphs (with tables)

Appendix tables:
Table A.1: Included under Table 2 above
Table A.2: Replicate Table 5 on all-physician sample
Table A.3: Included under Figures 4, 5, and 6 above
Table A.4: Intensity of Database Use - Time to First Prescription, Prescription Diversity
Table A.5: Intensity of Database Use - Prescription Propensity
Table A.6, A.7, A.8: Second-stage, first-stage estimates - All outcomes
Table A.9: Mandatory Substitution Laws by Zipcode, Time to First Prescription, U.S. Physicians, 2000—2010	
Table A.10: Medical Innovation by Zipcode, Time to First Prescription, U.S. Physicians, 2000—2010	
Table A.11: Information and Prescribing Heterogeneity Among U.S. Physicians	
*/
											 
***************************************************************************************
*Table 1: Heterogeneity in Initial Prescription of a New Pharmaceutical Drug		
***************************************************************************************

*Inputs: doctor-by-drug records in temp8d.dta, restricted to the doctors listed in
*me_number_IV_cutoff2.dta. Both files are written further down in this do-file
*(the "For IV" and "Table 4" sections), so those sections must have been run first.
use temp8d.dta

*old-style match merge: the master data must be sorted on the merge key
sort me_number
merge me_number using me_number_IV_cutoff2.dta
tab _merge
keep if _merge == 3
drop _merge

keep generic me_number drug_num drug_name time_to_first_rx_in_months log_T Z_drug_pxUse Z_drug_pxUse_x_generic max_Z_ptxUse L1oth_adoption_share_zip_drug IV_drug_pxUse_x_generic
duplicates drop

count

*drop doctors that contribute only one non-missing log_T observation
egen count_me = count(log_T), by(me_number)
count if count_me == 1
drop if count_me == 1

*"replace" so that re-running the do-file does not stop on an already existing file
save tempor1.dta, replace

*Distribution of time to first prescription, one drug at a time.
*An if-qualifier gives the same statistics as preserve / keep / summarize / restore.
foreach drug in "CRESTOR" "ADVICOR" "ALTOPREV" "LESCOL XL" "LOVASTATIN" "PRAVASTATIN" "SIMVASTATIN" "SIMCOR" "VYTORIN" "ZETIA" "LOVAZA" "PRAVIGARD PAC" {
	display as text _newline "`drug'"
	count if drug_name == "`drug'"
	summ time_to_first_rx_in_months if drug_name == "`drug'", detail
}

clear

*********************************************************************************************************
*Correlation with existing measures of U.S. healthcare variations 
*********************************************************************************************************

*CORRELATION TABLE

*Panel A: correlation across doctors between Z, generic share, generic share by MOP, and 
*six-month post generic substitution share

/*build a dataset that has, for the year 2010:
1. me_number
2. generic_rx_share - medicare part D
3. generic_rx_share - medicaid
4. generic_rx_share - private
5. generic_rx_share - cash
*/

*Doctor-level generic Rx share by method of payment (MOP), then one column per MOP
use "prescriptions_2010_clean.dta"
keep me_number year_month drug_name Rx payment_method

*1 for the three generic statins, 0 for every other drug
generate generic_flag = inlist(drug_name, "SIMVASTATIN", "LOVASTATIN", "PRAVASTATIN")

*share = generic Rx / all Rx within each doctor x payment-method cell
egen total_rx_by_mop = total(Rx), by(me_number payment_method)
egen total_generic_rx_by_mop = total(Rx*generic_flag), by(me_number payment_method)
generate generic_rx_share_by_mop = total_generic_rx_by_mop / total_rx_by_mop

*collapse to one row per doctor x payment method
keep me_number generic_rx_share_by_mop payment_method
duplicates drop

*integer codes for the payment-method strings; they become the reshape suffixes
encode payment_method, generate(mop_num)
drop payment_method

tabulate mop_num

save "generic_rx_share_MOP_temp.dta", replace

*wide layout: generic_rx_share_by_mop1, generic_rx_share_by_mop2, ...
reshape wide generic_rx_share_by_mop, i(me_number) j(mop_num)

describe

sort me_number
count
save "generic_rx_share_by_me_number_MOP.dta", replace

/*Next, build a dataset that has:
1. me_number
2. zipcode
3. Z_pt in 2010
4. generic_rx_share - overall in 2010, 
*/

use masterfile_1_new

*Use = the doctor's maximum N_total_month over all months in the file;
*N_0 is 0 when that maximum equals 0 and 1 otherwise
egen Use = max(N_total_month), by(me_number)
generate N_0 = (Use != 0)

*restrict to months from 01/2010 onwards
keep if year_month >= 201001

keep me_number year_month drug_name Rx Z_pt zipcode N_0

*generic Rx share by doctor over the retained months
generate generic_flag = inlist(drug_name, "SIMVASTATIN", "LOVASTATIN", "PRAVASTATIN")
egen total_rx = total(Rx), by(me_number)
egen total_generic_rx = total(Rx*generic_flag), by(me_number)
generate generic_rx_share = total_generic_rx / total_rx

*Z and ZxUse as of 01/2010: multiply by a January-2010 indicator and take the doctor-level max
generate Jan_2010_flag = (year_month == 201001)
generate Z_ptxUse = Z_pt*N_0
egen Z_jan2010 = max(Z_pt*Jan_2010_flag), by(me_number)
egen ZxUse_jan2010 = max(Z_ptxUse*Jan_2010_flag), by(me_number)

*one row per distinct (doctor, zipcode, measures) combination
keep me_number zipcode Z_jan2010 ZxUse_jan2010 generic_rx_share
duplicates drop

count
sort me_number
save "Z_gen_rx_share_by_me_number_2010.dta", replace

/*Then build separate files that have,
1. me_number
2. Z_pt in 2005
3. generic_rx_share - lovastatin, 200206
4. generic_rx_share - pravastatin, 200610
5. generic_rx_share - simvastatin, 200612
*/

use masterfile_1_new

*N_0 is 0 for doctors whose maximum N_total_month over all months is 0, and 1 otherwise
egen Use = max(N_total_month), by(me_number)
generate N_0 = (Use != 0)

*snapshot of Z and ZxUse in 01/2005
keep if year_month == 200501
generate Z_ptxUse_jan2005 = Z_pt*N_0

keep me_number Z_pt Z_ptxUse_jan2005
duplicates drop

rename Z_pt Z_jan2005

count
sort me_number
save "Z_2005.dta", replace

*For each molecule below: generic share = generic Rx / (generic Rx + brand Rx) by doctor in one month.
*The output file names all end in _200206 although the months differ; the names are kept
*as they are because the merge step further down reads these exact file names.

*lovastatin (generic) vs MEVACOR (brand), 200206
use masterfile_1_new
keep if year_month == 200206
egen total_rx = total(Rx), by(me_number)
keep if inlist(drug_name, "LOVASTATIN", "MEVACOR")

generate lov_flag = (drug_name == "LOVASTATIN")
generate mev_flag = (drug_name == "MEVACOR")

egen total_lov_rx = total(Rx*lov_flag), by(me_number)
egen total_mev_rx = total(Rx*mev_flag), by(me_number)
generate lov_share = total_lov_rx / (total_lov_rx + total_mev_rx)

keep me_number lov_share
duplicates drop

count
sort me_number
save "generic_share_lov_200206.dta", replace

*pravastatin (generic) vs PRAVACHOL (brand), 200610
use masterfile_1_new
keep if year_month == 200610
egen total_rx = total(Rx), by(me_number)
keep if inlist(drug_name, "PRAVASTATIN", "PRAVACHOL")

generate pstatin_flag = (drug_name == "PRAVASTATIN")
generate prav_flag = (drug_name == "PRAVACHOL")

egen total_pstatin_rx = total(Rx*pstatin_flag), by(me_number)
egen total_prav_rx = total(Rx*prav_flag), by(me_number)
generate pstatin_share = total_pstatin_rx / (total_pstatin_rx + total_prav_rx)

keep me_number pstatin_share
duplicates drop

count
sort me_number
save "generic_share_prav_200206.dta", replace

*simvastatin (generic) vs ZOCOR (brand), 200612
*NOTE(review): this one reads data_comp1.dta while the two above read masterfile_1_new - confirm this is intended
use data_comp1.dta
keep if year_month == 200612
egen total_rx = total(Rx), by(me_number)
keep if inlist(drug_name, "SIMVASTATIN", "ZOCOR")

generate simv_flag = (drug_name == "SIMVASTATIN")
generate zoc_flag = (drug_name == "ZOCOR")

egen total_simv_rx = total(Rx*simv_flag), by(me_number)
egen total_zoc_rx = total(Rx*zoc_flag), by(me_number)
generate simv_share = total_simv_rx / (total_simv_rx + total_zoc_rx)

keep me_number simv_share
duplicates drop

count
sort me_number
save "generic_share_simv_200206.dta", replace

*Now, merge files
*Old-style match merges on me_number: the master is re-sorted on me_number before every merge.
*Only the first merge is restricted to matched doctors; the later ones keep all rows.
use "generic_rx_share_by_me_number_MOP.dta"
merge me_number using "Z_gen_rx_share_by_me_number_2010.dta"
tab _merge
keep if _merge == 3
count
sort me_number
drop _merge
merge me_number using "Z_2005.dta"
tab _merge
sort me_number
drop _merge
merge me_number using "generic_share_lov_200206.dta"
tab _merge
sort me_number
drop _merge
merge me_number using "generic_share_prav_200206.dta"
tab _merge
sort me_number
drop _merge
merge me_number using "generic_share_simv_200206.dta"
tab _merge
sort me_number
drop _merge

count
*sorted on zipcode because the file is later used as the master of a merge on zipcode
sort zipcode
*"replace" so that re-running the do-file does not stop on an already existing file
save "doctor_correlation_file.dta", replace

*Do doctors with high generic prescription shares have high shares regardless of MOP?
*cash (1), commercial (2), medicaid (3), medicare Part D (4)
pwcorr generic_*, sig

*Do doctors that adopt database by Jan 2005 have significantly higher generic shares?
*explained entirely by shifts among commercially insured patients
*The original command named "Z_pt", which is not a variable in this file; it only ran because
*Stata expands it as the unique abbreviation of Z_ptxUse_jan2005. The full name is spelled out
*so the command gives the same result and also runs with "set varabbrev off".
*NOTE(review): if the plain adoption indicator was intended, use Z_jan2005 instead.
pwcorr Z_ptxUse_jan2005 generic_*, sig

*Panel B: correlation across HSAs between the above variables and Dartmouth atlas outcomes

/*build a dataset that has, for the year 2010:
1. HSA
2. zipcode
2. generic_rx_share - all drugs, part D
3. part D spending per patient 
4. part A&B spending per patient 
5. share of diabetics receiving a statin
*/

*Attach the Dartmouth Atlas file to the doctor-level file by zipcode; keep matched rows only
use "doctor_correlation_file.dta"
merge zipcode using  "DAtlas_data.dta"
tabulate _merge
keep if _merge == 3

*HSA-level averages of the doctor-level measures (the value is repeated on every row of an HSA)
forvalues k = 1/4 {
	egen avg_generic_rx_share_mop`k' = mean(generic_rx_share_by_mop`k'), by(hsa_id)
}
egen avg_generic_rx_share = mean(generic_rx_share), by(hsa_id)
egen avg_Z_2010 = mean(Z_jan2010), by(hsa_id)
egen avg_ZxUse_2010 = mean(ZxUse_jan2010), by(hsa_id)
egen avg_Z_2005 = mean(Z_jan2005), by(hsa_id)
egen avg_ZxUse_2005 = mean(Z_ptxUse_jan2005), by(hsa_id)
egen avg_lov_share = mean(lov_share), by(hsa_id)
egen avg_prav_share = mean(pstatin_share), by(hsa_id)
egen avg_simv_share = mean(simv_share), by(hsa_id)

count

*reduce to one row per HSA; both variable ranges rely on the order of the variables in the data
keep hsa_id avg_generic_rx_share_mop1-avg_simv_share pct_diab_statin-fraction_branded
duplicates drop

count
sort hsa_id

save "HSA_correlation_file.dta", replace

*pairwise correlations across HSAs, with significance levels
pwcorr avg_generic_rx_share pct_diab_statin-fraction_branded, sig

pwcorr avg_generic_rx_share avg_generic_rx_share_mop* fraction_branded, sig

pwcorr avg_Z_2005 avg_generic_rx_share avg_generic_rx_share_mop*, sig

pwcorr avg_Z_2005 pct_diab_statin-fraction_branded, sig

pwcorr avg_prav_share avg_lov_share avg_simv_share, sig

pwcorr avg_prav_share avg_lov_share avg_simv_share fraction_branded, sig

clear
	
***************************************************************************************
*Table 2: Regression Summary Statistics			
***************************************************************************************

use tempdataB22.dta

*keep rows on which none of the listed variables equals system missing (.)
keep if Rx_flag != . ///
	& Z_ptxUse_x_NewGeneric_2 != . & Z_ptxUse_x_NewBranded_2 != . ///
	& Z_ptxUse_x_OldGeneric_2 != . & Z_ptxUse_x_OldBranded_2 != . ///
	& Z_IV_x_NewGeneric_2 != . & Z_IV_x_NewBranded_2 != . ///
	& Z_IV_x_OldGeneric_2 != . & Z_IV_x_OldBranded_2 != . ///
	& me_number_drug != . & drug_month_num != .

*keep 01/2000 plus every June and December
keep if year_month == 200001 | month == 6 | month == 12

*restrict to the doctors listed in me_number_IV_cutoff2.dta (old-style match merge)
sort me_number
merge me_number using me_number_IV_cutoff2.dta
tabulate _merge
keep if _merge == 3
drop _merge

count

save tempo.dta, replace

summarize Rx, detail
summarize Rx_flag, detail

*drug-month level: New = New_2 + NewGeneric_2 over distinct (flags, drug, month) rows
preserve
keep New_2 NewGeneric_2 drug_name year_month
duplicates drop
generate New = New_2 + NewGeneric_2
summarize New, detail
restore

*number of drugs present in the first and in the last month
preserve
keep drug_num year_month
duplicates drop
count if year_month == 200001
count if year_month == 201012
restore

clear

*physician-month level
use tempC2g.dta

*attach generic_rx_share by doctor and month; keep matched rows only
sort me_number year_month
merge me_number year_month using generic_rx_share.dta
tab _merge
keep if _merge == 3
drop _merge

*Two-digit ZIP prefix. zipcode is numeric here (string() is applied to it), so a ZIP with a
*leading zero is stored with fewer than five digits and string(zipcode) would give e.g. "2138"
*for 02138, i.e. prefix 21 instead of 02. Pad to five digits before taking the prefix.
*Assumes zipcode holds plain 5-digit ZIP codes - TODO confirm.
gen zip2 = substr(string(zipcode, "%05.0f"),1,2) if !missing(zipcode)
destring zip2, replace

*so that IV and OLS sample sizes match:
egen nobs = count(num_drugs), by(zipcode year_month)
drop if nobs == 1

count

summ Z_pt, detail
summ Z_ptxUse, detail
summ L1oth_adoption_share_zip, detail
summ Use, detail
summ hhi_rx, detail
summ num_drugs_month, detail
summ generic_rx_share, detail
summ num_rx, detail

*number of doctors
preserve
keep me_number
duplicates drop
count

restore

clear

*physician-drug level

use temp8j.dta

*merge the L1.num_rx variable, N_{it-1}; rows that exist only in temporary.dta are discarded
sort me_number drug_num
merge me_number drug_num using temporary.dta
tabulate _merge
drop if _merge == 2
drop _merge

*1 unless the time to first Rx exceeds 12 months; a missing time compares as larger than 12
*in Stata, so it is coded 0 as well
generate rx_within_one_year = !(time_to_first_rx > 12)

summarize rx_within_one_year, detail
summarize time_to_first, detail

clear

***************************************************************************************
*Table 3: Descriptive Statistics		
***************************************************************************************

use masterfile_new

*12/2010 cross-section
keep me_number drug_name year_month Rx zipcode
keep if year_month == 201012
generate generic_flag = inlist(drug_name, "SIMVASTATIN", "LOVASTATIN", "PRAVASTATIN")

*doctor-level totals: all Rx, generic Rx, Lipitor Rx and each generic molecule
egen total_rx = total(Rx), by(me_number)
egen total_generic_rx = total(Rx*generic_flag), by(me_number)
generate lip_flag = (drug_name == "LIPITOR")
egen total_lip_rx = total(Rx*lip_flag), by(me_number)
generate lov_flag = (drug_name == "LOVASTATIN")
generate pstatin_flag = (drug_name == "PRAVASTATIN")
generate simv_flag = (drug_name == "SIMVASTATIN")
egen total_lov_rx = total(Rx*lov_flag), by(me_number)
egen total_pstatin_rx = total(Rx*pstatin_flag), by(me_number)
egen total_simv_rx = total(Rx*simv_flag), by(me_number)

*doctor-drug rows with the totals attached; re-read by the Panel C code
save calcs_temp_1.dta, replace

*one row per doctor (and zipcode)
keep me_number zipcode total_rx total_generic_rx total_lip_rx total_simv_rx total_lov_rx total_pstatin_rx
duplicates drop

*shares of the doctor's total Rx
generate generic_rx_share = total_generic_rx / total_rx
generate lipitor_rx_share = total_lip_rx / total_rx
generate lov_rx_share = total_lov_rx / total_rx
generate pstatin_rx_share = total_pstatin_rx / total_rx
generate simv_rx_share = total_simv_rx / total_rx

*total Rx across doctors, before and after removing doctors with no Rx
summarize total_rx, detail
drop if total_rx == 0
summarize total_rx, detail

***************
*PANEL A
foreach s in generic lov pstatin simv {
	summarize `s'_rx_share, detail
}

*variation across zipcodes: mean and SD of each share within zipcode
foreach s in generic lov pstatin simv {
	egen mean_`s'_share_zip = mean(`s'_rx_share), by(zipcode)
	egen sd_`s'_share_zip = sd(`s'_rx_share), by(zipcode)
}

save calcs_temp_2.dta, replace

*one row per zipcode; the range relies on the creation order of the variables above
keep mean_generic_share_zip-sd_simv_share_zip zipcode
duplicates drop

foreach s in generic lov pstatin simv {
	summarize mean_`s'_share_zip, detail
	summarize sd_`s'_share_zip, detail
}

clear

***************
*PANEL C

*summarize within-zip generic rx heterogeneity for 12/2010

*calcs_temp_1.dta holds the 12/2010 doctor-drug rows with the doctor-level totals and the
*generic flags (simv_flag, lov_flag, pstatin_flag) built for Panel A
use calcs_temp_1.dta

*Keep the three generic statins and their brand-name counterparts. The original line was cut
*off after the ZOCOR condition; MEVACOR and PRAVACHOL are the two other brands flagged below.
keep if inlist(drug_name, "SIMVASTATIN", "LOVASTATIN", "PRAVASTATIN", "ZOCOR", "MEVACOR", "PRAVACHOL")
drop if total_rx == 0

replace simv_flag = 1 if drug_name == "SIMVASTATIN"
replace lov_flag = 1 if drug_name == "LOVASTATIN"
replace pstatin_flag = 1 if drug_name == "PRAVASTATIN"
gen zoc_flag = 0
replace zoc_flag = 1 if drug_name == "ZOCOR"
gen mev_flag = 0
replace mev_flag = 1 if drug_name == "MEVACOR"
gen prav_flag = 0
replace prav_flag = 1 if drug_name == "PRAVACHOL"

gen flag = 1
*doctor-level brand totals (the generic totals are already in the file)
egen total_zoc_rx = sum(Rx*zoc_flag), by(me_number)
egen total_mev_rx = sum(Rx*mev_flag), by(me_number)
egen total_prav_rx = sum(Rx*prav_flag), by(me_number)

save calcs_temp_3.dta, replace

*one row per doctor (and zipcode)
keep me_number zipcode total_lov_rx total_pstatin_rx total_simv_rx total_zoc_rx total_mev_rx total_prav_rx
duplicates drop

*generic share within each molecule: generic / (generic + brand)
gen simv_share = total_simv_rx / (total_simv_rx + total_zoc_rx)
gen lov_share = total_lov_rx / (total_lov_rx + total_mev_rx)
gen pstatin_share = total_pstatin_rx / (total_pstatin_rx + total_prav_rx)

summ simv_share, detail
summ lov_share, detail
summ pstatin_share, detail

*mean and SD of each share across doctors within a zipcode
egen mean_lov_share_zip = mean(lov_share), by(zipcode)
egen sd_lov_share_zip = sd(lov_share), by(zipcode)
egen mean_pstatin_share_zip = mean(pstatin_share), by(zipcode)
egen sd_pstatin_share_zip = sd(pstatin_share), by(zipcode)
egen mean_simv_share_zip = mean(simv_share), by(zipcode)
egen sd_simv_share_zip = sd(simv_share), by(zipcode)

save calcs_temp_4.dta, replace

*one row per zipcode; the range relies on the creation order of the six variables above
keep mean_lov_share_zip-sd_simv_share_zip zipcode
duplicates drop

summ mean_lov_share_zip, detail
summ sd_lov_share_zip, detail
summ mean_pstatin_share_zip, detail
summ sd_pstatin_share_zip, detail
summ mean_simv_share_zip, detail
summ sd_simv_share_zip, detail

clear

*columns 5, 6, and 7 middle panel: six months after each generic launch-- 200206 for lov, 200610 for prav, 200612 for simv

*lovastatin (generic) vs MEVACOR (brand), 200206
use data_comp1.dta
keep if year_month == 200206
*total_rx is the doctor's Rx over all drugs in that month; doctors with none are removed
egen total_rx = sum(Rx), by(me_number)
drop if total_rx == 0
keep if (drug_name == "LOVASTATIN" | drug_name == "MEVACOR")
gen lov_flag = 0
replace lov_flag = 1 if drug_name == "LOVASTATIN"
gen mev_flag = 0
replace mev_flag = 1 if drug_name == "MEVACOR"
egen total_lov_rx = sum(Rx*lov_flag), by(me_number)
egen total_mev_rx = sum(Rx*mev_flag), by(me_number)
*one row per doctor (and zipcode)
keep me_number zipcode total_lov_rx total_mev_rx
duplicates drop
gen lov_share = total_lov_rx / (total_lov_rx + total_mev_rx)

*mean and SD of the generic share across doctors within a zipcode
egen mean_lov_share_zip = mean(lov_share), by(zipcode)
egen sd_lov_share_zip = sd(lov_share), by(zipcode)

*display variation across doctors, overall distribution
summ lov_share, detail

*display variation across zipcodes: one row per zipcode
keep mean_lov_share_zip sd_lov_share_zip zipcode
*the original read "duplicates dropsd_lov_share_zip", which is not a valid subcommand
duplicates drop

summ mean_lov_share_zip, detail
summ sd_lov_share_zip, detail

clear

*simvastatin (generic) vs ZOCOR (brand), 200612
use data_comp1.dta
keep if year_month == 200612

*doctor's Rx over all drugs in that month; doctors with none are removed
egen total_rx = total(Rx), by(me_number)
summarize total_rx, detail
drop if total_rx == 0

keep if inlist(drug_name, "SIMVASTATIN", "ZOCOR")
generate simv_flag = (drug_name == "SIMVASTATIN")
generate zoc_flag = (drug_name == "ZOCOR")
egen total_simv_rx = total(Rx*simv_flag), by(me_number)
egen total_zoc_rx = total(Rx*zoc_flag), by(me_number)

*one row per doctor (and zipcode), then the generic share
keep me_number zipcode total_simv_rx total_zoc_rx
duplicates drop
generate simv_share = total_simv_rx / (total_simv_rx + total_zoc_rx)

*mean and SD of the share across doctors within a zipcode
egen mean_simv_share_zip = mean(simv_share), by(zipcode)
egen sd_simv_share_zip = sd(simv_share), by(zipcode)

*variation across doctors, overall distribution
summarize simv_share, detail

*variation across zipcodes: one row per zipcode
keep mean_simv_share_zip sd_simv_share_zip zipcode
duplicates drop

summarize mean_simv_share_zip, detail
summarize sd_simv_share_zip, detail

clear

*pravastatin (generic) vs PRAVACHOL (brand), 200610
use data_comp1.dta
keep if year_month == 200610

*doctor's Rx over all drugs in that month; doctors with none are removed
egen total_rx = total(Rx), by(me_number)
drop if total_rx == 0

keep if inlist(drug_name, "PRAVASTATIN", "PRAVACHOL")
generate pstatin_flag = (drug_name == "PRAVASTATIN")
generate prav_flag = (drug_name == "PRAVACHOL")
egen total_pstatin_rx = total(Rx*pstatin_flag), by(me_number)
egen total_prav_rx = total(Rx*prav_flag), by(me_number)

*one row per doctor (and zipcode), then the generic share
keep me_number zipcode total_pstatin_rx total_prav_rx
duplicates drop
generate pstatin_share = total_pstatin_rx / (total_pstatin_rx + total_prav_rx)

*mean and SD of the share across doctors within a zipcode
egen mean_pstatin_share_zip = mean(pstatin_share), by(zipcode)
egen sd_pstatin_share_zip = sd(pstatin_share), by(zipcode)

*variation across doctors, overall distribution
summarize pstatin_share, detail

*variation across zipcodes: one row per zipcode
keep mean_pstatin_share_zip sd_pstatin_share_zip zipcode
duplicates drop

summarize mean_pstatin_share_zip, detail
summarize sd_pstatin_share_zip, detail

clear

***************************************************************************************
*For IV: build the set of zipcodes with at least 3 doctors
***************************************************************************************

*Preliminary step: build the set of zipcodes in the instrument sample
*Outputs: zipcodes_IV_cutoff2.dta (retained zipcodes), doctor_zips.dta (doctor-zipcode pairs)
*and me_number_IV_cutoff2.dta (doctors located in a retained zipcode).
*me_number_IV_cutoff2.dta is read by the Table 1, Table 2 and Table 4 code in this file.

use masterfile_1_new

*doctor-zipcode pairs observed in 01/2000
keep if year_month == 200001
keep me_number zipcode
duplicates drop

*number of doctors per zipcode, reduced to one row per zipcode
egen num_docs_zip = count(me_number), by(zipcode)
keep num_docs_zip zipcode 
duplicates drop

*keep zipcodes at or above the 25th percentile of doctors per zipcode;
*r(p25) is left behind by the summarize command immediately above, so the two lines must stay adjacent
summ num_docs_zip, detail
keep if num_docs_zip >= r(p25)

*NOTE(review): the section header says "at least 3 doctors" and the original comment here said
*"drop zipcodes with 2 or fewer physicians", but the cutoff actually applied is r(p25);
*the two agree only if the 25th percentile equals 3 in the data - confirm.
keep zipcode
sort zipcode

save zipcodes_IV_cutoff2.dta, replace

use masterfile_1_new

*all doctor-zipcode pairs over the whole period
keep me_number zipcode
duplicates drop

sort me_number
save doctor_zips.dta, replace

*keep the doctors located in a retained zipcode (old-style match merge on zipcode)
sort zipcode
merge zipcode using zipcodes_IV_cutoff2.dta
tab _merge
keep if _merge == 3
drop _merge

*one row per doctor
keep me_number
duplicates drop

sort me_number
save me_number_IV_cutoff2.dta, replace

clear

***************************************************************************************
*Table 4: Time to First Prescription of New Drug, U.S. Physicians, 2000—2010		
***************************************************************************************

use masterfile_1_new

keep me_number Rx Z_pt zipcode year_month drug_name New_3mo-NewGeneric_9mo N_total_month

*Use = the doctor's maximum N_total_month; N_0 is 0 when that maximum is 0 and 1 otherwise
egen Use = max(N_total_month), by(me_number)

summ N_total_month, detail

gen N_0 = 1
replace N_0 = 0 if Use == 0

gen Z_ptxUse = Z_pt*N_0
egen max_Z_ptxUse = max(Z_ptxUse), by(me_number)

*the twelve drugs studied in Table 4
keep if (drug_name == "LESCOL XL" | drug_name == "LOVASTATIN" | drug_name == "ZETIA" | drug_name == "CRESTOR" | drug_name == "VYTORIN" | drug_name == "PRAVASTATIN" | drug_name == "SIMVASTATIN" | drug_name == "ADVICOR" | drug_name == "ALTOPREV" | drug_name == "PRAVIGARD PAC" | drug_name == "LOVAZA" | drug_name == "SIMCOR" )

*Launch month of each drug, stated once and used for both variables below.
*The original repeated the list twice and its first entry read "20001 0" (a stray space,
*which is a syntax error); the second list had 200010 for LESCOL XL.
gen long launch_ym = .
replace launch_ym = 200010 if drug_name == "LESCOL XL"
replace launch_ym = 200112 if drug_name == "LOVASTATIN"
replace launch_ym = 200210 if drug_name == "ZETIA"
replace launch_ym = 200308 if drug_name == "CRESTOR"
replace launch_ym = 200407 if drug_name == "VYTORIN"
replace launch_ym = 200604 if drug_name == "PRAVASTATIN"
replace launch_ym = 200606 if drug_name == "SIMVASTATIN"
replace launch_ym = 200112 if drug_name == "ADVICOR"
replace launch_ym = 200206 if drug_name == "ALTOPREV"
replace launch_ym = 200306 if drug_name == "PRAVIGARD PAC"
replace launch_ym = 200410 if drug_name == "LOVAZA"
replace launch_ym = 200802 if drug_name == "SIMCOR"

*Z x Use and Z in the drug's launch month; missing on every other row
gen Z_drug_pxUse = N_0*Z_pt if year_month == launch_ym
gen Z_drug_p = Z_pt if year_month == launch_ym
drop launch_ym

*calendar month and year taken from the YYYYMM value
gen month = substr(string(year_month),5,2)
destring month, replace
gen year = substr(string(year_month),1,4)
destring year, replace

save temp8a.dta, replace
 
*Z_drug_pxUse is non-missing only on the launch-month row; copy that value to every row of the
*doctor-drug pair. The two counts show how many rows are missing after and before the spread.
egen Z_drug_pxUseII = max(Z_drug_pxUse), by(me_number drug_name)
count if Z_drug_pxUseII == .
count if Z_drug_pxUse == .
 
drop Z_drug_pxUse 
ren Z_drug_pxUseII Z_drug_pxUse

*same for Z_drug_p
egen Z_drug_pII = max(Z_drug_p), by(me_number drug_name)
count if Z_drug_pII == .
count if Z_drug_p == .
 
drop Z_drug_p 
ren Z_drug_pII Z_drug_p

save temp8ab.dta, replace

*side file: one row per (doctor, drug, Z_drug_p), keyed by me_number and drug_num
preserve
keep me_number drug_name Z_drug_p
duplicates drop
encode drug_name, gen(drug_num)
sort me_number drug_num
save tempynew2.dta, replace 
restore

*drug_num must be created again because restore brought back the data from before the encode above
encode drug_name, gen(drug_num)
 
*earliest month with Rx != 0 for the doctor-drug pair, spread to all rows of the pair
*(in Stata "Rx != 0" is also true on rows where Rx is missing)
egen first_rx_date = min(year_month) if Rx != 0, by(me_number drug_num)
egen max_first_rx_date = max(first_rx_date), by(me_number drug_num)
summ max_first_rx_date if drug_name == "SIMVASTATIN"
 
*split YYYYMM into year and month and convert to a month index counted from 2000 (2000m1 = 1)
gen firstyear = substr(string(max_first_rx_date),1,4)
destring firstyear, replace
gen firstmonth = substr(string(max_first_rx_date),5,2)
destring firstmonth, replace
gen first_rx_date_in_months =12*(firstyear - 2000) + firstmonth
 
save temp8b.dta, replace
 
*launch month of each drug on the same month index as first_rx_date_in_months
*(12*(year - 2000) + month)
gen launch_in_months = .
replace launch_in_months = 3*12 + 8 if drug_name == "CRESTOR"
replace launch_in_months = 4*12 + 7 if drug_name == "VYTORIN"
replace launch_in_months = 2*12 + 10 if drug_name == "ZETIA"
replace launch_in_months = 6*12 + 4 if drug_name == "PRAVASTATIN"
replace launch_in_months = 6*12 + 6 if drug_name == "SIMVASTATIN"
replace launch_in_months = 0*12 + 10 if drug_name == "LESCOL XL"
replace launch_in_months = 2*12 if drug_name == "LOVASTATIN"
replace launch_in_months = 1*12 + 12 if drug_name == "ADVICOR"
replace launch_in_months = 2*12 + 6 if drug_name == "ALTOPREV"
replace launch_in_months = 3*12 + 6 if drug_name == "PRAVIGARD PAC"
replace launch_in_months = 4*12 + 10 if drug_name == "LOVAZA"
replace launch_in_months = 8*12 + 2 if drug_name == "SIMCOR"

*months between launch and first Rx; stays 0 for any drug not listed above
gen time_to_first_rx_in_months = 0
replace time_to_first_rx_in_months = first_rx_date_in_months - launch_in_months if launch_in_months != .
drop launch_in_months
 
*build another instrument as the adoption propensity of neighboring doctors:
*oth_adoption_share_zip = (sum of Z_ptxUse over the other rows of the same zipcode-month)
*                         / (number of other rows of the same zipcode-month)
*NOTE(review): count() and sum() run over rows. The data at this point still carries drug_name,
*so if it has one row per doctor-drug-month these are row counts, not counts of distinct doctors - confirm.
egen num_docs_zip = count(me_number), by(zipcode year_month)
gen num_other_docs_zip = num_docs_zip - 1
egen num_adopters_zip = sum(Z_ptxUse), by(zipcode year_month)
gen num_oth_adopters_zip = num_adopters_zip - Z_ptxUse
gen oth_adoption_share_zip = num_oth_adopters_zip / num_other_docs_zip
summ oth_adoption_share_zip if year_month == 200001
summ oth_adoption_share_zip if year_month == 201012

*this is the instrument:
*For each drug, flag one month, take the doctor-level maximum of share x flag (the share in the
*flagged month, 0 if the doctor has no row in it) and assign it to the rows of that drug.
*The flagged month is the month before the launch month used for Z_drug_pxUse, except for
*LOVAZA (see the note at that stanza). The flag_* variables are kept: they are saved in
*tempyc.dta and reused when the num_rx control is built.
gen L1oth_adoption_share_zip_drug = .

gen flag_200307 = 0
replace flag_200307 = 1 if year_month == 200307
egen max_IV = max(oth_adoption_share_zip*flag_200307), by(me_number)
replace L1oth_adoption_share_zip_drug = max_IV if drug_name == "CRESTOR" 
drop max_IV

gen flag_200406 = 0
replace flag_200406 = 1 if year_month == 200406
egen max_IV = max(oth_adoption_share_zip*flag_200406), by(me_number)
replace L1oth_adoption_share_zip_drug = max_IV if drug_name == "VYTORIN" 
drop max_IV

gen flag_200209 = 0
replace flag_200209 = 1 if year_month == 200209
egen max_IV = max(oth_adoption_share_zip*flag_200209), by(me_number)
replace L1oth_adoption_share_zip_drug = max_IV if drug_name == "ZETIA" 
drop max_IV
 
gen flag_200603 = 0
replace flag_200603 = 1 if year_month == 200603
egen max_IV = max(oth_adoption_share_zip*flag_200603), by(me_number)
replace L1oth_adoption_share_zip_drug = max_IV if drug_name == "PRAVASTATIN" 
drop max_IV

gen flag_200605 = 0
replace flag_200605 = 1 if year_month == 200605
egen max_IV = max(oth_adoption_share_zip*flag_200605), by(me_number)
replace L1oth_adoption_share_zip_drug = max_IV if drug_name == "SIMVASTATIN" 
drop max_IV

gen flag_200009 = 0
replace flag_200009 = 1 if year_month == 200009
egen max_IV = max(oth_adoption_share_zip*flag_200009), by(me_number)
replace L1oth_adoption_share_zip_drug = max_IV if drug_name == "LESCOL XL" 
drop max_IV

*LOVASTATIN and ADVICOR share the same flagged month
gen flag_200111 = 0
replace flag_200111 = 1 if year_month == 200111
egen max_IV = max(oth_adoption_share_zip*flag_200111), by(me_number)
replace L1oth_adoption_share_zip_drug = max_IV if drug_name == "LOVASTATIN" 
replace L1oth_adoption_share_zip_drug = max_IV if drug_name == "ADVICOR" 
drop max_IV

gen flag_200205 = 0
replace flag_200205 = 1 if year_month == 200205
egen max_IV = max(oth_adoption_share_zip*flag_200205), by(me_number)
replace L1oth_adoption_share_zip_drug = max_IV if drug_name == "ALTOPREV" 
drop max_IV

gen flag_200305 = 0
replace flag_200305 = 1 if year_month == 200305
egen max_IV = max(oth_adoption_share_zip*flag_200305), by(me_number)
replace L1oth_adoption_share_zip_drug = max_IV if drug_name == "PRAVIGARD PAC" 
drop max_IV

*NOTE(review): LOVAZA is flagged at 200410, which is the same month as its launch month in the
*Z_drug_pxUse and time-to-first-Rx code; every other drug is flagged one month before launch.
*If the one-month lag is intended here too, this should be 200409 - confirm before changing,
*because flag_200410 is also referenced when the num_rx control is built.
gen flag_200410 = 0
replace flag_200410 = 1 if year_month == 200410
egen max_IV = max(oth_adoption_share_zip*flag_200410), by(me_number)
replace L1oth_adoption_share_zip_drug = max_IV if drug_name == "LOVAZA" 
drop max_IV

gen flag_200801 = 0
replace flag_200801 = 1 if year_month == 200801
egen max_IV = max(oth_adoption_share_zip*flag_200801), by(me_number)
replace L1oth_adoption_share_zip_drug = max_IV if drug_name == "SIMCOR" 
drop max_IV

save tempyc.dta, replace

*doctor's total Rx per month over the rows currently in memory
egen num_rx = sum(Rx), by(me_number year_month)

*build the doctor-drug level num_rx control:
*for each drug, the doctor's num_rx in the month flagged for that drug (flag_* variables already
*in the data), obtained as the doctor-level maximum of num_rx x flag.
*Two fixes relative to the original:
*  1. L1num_rx_drug was replaced without ever being generated, which stops the do-file;
*  2. from SIMVASTATIN onwards the stanzas multiplied oth_adoption_share_zip (copied from the
*     instrument code) instead of num_rx.
*Neither num_rx_drug nor L1num_rx_drug is in the keep list that follows this block, so these
*fixes do not change the files saved afterwards.

gen num_rx_drug = .
gen L1num_rx_drug = .

egen num_rx_drug_calc = max(num_rx*flag_200307), by(me_number)
replace L1num_rx_drug = num_rx_drug_calc if drug_name == "CRESTOR"
drop num_rx_drug_calc

egen num_rx_drug_calc = max(num_rx*flag_200406), by(me_number)
replace L1num_rx_drug = num_rx_drug_calc if drug_name == "VYTORIN"
drop num_rx_drug_calc

egen num_rx_drug_calc = max(num_rx*flag_200209), by(me_number)
replace L1num_rx_drug = num_rx_drug_calc if drug_name == "ZETIA"
drop num_rx_drug_calc

egen num_rx_drug_calc = max(num_rx*flag_200603), by(me_number)
replace L1num_rx_drug = num_rx_drug_calc if drug_name == "PRAVASTATIN"
drop num_rx_drug_calc

egen num_rx_drug_calc = max(num_rx*flag_200605), by(me_number)
replace L1num_rx_drug = num_rx_drug_calc if drug_name == "SIMVASTATIN"
drop num_rx_drug_calc

egen num_rx_drug_calc = max(num_rx*flag_200009), by(me_number)
replace L1num_rx_drug = num_rx_drug_calc if drug_name == "LESCOL XL"
drop num_rx_drug_calc

egen num_rx_drug_calc = max(num_rx*flag_200111), by(me_number)
replace L1num_rx_drug = num_rx_drug_calc if drug_name == "LOVASTATIN"
replace L1num_rx_drug = num_rx_drug_calc if drug_name == "ADVICOR"
drop num_rx_drug_calc

egen num_rx_drug_calc = max(num_rx*flag_200205), by(me_number)
replace L1num_rx_drug = num_rx_drug_calc if drug_name == "ALTOPREV"
drop num_rx_drug_calc

egen num_rx_drug_calc = max(num_rx*flag_200305), by(me_number)
replace L1num_rx_drug = num_rx_drug_calc if drug_name == "PRAVIGARD PAC"
drop num_rx_drug_calc

egen num_rx_drug_calc = max(num_rx*flag_200410), by(me_number)
replace L1num_rx_drug = num_rx_drug_calc if drug_name == "LOVAZA"
drop num_rx_drug_calc

egen num_rx_drug_calc = max(num_rx*flag_200801), by(me_number)
replace L1num_rx_drug = num_rx_drug_calc if drug_name == "SIMCOR"
drop num_rx_drug_calc

*reduce the panel to the distinct combinations of the doctor-drug level variables
keep me_number drug_name drug_num time_to_first_rx_in_months Z_drug_p Z_drug_pxUse Use max_Z_ptxUse L1oth_adoption_share_zip_drug num_other_docs_zip
duplicates drop

save temp8c.dta, replace

*N_0 is rebuilt from Use because it is not in the keep list above
generate N_0 = (Use != 0)

*generic = 1 for the three generic statins, 0 otherwise
generate generic = inlist(drug_name, "PRAVASTATIN", "SIMVASTATIN", "LOVASTATIN")

*interactions of the launch-month adoption measures with the generic indicator and with non-use
generate Z_drug_pxUse_x_generic = Z_drug_pxUse*generic
generate Z_drug_pxNoUse = Z_drug_p*(1-N_0)
generate Z_drug_pxNoUse_x_generic = Z_drug_p*(1-N_0)*generic

*the same interactions for the instrument
generate IV_drug_pxUse_x_generic = L1oth_adoption_share_zip_drug*generic
generate IV_drug_pxNoUse = L1oth_adoption_share_zip_drug*(1-N_0)
generate IV_drug_pxNoUse_x_generic = L1oth_adoption_share_zip_drug*(1-N_0)*generic

summarize time_to_first_rx_in_months, detail

*log of months to first Rx; missing when the time is zero, negative or missing
generate log_T = log(time_to_first_rx_in_months)

save temp8d.dta, replace

count

*estimation file without the zipcode restriction
keep me_number drug_num log_T Z_drug_pxUse Z_drug_pxUse_x_generic Z_drug_p Z_drug_pxNoUse Z_drug_pxNoUse_x_generic max_Z_ptxUse L1oth_adoption_share_zip_drug IV_drug_pxUse_x_generic
duplicates drop

count
*drop doctors that contribute only one non-missing log_T observation
egen count_me = count(log_T), by(me_number)
count if count_me == 1
drop if count_me == 1
count

save temp8e.dta, replace

*for zipcode FE, restrict to the doctors listed in me_number_IV_cutoff2.dta
*(the original comment described this as dropping the 2.5% of observations with the fewest
*doctors per zipcode)

use temp8d.dta

sort me_number
merge me_number using me_number_IV_cutoff2.dta
tab _merge
keep if _merge == 3
drop _merge

keep generic me_number drug_num time_to_first_rx log_T Z_drug_pxUse Z_drug_pxUse_x_generic max_Z_ptxUse L1oth_adoption_share_zip_drug IV_drug_pxUse_x_generic
duplicates drop

count
*drop doctors that contribute only one non-missing log_T observation
egen count_me = count(log_T), by(me_number)
count if count_me == 1
drop if count_me == 1
count

save temp8g.dta, replace

count

*add back the zipcode variable
sort me_number
merge me_number using doctor_zips.dta
tab _merge
drop if _merge == 2
drop _merge

count
*Three-digit ZIP prefix. zipcode is numeric (string() is applied to it), so ZIP codes with a
*leading zero are stored with fewer than five digits: string(zipcode) turns 02138 into "2138"
*and the old prefix became 213, the same as for ZIP codes 213xx. Pad to five digits first so
*the prefix is 021. Assumes zipcode holds plain 5-digit ZIP codes - TODO confirm.
*NOTE(review): this changes which zip3-by-drug cells are singletons below, so samples built
*with the old prefix will differ wherever leading-zero ZIP codes are present.
gen zip3 = substr(string(zipcode, "%05.0f"),1,3) if !missing(zipcode)
destring zip3, replace

*drop zip3-by-drug cells that contain a single non-missing log_T observation
egen num_obs = count(log_T), by(zip3 drug_num)
drop if num_obs == 1
count

save temp8j.dta, replace

*add lagged number of prescriptions: need to go back and build this into the dataset
*
*Builds num_rx_drug_calc.dta: one row per (me_number, drug_num) holding L1num_rx_drug.
*For each drug, L1num_rx_drug is the doctor's total prescriptions in a month (num_rx)
*evaluated in the month selected by that drug's flag_YYYYMM variable.
*NOTE(review): flag_YYYYMM is presumably a 0/1 indicator for year_month == YYYYMM,
*created earlier in the file - confirm.
*
*FIX: the original code used num_rx for CRESTOR, VYTORIN, ZETIA and PRAVASTATIN but
*oth_adoption_share_zip (the neighbours' adoption share) for every drug from
*SIMVASTATIN onward, a copy-paste from the instrument-building section. All drugs
*now use num_rx, as the variable name and the comment above require.
*This changes L1num_rx_drug for those eight drugs, so downstream estimates that
*control for it will differ from runs of the original code.

use tempyc.dta

egen num_rx = sum(Rx), by(me_number year_month)

gen L1num_rx_drug = .

*parallel lists: the k-th drug uses the k-th flag month
*(LOVASTATIN and ADVICOR share flag_200111, as in the original code)
local drugs `" "CRESTOR" "VYTORIN" "ZETIA" "PRAVASTATIN" "SIMVASTATIN" "LESCOL XL" "LOVASTATIN" "ADVICOR" "ALTOPREV" "PRAVIGARD PAC" "LOVAZA" "SIMCOR" "'
local flags 200307 200406 200209 200603 200605 200009 200111 200111 200205 200305 200410 200801

local k = 0
foreach d of local drugs {
	local ++k
	local f : word `k' of `flags'
	*doctor-level value of num_rx in the flagged month (max of num_rx*flag over the doctor's rows)
	egen num_rx_drug_calc = max(num_rx*flag_`f'), by(me_number)
	replace L1num_rx_drug = num_rx_drug_calc if drug_name == "`d'"
	drop num_rx_drug_calc
}

*keep only the merge keys and the new control; sorted for the old-style merge used later
keep me_number drug_num L1num_rx_drug
duplicates drop

sort me_number drug_num

*explicit file name instead of relying on the last file name Stata remembers
save num_rx_drug_calc.dta, replace

*Linear probability models for early prescription of a new drug, full sample.
*consider alternative outcome variable: an indicator for whether the drug is prescribed within 2 years of its introduction and use that as the dependent variable
*cluster by doctor (OLS) or zip3 (OLS, IV)
*
*FIX: the original loaded temp8j.dta and merged num_rx_drug_calc.dta twice in a row.
*The first merge changed the data in memory, so the second "use temp8j.dta" (without
*clear) stopped with r(4). The data are now loaded and merged once.

use temp8j.dta, clear

*merge the L1.num_rx variable, N_{i,t-1}
sort me_number drug_num
merge me_number drug_num using num_rx_drug_calc.dta
tab _merge
drop if _merge == 2
drop _merge

*outcome indicators: 1 unless time to first prescription exceeds the cutoff (in months)
gen rx_within_one_year = 1
replace rx_within_one_year = 0 if time_to_first_rx > 12

gen rx_within_six_months = 1
replace rx_within_six_months = 0 if time_to_first_rx > 6

gen rx_within_two_years = 1
replace rx_within_two_years = 0 if time_to_first_rx > 24

*drop singletons for the doctor FE and the zipcode-drug FE; dropping one kind can
*create the other, so the pair of drops is run twice
drop num_obs
egen num_obs = count(rx_within_one_year), by(me_number)
drop if num_obs == 1
egen num_obs_zip = count(rx_within_one_year), by(zipcode drug_num)
drop if num_obs_zip == 1

drop num_obs num_obs_zip
egen num_obs = count(rx_within_one_year), by(me_number)
drop if num_obs == 1
egen num_obs_zip = count(rx_within_one_year), by(zipcode drug_num)
drop if num_obs_zip == 1

*FIX: tabulate accepts at most two variables and rx_within* matches three,
*so "tab rx_within*" stopped with r(103); tab1 gives one one-way table per variable
tab1 rx_within*

*cluster by 5-digit zipcode

*a1: zipcode-drug FE
reghdfe rx_within_one_year Z_drug_pxUse L1num_rx_drug, absorb(i.zipcode##i.drug_num) vce(cluster zipcode)

est sto a1

*a2: adds doctor FE
reghdfe rx_within_one_year Z_drug_pxUse L1num_rx_drug, absorb(me_number i.zipcode##i.drug_num) vce(cluster zipcode)

est sto a2

*a3: a1 plus the generic interaction
reghdfe rx_within_one_year Z_drug_pxUse Z_drug_pxUse_x_generic L1num_rx_drug, absorb( i.zipcode##i.drug_num) vce(cluster zipcode)

est sto a3

*a4: a2 plus the generic interaction
reghdfe rx_within_one_year Z_drug_pxUse Z_drug_pxUse_x_generic L1num_rx_drug, absorb(me_number i.zipcode##i.drug_num) vce(cluster zipcode)

est sto a4

estimates table a1 a2 a3 a4, b(%7.4f) se(%7.4f) t(%7.4f) p(%7.4f) stfmt(%7.4f) stats(N r2_a r2) 

*restrict the sample to eventual adopters+users
*(continues with the data left in memory by the full-sample regressions)

keep if max_Z_ptxUse == 1

drop if L1num_rx_drug == .

*re-drop singletons for the doctor FE and the zipcode-drug FE on the smaller sample;
*two passes, each starting from freshly computed counts
forvalues pass = 1/2 {
	drop num_obs num_obs_zip
	egen num_obs = count(rx_within_one_year), by(me_number)
	drop if num_obs == 1
	egen num_obs_zip = count(rx_within_one_year), by(zipcode drug_num)
	drop if num_obs_zip == 1
}

*baseline

*cluster by 5-digit zipcode

local rhs Z_drug_pxUse Z_drug_pxUse_x_generic L1num_rx_drug

*a5: zipcode-drug FE
reghdfe rx_within_one_year `rhs', absorb(i.zipcode##i.drug_num) vce(cluster zipcode)
est sto a5

*a6: adds doctor FE
reghdfe rx_within_one_year `rhs', absorb(me_number i.zipcode##i.drug_num) vce(cluster zipcode)
est sto a6

estimates table a5 a6, b(%7.4f) se(%7.4f) t(%7.4f) p(%7.4f) stfmt(%7.4f) stats(N r2_a r2) 

clear

********************************************************
*Table 4 robustness
********************************************************
*Replicate with logit estimation
*Replicate replacing the dependent variable with an indicator for first prescription within 24 months
*cluster by doctor
********************************************************

*Replicate with logit estimation

use temp8j.dta

*merge the L1.num_rx variable, N_{i,t-1}
sort me_number drug_num
merge me_number drug_num using num_rx_drug_calc.dta
tab _merge
drop if _merge == 2
drop _merge

*outcome indicators: 0 when time to first prescription exceeds the cutoff (in months), 1 otherwise
gen rx_within_one_year = !(time_to_first_rx > 12)
gen rx_within_six_months = !(time_to_first_rx > 6)
gen rx_within_two_years = !(time_to_first_rx > 24)

*drop singletons for the doctor FE and the zipcode-drug FE, two passes
drop num_obs
forvalues pass = 1/2 {
	if `pass' == 2 drop num_obs num_obs_zip
	egen num_obs = count(rx_within_one_year), by(me_number)
	drop if num_obs == 1
	egen num_obs_zip = count(rx_within_one_year), by(zipcode drug_num)
	drop if num_obs_zip == 1
}

*numeric zipcode-drug identifier and a constant flag (neither is used by the regressions below)
gen zipcode_drug_fe = string(zipcode) + "00000" + string(drug_num)
destring zipcode_drug_fe, replace
gen flag = 1

*declare the doctor-by-drug panel needed by xtlogit, fe
sort me_number drug_num
xtset me_number drug_num

*regressor lists and the zipcode-drug FE term shared by the models below
local base Z_drug_pxUse L1num_rx_drug
local inter Z_drug_pxUse Z_drug_pxUse_x_generic L1num_rx_drug
local zipdrug i.zipcode##i.drug_num

*replace zipcode-drug FE with drug FE; estimate with OLS and logit

reghdfe rx_within_one_year `base', absorb(me_number i.drug_num) 
est sto a2a

xtlogit rx_within_one_year `base' i.drug_num, fe 
est sto a2

reghdfe rx_within_one_year `inter', absorb(me_number i.drug_num) 
est sto a4a

xtlogit rx_within_one_year `inter' i.drug_num, fe 
est sto a4

estimates table a2a a2 a4a a4, b(%7.4f) se(%7.4f) t(%7.4f) p(%7.4f) stfmt(%7.4f) stats(N r2_a r2) 

*Two further sets of the four zipcode-drug FE models, in this order:
*  1. Replicate replacing the dependent variable with an indicator for first
*     prescription within 24 months (clustered by zipcode)
*  2. one-year outcome, cluster by doctor
foreach spec in "rx_within_two_years zipcode" "rx_within_one_year me_number" {
	local y : word 1 of `spec'
	local cl : word 2 of `spec'

	reghdfe `y' `base', absorb(`zipdrug') vce(cluster `cl')
	est sto a1

	reghdfe `y' `base', absorb(me_number `zipdrug') vce(cluster `cl')
	est sto a2

	reghdfe `y' `inter', absorb(`zipdrug') vce(cluster `cl')
	est sto a3

	reghdfe `y' `inter', absorb(me_number `zipdrug') vce(cluster `cl')
	est sto a4

	estimates table a1 a2 a3 a4, b(%7.4f) se(%7.4f) t(%7.4f) p(%7.4f) stfmt(%7.4f) stats(N r2_a r2) 
}

clear

*restrict to eventual adopters
*Same robustness checks as for the full sample, on doctors with max_Z_ptxUse == 1.

use temp8j.dta

*merge the L1.num_rx variable, N_{i,t-1}
sort me_number drug_num
merge me_number drug_num using num_rx_drug_calc.dta
tab _merge
drop if _merge == 2
drop _merge

*outcome indicators: 0 when time to first prescription exceeds the cutoff (in months), 1 otherwise
gen rx_within_one_year = !(time_to_first_rx > 12)
gen rx_within_six_months = !(time_to_first_rx > 6)
gen rx_within_two_years = !(time_to_first_rx > 24)

*drop singletons for the doctor FE and the zipcode-drug FE, two passes on the full sample
drop num_obs
forvalues pass = 1/2 {
	if `pass' == 2 drop num_obs num_obs_zip
	egen num_obs = count(rx_within_one_year), by(me_number)
	drop if num_obs == 1
	egen num_obs_zip = count(rx_within_one_year), by(zipcode drug_num)
	drop if num_obs_zip == 1
}

*sample restriction, then rows without the lagged-prescriptions control are removed
keep if max_Z_ptxUse == 1

drop if L1num_rx_drug == .

*two more singleton passes on the restricted sample
forvalues pass = 1/2 {
	drop num_obs num_obs_zip
	egen num_obs = count(rx_within_one_year), by(me_number)
	drop if num_obs == 1
	egen num_obs_zip = count(rx_within_one_year), by(zipcode drug_num)
	drop if num_obs_zip == 1
}

*declare the doctor-by-drug panel needed by xtlogit, fe
sort me_number drug_num
xtset me_number drug_num

count

local inter Z_drug_pxUse Z_drug_pxUse_x_generic L1num_rx_drug
local zipdrug i.zipcode##i.drug_num

*Replicate with logit estimation

reghdfe rx_within_one_year `inter', absorb(me_number i.drug_num) 
est sto a4a

xtlogit rx_within_one_year `inter' i.drug_num, fe 

est sto a4

estimates table a4a a4, b(%7.4f) se(%7.4f) t(%7.4f) p(%7.4f) stfmt(%7.4f) stats(N r2_a r2) 

*Two further pairs of zipcode-drug FE models, in this order:
*  1. Replicate replacing the dependent variable with an indicator for first
*     prescription within 24 months (clustered by zipcode)
*  2. one-year outcome, cluster errors by doctor
foreach spec in "rx_within_two_years zipcode" "rx_within_one_year me_number" {
	local y : word 1 of `spec'
	local cl : word 2 of `spec'

	reghdfe `y' `inter', absorb(`zipdrug') vce(cluster `cl')
	est sto a3

	reghdfe `y' `inter', absorb(me_number `zipdrug') vce(cluster `cl')
	est sto a4

	estimates table a3 a4, b(%7.4f) se(%7.4f) t(%7.4f) p(%7.4f) stfmt(%7.4f) stats(N r2_a r2) 
}

clear

***************************************************************************************
*Table 5: Prescription Diversity, U.S. Physicians, 2000—2010	
***************************************************************************************
*Data build for the diversity outcomes. Starting from masterfile_1_new, this section
*constructs a doctor-month panel with a concentration index of prescriptions (hhi_rx),
*a count of products prescribed (num_drugs_month), lagged prescription volumes, and
*the neighbours' adoption share used as an instrument. Intermediate files written:
*tempC.dta, tempC1.dta and tempC2.dta (tempC2.dta is saved twice; the second save,
*with the instrument variables added, overwrites the first).

use masterfile_1_new

keep me_number Rx Z_pt year_month drug_name N_total_month zipcode

*give every row of a doctor-month the same zipcode (the largest value observed that month)
egen max_zip = max(zipcode), by(me_number year_month)
replace zipcode = max_zip
drop max_zip
 
*Use is the doctor's maximum of N_total_month over all rows; N_0 is 0 only when Use is exactly 0
egen Use = max(N_total_month), by(me_number)
gen N_0 = 1
replace N_0 = 0 if Use == 0

*hhi
*sum of squared within-doctor-month shares of Rx; doctor-months whose index is 0 are dropped
egen num_rx = sum(Rx), by(me_number year_month)
egen num_rx_zip = sum(Rx), by(zipcode year_month)
gen Rx_share = Rx/num_rx
summ Rx_share
gen Rx_share_sq = Rx_share*Rx_share
egen hhi_rx = sum(Rx_share_sq), by(me_number year_month) 
summ hhi_rx, detail
drop if hhi_rx == 0
save tempC.dta, replace

use tempC.dta

*count number of distinct products prescribed each month
keep me_number Rx drug_name hhi_rx Z_pt year_month N_0  num_rx num_rx_zip zipcode
duplicates drop

save tempC1.dta, replace

*flag is 0 only for rows with Rx exactly 0; its doctor-month sum is the product count
gen flag = 1
replace flag = 0 if Rx == 0

egen num_drugs_month = sum(flag), by(me_number year_month)
summ num_drugs_month, detail
drop if num_drugs_month == 0

*adoption measure split by N_0
gen Z_ptxUse = Z_pt*N_0
gen Z_ptxNoUse = Z_pt*(1-N_0)

*collapse to the doctor-month level: only doctor-month variables are kept, then duplicates removed
keep hhi_rx num_drugs_month N_0 Z_pt Z_ptxUse Z_ptxNoUse year_month me_number num_rx num_rx_zip zipcode
duplicates drop

*convert YYYYMM into a consecutive month counter so that lag operators step by one calendar month
gen year = substr(string(year_month),1,4)
destring year, replace
gen month = substr(string(year_month),5,2)
destring month, replace
gen date_in_months = 12*year + month

xtset me_number date_in_months

*lagged own volume, and lagged zipcode volume net of the doctor's own lagged volume
gen L1num_rx = L1.num_rx
gen L1num_rx_zip_other = L1.num_rx_zip - L1num_rx

gen L2num_rx = L2.num_rx
gen L2num_rx_zip_other = L2.num_rx_zip - L2num_rx

*change between the first and second lag
gen D1num_rx = L1num_rx - L2num_rx
gen D1num_rx_zip_other = L1num_rx_zip_other - L2num_rx_zip_other

save tempC2.dta, replace

*twelve-month lags and the change between the first and twelfth lag
gen L12num_rx = L12.num_rx
gen L12num_rx_zip_other = L12.num_rx_zip - L12num_rx

gen D12num_rx = L1num_rx - L12num_rx
gen D12num_rx_zip_other = L1num_rx_zip_other - L12num_rx_zip_other

*build another instrument as the adoption propensity of neighboring doctors:
*share of the other doctors in the same zipcode-month with Z_ptxUse == 1
*(the doctor's own row is subtracted from both numerator and denominator;
*the share is missing when the doctor is alone in the zipcode-month)
egen num_docs_zip = count(me_number), by(zipcode year_month)
gen num_other_docs_zip = num_docs_zip - 1
egen num_adopters_zip = sum(Z_ptxUse), by(zipcode year_month)
gen num_oth_adopters_zip = num_adopters_zip - Z_ptxUse
gen oth_adoption_share_zip = num_oth_adopters_zip / num_other_docs_zip
summ oth_adoption_share_zip if year_month == 200001
summ oth_adoption_share_zip if year_month == 201012

gen L1num_oth_adopters_zip = L1.num_oth_adopters_zip
gen L1oth_adoption_share_zip = L1.oth_adoption_share_zip

*same construction for Z_ptxNoUse
egen num_adopt_no_use_zip = sum(Z_ptxNoUse), by(zipcode year_month)
gen num_oth_adopt_no_use_zip = num_adopt_no_use_zip - Z_ptxNoUse
gen oth_adopt_no_use_share_zip = num_oth_adopt_no_use_zip / num_other_docs_zip
gen L1oth_adopt_no_use_share_zip = L1.oth_adopt_no_use_share_zip

save tempC2.dta, replace

*****************************************************************************
*Table 5, columns 1 and 2: estimation sample (tempC2g.dta) and regressions of the
*two diversity outcomes on the adoption measure.

use tempC2.dta

*to identify zipcode-month FE, drop the 2.5% of observations corresponding to least-populated zipcodes
*(done by keeping only doctors that match me_number_IV_cutoff2.dta)
sort me_number
merge me_number using me_number_IV_cutoff2.dta
tab _merge
keep if _merge == 3
drop _merge

*require the lagged neighbours' adoption share
keep if L1oth_adoption_share_zip != .

*drop doctors left with a single month
egen count_me = count(year_month), by(me_number)
drop if count_me == 1

save tempC2g.dta, replace

*now, replicate with doctor-specific timetrends

use tempC2g.dta

*month counter used for the doctor-specific linear trend
gen months_since_start = date_in_months - (12*2000 + 2)

*restrict sample to eventual adopters+users
egen max_Z_ptxUse = max(Z_ptxUse), by(me_number)
keep if max_Z_ptxUse == 1

*drop singletons
egen nobs = count(num_drugs), by(zipcode year_month)
drop if nobs == 1

*columns 1 and 2
*same specification for both outcomes: zipcode-month FE, doctor FE and a
*doctor-specific linear trend, clustered by zipcode

foreach outcome in num_drugs hhi_rx {
	reghdfe `outcome' L1num_rx Z_ptxUse, absorb(i.zipcode##i.year_month i.me_number#c.months_since_start me_number) vce(cluster zipcode)
}

clear

*column 3
*Data build for the prescription-propensity outcome: doctor-drug-month rows with
*indicators for generic drugs and for "new" branded/generic windows, and the
*adoption measure interacted with each group. Writes tempdata0.dta and tempdata1.dta.

use masterfile_1_new

keep me_number Rx Z_pt zipcode year_month drug_name New_3mo-NewGeneric_9mo N_total_month

*build a dummy variable (Use) that is 1 for a doctor that ever looks up a statin
*N_0 is 0 only when Use is exactly 0
egen Use = max(N_total_month), by(me_number)
gen N_0 = (Use != 0)

gen Z_ptxUse = Z_pt*N_0

*Rx_flag is 0 only when Rx is exactly 0
gen Rx_flag = (Rx != 0)

gen log_Rx = log(Rx)

*replace log_Rx = 0 if log_Rx == .
*share of the doctor's prescriptions in the month that go to this row's drug
egen num_rx = sum(Rx), by(me_number year_month)
gen Rx_share = Rx/num_rx

gen generic_flag = inlist(drug_name, "SIMVASTATIN", "LOVASTATIN", "PRAVASTATIN")
gen Z_pt_x_Generic = Z_pt*generic_flag
gen Z_ptxUse_x_Generic = Z_ptxUse*generic_flag

save tempdata0.dta, replace

*numeric identifiers for drug, drug-month and doctor-drug
encode drug_name, gen(drug_num)

gen drug_month = drug_name + string(year_month)
encode drug_month, gen(drug_month_num)
tostring me_number, generate(string_me) format(%12.0f)
tostring drug_num, generate(string_drugnum) format(%12.0f)
gen me_number_drug = string_me + "00000000" + string_drugnum 

destring me_number_drug, replace

*New_2: branded drug inside its window of year_month values (bounds inclusive);
*the k-th drug uses the k-th start and the k-th stop
gen New_2 = 0
local brands `" "SIMCOR" "CRESTOR" "LOVAZA" "VYTORIN" "ZETIA" "ADVICOR" "ALTOPREV" "LESCOL XL" "PRAVIGARD PAC" "'
local starts 200803 200309 200412 200408 200211 200201 200207 200011 200307
local stops 201002 200508 200611 200607 200410 200312 200406 200210 200506
local k = 0
foreach b of local brands {
	local ++k
	local lo : word `k' of `starts'
	local hi : word `k' of `stops'
	replace New_2 = 1 if drug_name == "`b'" & inrange(year_month, `lo', `hi')
}

*NewGeneric_2: same construction for the three generics
gen NewGeneric_2 = 0
local generics PRAVASTATIN SIMVASTATIN LOVASTATIN
local starts 200605 200607 200201
local stops 200804 200806 200312
local k = 0
foreach g of local generics {
	local ++k
	local lo : word `k' of `starts'
	local hi : word `k' of `stops'
	replace NewGeneric_2 = 1 if drug_name == "`g'" & inrange(year_month, `lo', `hi')
}

*adoption measure interacted with the four drug groups
gen Z_ptxUse_x_NewGeneric_2 = Z_ptxUse*NewGeneric_2
gen Z_ptxUse_x_NewBranded_2 = Z_ptxUse*New_2
gen Z_ptxUse_x_OldGeneric_2 = Z_ptxUse*generic_flag*(1-NewGeneric_2)
gen Z_ptxUse_x_OldBranded_2 = Z_ptxUse*(1-generic_flag)*(1-New_2)

*build doctor-new-generic, doctor-old-generic, and doctor-new-branded FE
*(the doctor id inside the group, 0 outside it)
gen old_generic = generic_flag*(1-NewGeneric_2)
gen me_number_new_generic = me_number*NewGeneric_2
gen me_number_old_generic = me_number*old_generic
gen me_number_new_branded = me_number*New_2

egen max_Z_ptxUse = max(Z_ptxUse), by(me_number)

save tempdata1.dta, replace

*calculate the date in months
*months_since_start counts calendar months with January 2000 = 1
gen year = substr(string(year_month),1,4)
destring year, replace
gen month = substr(string(year_month),5,2)
destring month, replace
gen date_in_months = 12*year + month
gen months_since_start = date_in_months - 12*2000
gen months_since_start_x_1_minus_Z = months_since_start*(1-Z_ptxUse)

*min_Z_is_1: one month after the doctor's last month with Z_ptxUse == 0
*(the product above is 0 whenever Z_ptxUse == 1, so the max picks the latest month with Z_ptxUse == 0)
egen min_Z_is_1 = max(months_since_start_x_1_minus_Z), by(me_number)
replace min_Z_is_1 = min_Z_is_1 + 1

*now, need to get +/- 24 months from start date for each doctor
*(the variable names say 12mo, but the offsets used are 24 months)
gen min_Z_is_1_less_12mo = min_Z_is_1 - 24
gen min_Z_is_1_plus_12mo = min_Z_is_1 + 24

*keep_Z_window_24mo is 1 for months strictly inside the window
gen keep_Z_window_24mo = 1
replace keep_Z_window_24mo = 0 if months_since_start >= min_Z_is_1_plus_12mo 
replace keep_Z_window_24mo = 0 if months_since_start <= min_Z_is_1_less_12mo

summ Z_ptxUse if keep_Z_window_24mo == 1
 
*count obs whose window is not fully inside the sample period
*(these two commands only count; nothing is dropped here)
count if min_Z_is_1_less_12mo < 0

count if min_Z_is_1_plus_12mo > 132

save tempdata2.dta, replace
 
*interactions of the adoption measure for doctors with N_0 == 0 with the four drug groups
gen Z_ptxNoUse = Z_pt*(1-N_0)
gen Z_ptxNoUse_x_NewGeneric_2 = Z_ptxNoUse*NewGeneric_2
gen Z_ptxNoUse_x_NewBranded_2 = Z_ptxNoUse*New_2
gen Z_ptxNoUse_x_OldGeneric_2 = Z_ptxNoUse*generic_flag*(1-NewGeneric_2)
gen Z_ptxNoUse_x_OldBranded_2 = Z_ptxNoUse*(1-generic_flag)*(1-New_2)
 
save tempdata2F.dta, replace
 
*Rebuild the doctor-drug identifier with a shorter separator, keep doctor-drug-months
*that appear exactly once, declare the panel, and attach the instrument
*(L1oth_adoption_share_zip, from instrument_merge.dta) interacted with the four drug
*groups. Writes tempdataB2.dta and tempdataB22.dta.

drop me_number_drug
gen me_number_drug = string_me + "000" + string(drug_num) 
destring me_number_drug, replace
format me_number_drug %30.0f
*NOTE(review): the identifier is stored as a double, which is exact only up to about
*16 digits - confirm me_number is short enough for the concatenation to stay unique

*1. full sample of physicians and months

*keep only doctor-drug-months that occur once, so the panel declaration below is valid
gen flag = 1
egen num_repeats = sum(flag), by(me_number_drug year_month)
summ num_repeats, detail
keep if num_repeats == 1
drop num_repeats flag string_me drug_num 
xtset me_number_drug year_month

*FIX: added ", replace". Every other save in this file overwrites its target; without
*it this line stops with r(602) whenever the do-file is run a second time.
save tempdataB2.dta, replace

use tempdataB2.dta

*old-style merge on doctor-month; _merge is left in the data and dropped later in the file
sort me_number year_month
merge me_number year_month using instrument_merge.dta
tab _merge

*instrument interacted with the four drug groups
gen Z_IV_x_NewGeneric_2 = L1oth_adoption_share_zip*NewGeneric_2
gen Z_IV_x_NewBranded_2 = L1oth_adoption_share_zip*New_2
gen Z_IV_x_OldGeneric_2 = L1oth_adoption_share_zip*generic_flag*(1-NewGeneric_2)
gen Z_IV_x_OldBranded_2 = L1oth_adoption_share_zip*(1-generic_flag)*(1-New_2)

save tempdataB22.dta, replace

*build N_{it}
*Writes num_rx_merge.dta: one row per doctor-month with the doctor's total
*prescriptions (num_rx) and its one-month lag (L1num_rx), sorted by me_number year_month.

use masterfile_1_new

keep me_number Rx year_month
egen num_rx = sum(Rx), by(me_number year_month)
keep num_rx me_number year_month
duplicates drop

*FIX: year_month is a YYYYMM integer, so using it as the panel time variable makes
*L1. look for e.g. 200100 when the current month is 200101, and the lag is missing
*for every January. The lag is now taken on a consecutive month counter, the same
*construction this file uses for date_in_months elsewhere.
gen date_in_months = 12*floor(year_month/100) + mod(year_month, 100)
sort me_number date_in_months
xtset me_number date_in_months

gen L1num_rx = L1.num_rx

*restore the original panel declaration and variable set before saving
xtset me_number year_month
drop date_in_months

sort me_number year_month

save num_rx_merge.dta, replace
 
*****************
*restricted dates sample
*Builds tempor.dta (January 2000 plus every June and December, complete cases,
*doctors matched to me_number_IV_cutoff2.dta) and runs the column 3 regression.

use tempdataB22.dta

keep if inlist(month, 6, 12) | year_month == 200001

*complete cases: remove any row where one of the model variables equals "."
foreach v of varlist Rx_flag Z_ptxUse_x_NewGeneric_2 Z_ptxUse_x_NewBranded_2 Z_ptxUse_x_OldGeneric_2 Z_ptxUse_x_OldBranded_2 Z_IV_x_NewGeneric_2 Z_IV_x_NewBranded_2 Z_IV_x_OldGeneric_2 Z_IV_x_OldBranded_2 me_number_drug drug_month_num {
	drop if `v' == .
}

*for zipcode FE, drop 2.5% of observations, i.e. least-populated zipcodes
*(_merge left over from the instrument merge has to go before merging again)
drop _merge
sort me_number
merge me_number using me_number_IV_cutoff2.dta
tab _merge
keep if _merge == 3
drop _merge

save tempor.dta, replace

*cluster by zipcode + doctor-specific trends

use tempor.dta

*drop zipcode-months with a single observation
egen nobs = count(Rx_flag), by(zipcode year_month)
drop if nobs == 1

*recompute the eventual-adopter flag on this sample and keep eventual adopters+users
drop max_Z_ptxUse
egen max_Z_ptxUse = max(Z_ptxUse), by(me_number)
keep if max_Z_ptxUse == 1

local groups Z_ptxUse_x_NewGeneric_2 Z_ptxUse_x_NewBranded_2 Z_ptxUse_x_OldGeneric_2 Z_ptxUse_x_OldBranded_2

reghdfe Rx_flag L1num_rx `groups', absorb(drug_month_num i.zipcode##i.year_month me_number##c.months_since_start) vce(cluster zipcode)

clear

********************************************************
*Table 5 robustness, columns 1 and 2
********************************************************
*control for first lag of the dependent variable
*cluster by doctor
*Replicate with Poisson estimation (column 1 only)

use tempC2g.dta

gen months_since_start = date_in_months - 12*2000 - 2

egen max_Z_ptxUse = max(Z_ptxUse), by(me_number)

*restrict sample to eventual adopters+users
keep if max_Z_ptxUse == 1

*numeric zipcode-month identifier (not used by the regressions below)
gen zipcode_mo_fe = string(zipcode) + string(year_month)
destring zipcode_mo_fe, replace
format zipcode_mo_fe %22.0f

*drop singletons
egen nobs = count(num_drugs), by(zipcode year_month)
drop if nobs == 1

*declare the doctor-month panel; needed for the L1. operator and for xtpoisson, fe
sort me_number date_in_months
xtset me_number date_in_months

*control for first lag of the dependent variable

sort me_number date_in_months
gen L1num_drugs = L1.num_drugs
gen L1hhi_rx = L1.hhi_rx

reghdfe num_drugs L1num_drugs L1num_rx Z_ptxUse, absorb(i.zipcode##i.year_month i.me_number##c.months_since_start) vce(cluster zipcode)

est sto a1

reghdfe hhi_rx L1hhi_rx L1num_rx Z_ptxUse, absorb(i.zipcode##i.year_month i.me_number##c.months_since_start) vce(cluster zipcode)

est sto a2

*cluster by doctor
*baseline regressors (L1num_rx Z_ptxUse) with standard errors clustered by me_number
*NOTE: a1 and a2 are reused, so the lagged-dependent-variable results stored above are overwritten

reghdfe num_drugs L1num_rx Z_ptxUse, absorb(i.zipcode##i.year_month i.me_number##c.months_since_start) vce(cluster me_number)

est sto a1

*FIX: the original regressed hhi_rx on L1hhi_rx and Z_ptxUse here, i.e. it kept the
*lagged dependent variable and left out L1num_rx. That matches neither the num_drugs
*model just above nor the baseline hhi_rx model, so this check changed the
*specification as well as the clustering. It now uses the baseline regressors.
reghdfe hhi_rx L1num_rx Z_ptxUse, absorb(i.zipcode##i.year_month i.me_number##c.months_since_start) vce(cluster me_number)

est sto a2

*Replicate with Poisson estimation

*include doctor and month FE; replicate baseline including doctor and month FE
reghdfe num_drugs L1num_rx Z_ptxUse, absorb(i.year_month me_number) vce(cluster me_number)

xtpoisson num_drugs L1num_rx Z_ptxUse i.year_month, fe 

clear

********************************************************
*Table 5 robustness, column 3
********************************************************
*Replicate with logit estimation
*control for first lag of the dependent variable
*include doctor-drug fixed effects
*include zipcode-month-drug fixed effects
*cluster by doctor

*Step 1: build lagged_rx.dta, the one-month lag of the prescription indicator by
*doctor-drug, keyed on me_number_drug and year_month.

use masterfile_1_new

*FIX: string_me was in this keep list although it is generated two commands below.
*If the variable exists in the master file, tostring ..., generate(string_me) fails;
*if it does not, keep fails. It is no longer kept here.
keep me_number year_month drug_name Rx
duplicates drop

encode drug_name, gen(drug_num)

tostring me_number, generate(string_me) format(%12.0f)

*same doctor-drug identifier construction as used for tempor.dta, so the two files can be merged on it
gen me_number_drug = string_me + "000" + string(drug_num) 
destring me_number_drug, replace
format me_number_drug %30.0f

*keep doctor-drug-months that occur once, so the panel declaration is valid
gen flag = 1
egen num_repeats = sum(flag), by(me_number_drug year_month)
summ num_repeats, detail

keep if num_repeats == 1

drop num_repeats flag 

*FIX: the lag was taken with year_month (a YYYYMM integer) as the time variable,
*which makes L1. missing for every January (200101 - 1 = 200100 is not a month).
*A consecutive month counter is used instead.
gen date_in_months = 12*floor(year_month/100) + mod(year_month, 100)
xtset me_number_drug date_in_months

gen Rx_flag = 1
replace Rx_flag = 0 if Rx == 0

gen L1Rx_flag = L1.Rx_flag

keep me_number_drug year_month L1Rx_flag
duplicates drop
sort me_number_drug year_month
save lagged_rx.dta, replace

*Step 2: estimation sample = tempor.dta with the lagged outcome and N_{i,t-1} attached

use tempor.dta

sort me_number_drug year_month
merge me_number_drug year_month using lagged_rx.dta
tab _merge
keep if _merge == 3
drop _merge

sort me_number year_month
merge me_number year_month using num_rx_merge.dta
tab _merge

keep if _merge == 3

*drop zipcode-months with a single observation
egen nobs = count(Rx_flag), by(zipcode year_month)

drop if nobs == 1

*recompute the eventual-adopter flag on this sample and keep eventual adopters+users
drop max_Z_ptxUse

egen max_Z_ptxUse = max(Z_ptxUse), by(me_number)

keep if max_Z_ptxUse == 1

*FIX: drug_num was dropped before tempdataB2.dta was saved, so it is not carried into
*tempor.dta, yet the reduced-FE models below use it. It is rebuilt from drug_name when absent.
capture confirm variable drug_num, exact
if _rc {
	encode drug_name, gen(drug_num)
}

*logit estimation - with reduced FE
reghdfe Rx_flag L1num_rx Z_ptxUse_x_NewGeneric_2 Z_ptxUse_x_NewBranded_2 Z_ptxUse_x_OldGeneric_2 Z_ptxUse_x_OldBranded_2, absorb(i.drug_num i.year_month i.me_number) 

xtlogit Rx_flag L1num_rx Z_ptxUse_x_NewGeneric_2 Z_ptxUse_x_NewBranded_2 Z_ptxUse_x_OldGeneric_2 Z_ptxUse_x_OldBranded_2 i.drug_num i.year_month, fe 

*control for first lag of the dependent variable
reghdfe Rx_flag L1Rx_flag L1num_rx Z_ptxUse_x_NewGeneric_2 Z_ptxUse_x_NewBranded_2 Z_ptxUse_x_OldGeneric_2 Z_ptxUse_x_OldBranded_2, absorb(drug_month_num i.zipcode##i.year_month me_number##c.months_since_start) vce(cluster zipcode)

*include zipcode-month-drug fixed effects
*NOTE(review): the interaction is with "generic", presumably an abbreviation of
*generic_flag, so this absorbs zipcode-month-generic cells rather than
*zipcode-month-drug cells - confirm which was intended
reghdfe Rx_flag L1num_rx Z_ptxUse_x_NewGeneric_2 Z_ptxUse_x_NewBranded_2 Z_ptxUse_x_OldGeneric_2 Z_ptxUse_x_OldBranded_2, absorb(drug_month_num i.zipcode##i.year_month#i.generic me_number##c.months_since_start) vce(cluster zipcode)

*include doctor-drug fixed effects
reghdfe Rx_flag L1num_rx Z_ptxUse_x_NewGeneric_2 Z_ptxUse_x_NewBranded_2 Z_ptxUse_x_OldGeneric_2 Z_ptxUse_x_OldBranded_2, absorb(drug_month_num i.zipcode##i.year_month me_number_drug me_number##c.months_since_start) vce(cluster zipcode)

*cluster by doctor
reghdfe Rx_flag L1num_rx Z_ptxUse_x_NewGeneric_2 Z_ptxUse_x_NewBranded_2 Z_ptxUse_x_OldGeneric_2 Z_ptxUse_x_OldBranded_2, absorb(drug_month_num i.zipcode##i.year_month me_number##c.months_since_start) vce(cluster me_number)

clear

***************************************************************************************
*Figure 1: Database diffusion curves, by doctor characteristic
***************************************************************************************

clear all
set more off

capture log close
log using "$logdir/adoption_patterns2.log", text replace

*load only the columns needed from the doctor x month x drug file
use doc year month drug rx z using "$tmpdir/m1_monthly_alldrugs_withisp", clear

*fix missing z's: give every row of a doctor-month the maximum z observed in that doctor-month
egen byte z_filled = max(z), by(doc year month)
drop z
rename z_filled z

*monthly rx volume per doctor, and a running month index (January 2000 = 1)
egen mrx = total(rx), by(doc year month)
generate mon = 12*(year-2000) + month

*reduce to one row per doctor-month
quietly bysort doc mon: keep if _n==1

*merge in doctor characteristics
rename doc me_nbr
keep me_nbr mon z mrx
merge m:1 me_nbr using "$datadir/doctor_characteristics"
tabulate _merge
*doctors missing from the characteristics file get whichmerge = 0 so they show up in the tabulation
replace whichmerge = 0 if _merge==1
egen one_per_doc = tag(me_nbr)
tabulate whichmerge if one_per_doc
*keep matched doctors with whichmerge equal to 1 or 2
keep if _merge==3 & inlist(whichmerge,1,2)
drop one_per_doc

save "$tmpdir/aptmp2", replace

*diffusion curves showing differential adoption rates

*by sex: share of doctors with z==1 in each month, separately for sex==1 (female) and everyone else
generate byte female = (sex==1)
egen fracz = mean(z), by(mon female)
replace fracz = 100*fracz
label variable fracz "Percent adopted"
label variable mon "Year"
*one plotted point per month x group
egen tagged = tag(mon female)
sort female mon
local xticks 1 "2000" 25 "2002" 49 "2004" 73 "2006" 97 "2008" 121 "2010"
twoway (line fracz mon if tagged & female==0) ///
       (line fracz mon if tagged & female==1, lpattern(dash)), ///
       ylabel(0 20 40 60, angle(0)) ///
       xlabel(`xticks') ///
       xscale(range(0 132)) ///
       legend(label(1 "Males") label(2 "Females") rows(2) size(*0.9)) ///
       subtitle("By sex") ///
       saving("$graphdir/diffusion_by_sex", replace)
graph export "$graphdir/diffusion_by_sex.eps", logo(off) replace
!epstopdf "$graphdir/diffusion_by_sex.eps"
drop fracz tagged

*by specialty
*every specialty code other than the five plotted ones is pooled into code 99
replace spec1 = 99 if !inlist(spec1,33,21,8,39,45)
egen fracz = mean(z), by(mon spec1)
replace fracz = 100*fracz
label variable fracz "Percent adopted"
egen tagged = tag(mon spec1)
sort spec1 mon
local xticks 1 "2000" 25 "2002" 49 "2004" 73 "2006" 97 "2008" 121 "2010"
twoway (line fracz mon if tagged & spec1==33) ///
       (line fracz mon if tagged & spec1==21, lpattern(longdash)) ///
       (line fracz mon if tagged & spec1==8, lpattern(shortdash)) ///
       (line fracz mon if tagged & spec1==39, lpattern(longdash_dot)) ///
       (line fracz mon if tagged & spec1==45, lpattern(shortdash_dot)) ///
       (line fracz mon if tagged & spec1==99, lpattern(dot)), ///
       ylabel(0 20 40 60, angle(0)) ///
       xlabel(`xticks') ///
       xscale(range(0 132)) ///
       legend(label(1 "Internal Med.") label(2 "Family Practice") ///
         label(3 "Cardiology") label(4 "Nephrology") label(5 "OB/Gyn") ///
         label(6 "Other") rows(2) size(*0.9) colgap(*0.7) keygap(*0.8) ) ///
       subtitle("By specialty") ///
       saving("$graphdir/diffusion_by_specialty", replace)
graph export "$graphdir/diffusion_by_specialty.eps", logo(off) replace
!epstopdf "$graphdir/diffusion_by_specialty.eps"
drop fracz tagged

*by graduation year
*four graduation cohorts; doctors outside 1951-2000 (or with missing gradyear) stay unassigned
generate gyg = .
replace gyg = 1 if gradyear>1950 & gradyear<1975
replace gyg = 2 if gradyear>=1975 & gradyear<1983
replace gyg = 3 if gradyear>=1983 & gradyear<1991
replace gyg = 4 if gradyear>=1991 & gradyear<2001
egen fracz = mean(z), by(mon gyg)
replace fracz = 100*fracz
label variable fracz "Percent adopted"
egen tagged = tag(mon gyg)
sort gyg mon
local xticks 1 "2000" 25 "2002" 49 "2004" 73 "2006" 97 "2008" 121 "2010"
twoway (line fracz mon if tagged & gyg==1) ///
       (line fracz mon if tagged & gyg==2, lpattern(longdash)) ///
       (line fracz mon if tagged & gyg==3, lpattern(shortdash)) ///
       (line fracz mon if tagged & gyg==4, lpattern(dash_dot)), ///
       ylabel(0 20 40 60, angle(0)) ///
       xlabel(`xticks') ///
       xscale(range(0 132)) ///
       legend(label(1 "1951-1974") label(2 "1975-1982") ///
         label(3 "1983-1990") label(4 "1991-2000") rows(2) size(*0.9)) ///
       subtitle("By medical school graduation year") ///
       saving("$graphdir/diffusion_by_gradyear", replace)
graph export "$graphdir/diffusion_by_gradyear.eps", logo(off) replace
!epstopdf "$graphdir/diffusion_by_gradyear.eps"
drop fracz tagged

*by region of US
*rgn <state code> <region name>: label every doctor in that state with the region
program define rgn
  replace region = "`2'" if state==`1'
end
generate region = ""

*state codes grouped by region (each code appears in exactly one list; codes not listed stay "")
foreach st of numlist 1 5 13 36 40 52 {
  rgn `st' West
}
foreach st of numlist 2 3 10 11 19 20 28 39 44 46 47 54 {
  rgn `st' South
}
foreach st of numlist 4 6 15 29 35 48 55 {
  rgn `st' Mountain
}
foreach st of numlist 7 21 23 33 37 41 43 51 {
  rgn `st' Northeast
}
foreach st of numlist 8 9 22 30 34 49 {
  rgn `st' Mid-atlantic
}
foreach st of numlist 14 16 17 18 24 25 26 31 32 38 45 53 {
  rgn `st' Midwest
}

egen fracz = mean(z), by(mon region)
replace fracz = 100*fracz
label variable fracz "Percent adopted"
sort region mon
egen tagged = tag(mon region)
local xticks 1 "2000" 25 "2002" 49 "2004" 73 "2006" 97 "2008" 121 "2010"
twoway (line fracz mon if tagged & region=="West") ///
       (line fracz mon if tagged & region=="Mountain", lpattern(longdash)) ///
       (line fracz mon if tagged & region=="Midwest", lpattern(shortdash)) ///
       (line fracz mon if tagged & region=="South", lpattern(longdash_dot)) ///
       (line fracz mon if tagged & region=="Mid-atlantic", lpattern(shortdash_dot)) ///
       (line fracz mon if tagged & region=="Northeast", lpattern(dot)), ///
       ylabel(0 20 40 60, angle(0)) ///
       xlabel(`xticks') ///
       xscale(range(0 132)) ///
       legend(label(1 "West") label(2 "Mountain") label(3 "Midwest") ///
         label(4 "South") label(5 "Mid-Atlantic") label(6 "Northeast") rows(2) size(*0.9) colgap(*0.7) keygap(*0.8) ) ///
       subtitle("By geographic region") ///
       saving("$graphdir/diffusion_by_region", replace)
graph export "$graphdir/diffusion_by_region.eps", logo(off) replace
!epstopdf "$graphdir/diffusion_by_region.eps"
drop fracz tagged

*by med school type
*msrank holds the rank assigned to each doctor's medical school; it starts out missing
gen msrank = .
*mstxt is the value-label text of medschool, used for exact string matching below
decode medschool, gen(mstxt)
*medrank <rank> "<school name>": set msrank to <rank> for doctors whose mstxt equals the name exactly
*(arguments are positional, so names containing spaces or commas must be passed in double quotes)
pro def medrank
  replace msrank = `1' if mstxt=="`2'"
end

*rank lookup table, one call per school; several schools share a rank (ties)
*NOTE(review): the source and vintage of the ranking are not stated in this file - confirm
*schools that match none of the names below keep msrank missing at this point
medrank 40 "ALBERT EINSTEIN COLLEGE OF MEDICINE OF YESHIVA UNIVERSITY"
medrank 21 "BAYLOR COLLEGE OF MEDICINE"
medrank 30 "BOSTON UNIVERSITY SCHOOL OF MEDICINE"
medrank 49 "BOWMAN GRAY SCHOOL OF MEDICINE OF WAKE FOREST UNIVERSITY"
medrank 35 "BROWN UNIVERSITY PROGRAM IN MEDICINE"
medrank 24 "CASE WESTERN RESERVE UNIVERSITY SCHOOL OF MEDICINE"
medrank 8 "COLUMBIA SCHOOL OF MEDICINE"
medrank 18 "CORNELL UNIVERSITY MEDICAL COLLEGE"
medrank 79 "CREIGHTON UNIVERSITY SCHOOL OF MEDICINE"
medrank 37 "DARTMOUTH MEDICAL SCHOOL"
medrank 8 "DUKE UNIVERSITY SCHOOL OF MEDICINE"
medrank 23 "EMORY UNIVERSITY SCHOOL OF MEDICINE"
medrank 67 "GEORGE WASHINGTON UNIVERSITY SCHOOL OF MEDICINE"
medrank 49 "GEORGETOWN UNIVERSITY OF MEDICINE"
medrank 1 "HARVARD MEDICAL SCHOOL"
medrank 45 "INDIANA UNIVERSITY SCHOOL OF MEDICINE"
medrank 63 "JEFFERSON MEDICAL COLLEGE OF THOMAS JEFFERSON UNIVERSITY"
medrank 3 "JOHNS HOPKINS UNIVERSITY SCHOOL OF MEDICINE"
medrank 27 "MAYO MEDICAL SCHOOL"
medrank 72 "MEDICAL COLLEGE OF VIRGINIA COMMONWEALTH UNIVERSITY SCHOOL OF MEDICINE"
medrank 54 "MEDICAL COLLEGE OF WISCONSIN"
medrank 60 "MEDICAL UNIVERSITY OF SOUTH CAROLINA COLLEGE OF MEDICINE"
medrank 88 "MICHIGAN STATE UNIVERSITY COLLEGE OF HUMAN MEDICINE"
medrank 20 "MOUNT SINAI SCHOOL OF MEDICINE OF CITY UNIVERSITY OF NEW YORK"
medrank 14 "NEW YORK UNIVERSITY SCHOOL OF MEDICINE"
medrank 19 "NORTHWESTERN UNIVERSITY MEDICAL SCHOOL"
medrank 31 "OHIO STATE UNIVERSITY COLLEGE OF MEDICINE"
medrank 31 "OREGON HEALTH SCIENCES UNIVERSITY SCHOOL OF MEDICINE"
medrank 67 "RUSH MEDICAL COLLEGE OF RUSH UNIVERSITY"
medrank 67 "SAINT LOUIS UNIVERSITY SCHOOL OF MEDICINE"
medrank 2 "STANFORD UNIVERSITY SCHOOL OF MEDICINE"
medrank 57 "STATE UNIVERSITY OF NEW YORK AT STONY BROOK, SCHOOL OF MEDICINE"
medrank 55 "TEMPLE UNIVERSITY SCHOOL OF MEDICINE"
medrank 79 "TEXAS A & M UNIVERSITY COLLEGE OF MEDICINE"
medrank 83 "TEXAS TECH UNIVERSITY HEALTH SCIENCE CENTER SCHOOL OF MEDICINE"
medrank 49 "TUFTS UNIVERSITY SCHOOL OF MEDICINE"
medrank 74 "UMDNJ-ROBERT WOOD JOHNSON MEDICAL SCHOOL"
medrank 72 "UNIFORMED SERVICES UNIVERSITY OF THE HEALTH SCIENCES"
medrank 37 "UNIVERSITY OF ALABAMA SCHOOL OF MEDICINE"
medrank 67 "UNIVERSITY OF ARIZONA COLLEGE OF MEDICINE"
medrank 83 "UNIVERSITY OF ARKANSAS COLLEGE OF MEDICINE"
medrank 43 "UNIVERSITY OF CALIFORNIA, DAVIS SCHOOL OF MEDICINE"
medrank 45 "UNIVERSITY OF CALIFORNIA, IRVINE, CALIFORNIA COLLEGE OF MEDICINE"
medrank 17 "UNIVERSITY OF CALIFORNIA, SAN DIEGO SCHOOL OF MEDICINE"
medrank 3 "UNIVERSITY OF CALIFORNIA, SAN FRANCISCO SCHOOL OF MEDICINE"
medrank 13 "UNIVERSITY OF CALIFORNIA, UCLA SCHOOL OF MEDICINE"
medrank 10 "UNIVERSITY OF CHICAGO, PRITZKER SCHOOL OF MEDICINE"
medrank 40 "UNIVERSITY OF CINCINNATI COLLEGE OF MEDICINE"
medrank 35 "UNIVERSITY OF COLORADO SCHOOL OF MEDICINE"
medrank 60 "UNIVERSITY OF CONNECTICUT SCHOOL OF MEDICINE"
medrank 43 "UNIVERSITY OF FLORIDA COLLEGE OF MEDICINE"
medrank 74 "UNIVERSITY OF HAWAII JOHN A. BURNS SCHOOL OF MEDICINE"
medrank 49 "UNIVERSITY OF ILLINOIS AT CHICAGO HEALTH SCIENCE CENTER"
medrank 29 "UNIVERSITY OF IOWA COLLEGE OF MEDICINE"
medrank 67 "UNIVERSITY OF KANSAS SCHOOL OF MEDICINE"
medrank 63 "UNIVERSITY OF KENTUCKY COLLEGE OF MEDICINE"
medrank 83 "UNIVERSITY OF LOUISVILLE SCHOOL OF MEDICINE"
medrank 37 "UNIVERSITY OF MARYLAND SCHOOL OF MEDICINE"
medrank 49 "UNIVERSITY OF MASSACHUSETTS MEDICAL SCHOOL"
medrank 45 "UNIVERSITY OF MIAMI SCHOOL OF MEDICINE"
medrank 10 "UNIVERSITY OF MICHIGAN MEDICAL SCHOOL"
medrank 40 "UNIVERSITY OF MINNESOTA MEDICAL SCHOOL"
medrank 76 "UNIVERSITY OF MISSOURI, COLUMBIA SCHOOL OF MEDICINE"
medrank 60 "UNIVERSITY OF NEBRASKA COLLEGE OF MEDICINE"
medrank 88 "UNIVERSITY OF NEVADA SCHOOL OF MEDICINE"
medrank 83 "UNIVERSITY OF NEW MEXICO SCHOOL OF MEDICINE"
medrank 22 "UNIVERSITY OF NORTH CAROLINA AT CHAPEL HILL SCHOOL OF MEDICINE"
medrank 63 "UNIVERSITY OF OKLAHOMA COLLEGE OF MEDICINE"
medrank 5 "UNIVERSITY OF PENNSYLVANIA SCHOOL OF MEDICINE"
medrank 16 "UNIVERSITY OF PITTSBURGH SCHOOL OF MEDICINE"
medrank 34 "UNIVERSITY OF ROCHESTER SCHOOL OF MEDICINE AND DENTISTRY"
medrank 79 "UNIVERSITY OF SOUTH FLORIDA COLLEGE OF MEDICINE"
medrank 31 "UNIVERSITY OF SOUTHERN CALIFORNIA SCHOOL OF MEDICINE"
medrank 79 "UNIVERSITY OF TENNESSEE COLLEGE OF MEDICINE"
medrank 57 "UNIVERSITY OF TEXAS MEDICAL SCHOOL AT HOUSTON"
medrank 55 "UNIVERSITY OF TEXAS MEDICAL SCHOOL AT SAN ANTONIO"
medrank 25 "UNIVERSITY OF TEXAS SOUTHWESTERN MEDICAL SCHOOL AT DALLAS"
medrank 48 "UNIVERSITY OF UTAH SCHOOL OF MEDICINE"
medrank 57 "UNIVERSITY OF VERMONT COLLEGE OF MEDICINE"
medrank 26 "UNIVERSITY OF VIRGINIA SCHOOL OF MEDICINE"
medrank 10 "UNIVERSITY OF WASHINGTON SCHOOL OF MEDICINE"
medrank 28 "UNIVERSITY OF WISCONSIN MEDICAL SCHOOL"
medrank 14 "VANDERBILT UNIVERSITY SCHOOL OF MEDICINE"
medrank 6 "WASHINGTON UNIVERSITY SCHOOL OF MEDICINE"
medrank 63 "WAYNE STATE UNIVERSITY SCHOOL OF MEDICINE"
medrank 88 "WEST VIRGINIA UNIVERSITY SCHOOL OF MEDICINE"
medrank 7 "YALE UNIVERSITY SCHOOL OF MEDICINE"

*schools without a rank: code the non-MD school types by keyword, everything else as 999
replace msrank = 777 if missing(msrank) & (regexm(mstxt,"HOMEO") | regexm(mstxt,"OSTEO"))
replace msrank = 666 if missing(msrank) & regexm(mstxt,"CHIRO")
replace msrank = 555 if missing(msrank) & regexm(mstxt,"PODIA")
replace msrank = 888 if missing(msrank) & regexm(mstxt,"DENT")
replace msrank = 444 if missing(msrank) & regexm(mstxt,"OPTOM")
replace msrank = 999 if missing(msrank)

*rank bands 1-20, 21-50, 51-100 and unranked (999); the keyword codes 444-888 fall in no band
generate msgroup = .
replace msgroup = 1 if msrank<21
replace msgroup = 2 if msrank>20 & msrank<=50
replace msgroup = 3 if msrank>50 & msrank<=100
replace msgroup = 4 if msrank==999

egen fracz = mean(z), by(mon msgroup)
replace fracz = 100*fracz
label variable fracz "Percent adopted"
egen tagged = tag(mon msgroup)
sort msgroup mon
local xticks 1 "2000" 25 "2002" 49 "2004" 73 "2006" 97 "2008" 121 "2010"
twoway (line fracz mon if tagged & msgroup==1) ///
       (line fracz mon if tagged & msgroup==2, lpattern(longdash)) ///
       (line fracz mon if tagged & msgroup==3, lpattern(shortdash)) ///
       (line fracz mon if tagged & msgroup==4, lpattern(longdash_dot)), ///
       ylabel(0 20 40 60, angle(0)) ///
       xlabel(`xticks') ///
       xscale(range(0 132)) ///
       legend(label(1 "1-20") label(2 "21-50") label(3 "51-100") ///
         label(4 "Other/unranked") rows(2) size(*0.9)) ///
       subtitle("By medical school rank") ///
       saving("$graphdir/diffusion_by_medschool", replace)
graph export "$graphdir/diffusion_by_medschool.eps", logo(off) replace
!epstopdf "$graphdir/diffusion_by_medschool.eps"
drop fracz tagged

*by prescription volume
*bin doctors by their average monthly Rx count over all observed months
egen meanmrx = mean(mrx), by(me_nbr)
generate pvol = .
replace pvol = 1 if meanmrx<25
replace pvol = 2 if meanmrx>=25 & meanmrx<100
replace pvol = 3 if meanmrx>=100
egen fracz = mean(z), by(mon pvol)
replace fracz = 100*fracz
label variable fracz "Percent adopted"
label variable mon "Year"
egen tagged = tag(mon pvol)
sort pvol mon
local xticks 1 "2000" 25 "2002" 49 "2004" 73 "2006" 97 "2008" 121 "2010"
twoway (line fracz mon if tagged & pvol==1) ///
       (line fracz mon if tagged & pvol==2, lpattern(longdash)) ///
       (line fracz mon if tagged & pvol==3, lpattern(shortdash)), ///
       ylabel(0 20 40 60, angle(0)) ///
       xlabel(`xticks') ///
       xscale(range(0 132)) ///
       legend(label(1 "<25") label(2 "25-100") label(3 "100+") rows(2) size(*0.9)) ///
       subtitle("By monthly Rx volume") ///
       saving("$graphdir/diffusion_by_rxvol", replace)
graph export "$graphdir/diffusion_by_rxvol.eps", logo(off) replace
!epstopdf "$graphdir/diffusion_by_rxvol.eps"
drop fracz tagged

*combine all the graphs into one
*graph combine is run from inside $graphdir so the .gph files can be named without a path.
*The starting directory is remembered and restored at the end of this block: the sections
*that follow open their datasets with relative paths (e.g. use masterfile_1_new), which
*would otherwise be resolved against $graphdir.
local startdir "`c(pwd)'"
cd "$graphdir"
graph combine diffusion_by_sex.gph diffusion_by_specialty.gph diffusion_by_medschool.gph diffusion_by_gradyear.gph diffusion_by_region.gph diffusion_by_rxvol.gph, iscale(*0.5) rows(3) ysize(9) xsize(6.85) saving(diffusion_by_combined, replace)
graph export diffusion_by_combined.eps, logo(off) replace
!epstopdf diffusion_by_combined.eps

*combine by pairs into three
graph combine diffusion_by_sex.gph diffusion_by_gradyear.gph, rows(2) iscale(*0.8) ysize(9) xsize(6.85) saving(diffusions_1, replace)
graph export diffusions_1.eps, logo(off) replace
!epstopdf diffusions_1.eps

graph combine diffusion_by_specialty.gph diffusion_by_rxvol.gph, rows(2) iscale(*0.8) ysize(9) xsize(6.85) saving(diffusions_2, replace)
graph export diffusions_2.eps, logo(off) replace
!epstopdf diffusions_2.eps

graph combine diffusion_by_medschool.gph diffusion_by_region.gph, rows(2) iscale(*0.8) ysize(9) xsize(6.85) saving(diffusions_3, replace)
graph export diffusions_3.eps, logo(off) replace
!epstopdf diffusions_3.eps

*return to the directory the do-file was running in
cd "`startdir'"

clear

***************************************************************************************
*Figure 2: Drug diffusion curves, by drug
***************************************************************************************

use masterfile_1_new
 
keep me_number Rx Z_pt zipcode year_month drug_name 
 
encode drug_name, gen(drug_num)
 
*first month (YYYYMM) in which the doctor has a recorded, non-zero Rx for the drug.
*Rx != 0 on its own is also true when Rx is missing, which would date the first
*prescription to a month with no recorded Rx, so missing values are excluded explicitly.
egen first_rx_date = min(year_month) if Rx != 0 & !missing(Rx), by(me_number drug_num)
*spread the first-Rx month to every row of the doctor-drug pair (missing if never prescribed)
egen max_first_rx_date = max(first_rx_date), by(me_number drug_num)
summ max_first_rx_date if drug_name == "SIMVASTATIN"
 
*split YYYYMM arithmetically and convert to months since December 1999 (January 2000 = 1)
gen firstyear = floor(max_first_rx_date/100)
gen firstmonth = mod(max_first_rx_date,100)
gen first_rx_date_in_months =12*(firstyear - 2000) + firstmonth

gen month = mod(year_month,100)
gen year = floor(year_month/100)
gen date_in_months =12*(year - 2000) + month

save tempo1.dta, replace

*Rx_yet_indicator is 1 only in months strictly after the first-Rx month. Never-prescribers
*have a missing first-Rx month, which compares as larger than any date, so they stay at 0.
*NOTE(review): the first-Rx month itself is counted as "not yet" - confirm this is intended.
gen Rx_yet_indicator = (date_in_months > first_rx_date_in_months)
gen flag = 1

*spot checks of the Crestor adopter counts at selected months
foreach ym of numlist 200308 200408 200508 201012 {
    tab Rx_yet_indicator if drug_name == "CRESTOR" & year_month == `ym'
}

*share of doctors who have already prescribed each drug, by drug and month
egen num_doctors = total(flag), by(drug_name year_month)
egen num_adopters = total(Rx_yet_indicator), by(drug_name year_month)
gen adoption_share = num_adopters / num_doctors
summ adoption_share, detail

save tempo10.dta, replace

summ adoption_share if drug_name == "CRESTOR" & year_month == 200308
summ adoption_share if drug_name == "CRESTOR" & year_month == 201012

*one row per drug-month
keep year_month adoption_share drug_name date_in_months year month
duplicates drop

save tempo11.dta, replace

gen date_t = ym(year, month)
format date_t %tm

*one line per drug, in legend order
*NOTE(review): adoption_share is a 0-1 fraction while the axis title reads "Percent adopted" - confirm the intended scale
local druglines
foreach d in "CRESTOR" "SIMVASTATIN" "LOVASTATIN" "PRAVASTATIN" "ZETIA" "VYTORIN" ///
             "LOVAZA" "ADVICOR" "ALTOPREV" "LESCOL XL" "SIMCOR" "PRAVIGARD PAC" {
    local druglines `druglines' (line adoption_share date_t if drug_name == "`d'")
}

twoway `druglines', ytitle("Percent adopted") ttitle("Year") tlabel(2000m1(12)2010m1,format(%tmCY)) legend(on) legend(label(1 "Crestor") label(2 "Simvastatin") label(3 "Lovastatin") label(4 "Pravastatin")  label(5 "Zetia") label(6 "Vytorin") label(7 "Lovaza") label(8 "Advicor") label(9 "Altoprev") label(10 "Lescol XL") label(11 "Simcor") label(12 "Pravigard PAC")) graphregion(color(white))
graph export "adopt_by_year.pdf", replace

twoway `druglines', legend(off) ytitle("Percent adopted",size(small)) ttitle("Year",size(small)) tlabel(2000m1(12)2010m1,format(%tmCY) labsize(small)) graphregion(color(white)) xsize(7) ysize(4) 
graph export "adopt_by_year_no_legend.pdf", replace

clear

***************************************************************************************
*Figure 3: Drug diffusion maps, Crestor 
***************************************************************************************

use masterfile_1_new

keep if drug_name == "CRESTOR"

*months relative to August 2003 (presumably Crestor's launch month - confirm)
gen year = floor(year_month/100)
gen month = mod(year_month,100)
gen date_in_months = 12*year + month
gen months_since_new = date_in_months - 12*2003 - 8

*the full variable name is used throughout instead of the abbreviation "months",
*which would stop resolving if another variable starting with "months" were added
xtset me_number months_since_new

*cumulative Crestor Rx per doctor through each horizon (3, 6, 12, 36 months),
*stored only on the row of the horizon month itself; all other rows stay 0
gen cumulative_rx = 0
foreach h of numlist 3 6 12 36 {
    egen cum_rx = total(Rx) if months_since_new <= `h', by(me_number)
    replace cumulative_rx = cum_rx if months_since_new == `h'
    drop cum_rx
}

*rx_flag marks horizon rows with a positive cumulative count and a non-missing Rx
gen rx_flag = 1
replace rx_flag = 0 if cumulative_rx == 0 | Rx == .
*date of each flagged row; unflagged rows get a sentinel later than any real YYYYMM
gen rx_flag_date = rx_flag*year_month
replace rx_flag_date = 300000 if rx_flag == 0

*a doctor counts as having prescribed from the first flagged horizon month onward
egen min_rx_date = min(rx_flag_date), by(me_number)
gen ever_prescribed_flag = 1
replace ever_prescribed_flag = 0 if year_month < min_rx_date

*keep the four horizon months only (200311, 200402, 200408, 200608)
keep if inlist(months_since_new, 3, 6, 12, 36)

tab year_month

keep ever_prescribed_flag rx_flag Rx me_number year_month zipcode

*collapse to zipcode-month: rx_flag = 1 if any doctor in the zipcode has prescribed by then
egen num_rx_zipcode_month = total(ever_prescribed_flag), by(zipcode year_month)
drop me_number Rx rx_flag ever_prescribed_flag
duplicates drop
gen rx_flag = 1 
replace rx_flag = 0 if num_rx_zipcode_month == 0
sort zipcode

save crestor_diffusion.dta, replace

*one extract per mapped month. No 200309 extract is written here: that month is not in
*the horizon sample kept above (the extract would be empty), and crestor_diffusion_200309.dta
*is built separately from the one-month window further below.
foreach ym of numlist 200311 200402 200408 200608 {
    preserve
    keep if year_month == `ym'
    sort zipcode
    save crestor_diffusion_`ym'.dta, replace
    restore
}

clear

*zipcode maps: zipcodes absent from the extract are shown as not yet reached (rx_flag = 0)
*NOTE(review): old-style merge expects zipcodemap.dta to be sorted by zipcode - confirm
foreach ym of numlist 200311 200402 200408 200608 {
    use zipcodemap.dta, clear
    count
    merge zipcode using crestor_diffusion_`ym'.dta
    tab _merge
    drop if _merge == 2
    drop _merge
    count
    replace rx_flag = 0 if rx_flag == .
    spmap rx_flag using allzipcodes, id(_ID) fcolor(Blues) ocolor(none ..) 
    graph export us_zip_map_rx_flag_`ym'.pdf, replace
}
clear

use masterfile_1_new

*Crestor rows for August and September 2003 only
keep if drug_name == "CRESTOR"
keep if year_month > 200307 & year_month < 200310

*months relative to August 2003
gen year = floor(year_month/100)
gen month = mod(year_month,100)
gen date_in_months = 12*year + month
gen months_since_new = date_in_months - 12*2003 - 8

xtset me_number months_since_new

*cumulative Rx per doctor through month 1, stored on the month-1 row only (0 elsewhere)
egen cum_rx = total(Rx) if months_since_new <= 1, by(me_number)
gen cumulative_rx = cond(months_since_new == 1, cum_rx, 0)
drop cum_rx

*flag month-1 rows with a positive cumulative count and non-missing Rx
gen rx_flag = !(cumulative_rx == 0 | Rx == .)
*flagged rows carry their own date, unflagged rows a sentinel later than any real YYYYMM
gen rx_flag_date = cond(rx_flag == 0, 300000, rx_flag*year_month)

egen min_rx_date = min(rx_flag_date), by(me_number)
gen ever_prescribed_flag = !(year_month < min_rx_date)

keep if months_since_new == 1

tab year_month

keep ever_prescribed_flag rx_flag Rx me_number year_month zipcode

*collapse to zipcode level: rx_flag = 1 if any doctor in the zipcode has prescribed
egen num_rx_zipcode_month = total(ever_prescribed_flag), by(zipcode year_month)
drop me_number Rx rx_flag ever_prescribed_flag
duplicates drop
gen rx_flag = (num_rx_zipcode_month != 0)
sort zipcode

save crestor_diffusion_200309.dta, replace

*map: zipcodes absent from the extract are shown with rx_flag = 0
use zipcodemap.dta, clear
count
merge zipcode using crestor_diffusion_200309.dta
tab _merge
drop if _merge == 2
drop _merge
count
replace rx_flag = 0 if rx_flag == .
spmap rx_flag using allzipcodes, id(_ID) fcolor(Blues) ocolor(none ..) 
graph export us_zip_map_rx_flag_200309.pdf, replace

clear

***************************************************************************************
*Figures 4, 5, and 6: Event study graphs (with tables)
***************************************************************************************

use tempC2g.dta

*restrict to eventual adopters
egen max_Z_ptxUse = max(Z_ptxUse), by(me_number)
keep if max_Z_ptxUse == 1

*add the generic prescription share by doctor-month (matched rows only)
sort me_number year_month
merge me_number year_month using generic_rx_share.dta
tab _merge 
keep if _merge == 3
drop _merge

*add each doctor's database join date (unmatched using rows are discarded)
sort me_number
merge me_number using join_year.dta
tab _merge
drop if _merge == 2
drop _merge

*adoption date in months, from the first four (year) and next two (month) digits of max_join_yr;
*doctors without a join date get a value far beyond the sample so that every event dummy is 0
gen join_year = substr(string(max_join_yr),1,4)
gen join_month = substr(string(max_join_yr),5,2)
destring join_year join_month, replace
gen adopt_date_in_months = 12*join_year + join_month
replace adopt_date_in_months = 30000 if max_join_yr == .

*event-time dummies: twelve-month windows around the adoption month, last window open-ended
gen Z_pt_minus3yrs = (date_in_months >= adopt_date_in_months - 36 & date_in_months < adopt_date_in_months - 24)
gen Z_pt_minus2yrs = (date_in_months >= adopt_date_in_months - 24 & date_in_months < adopt_date_in_months - 12)
gen Z_pt_minus1yrs = (date_in_months >= adopt_date_in_months - 12 & date_in_months < adopt_date_in_months)
gen Z_pt_year = (date_in_months >= adopt_date_in_months & date_in_months < adopt_date_in_months + 12)
gen Z_pt_plus1yrs = (date_in_months >= adopt_date_in_months + 12 & date_in_months < adopt_date_in_months + 24)
gen Z_pt_plus2yrs = (date_in_months >= adopt_date_in_months + 24 & date_in_months < adopt_date_in_months + 36)
gen Z_pt_plus3yrs_more = (date_in_months >= adopt_date_in_months + 36)

*include doctor-specific time trends
gen months_since_start = date_in_months - (12*2000 + 2)

local evt Z_pt_minus3yrs Z_pt_minus2yrs Z_pt_minus1yrs Z_pt_year Z_pt_plus1yrs Z_pt_plus2yrs Z_pt_plus3yrs_more
foreach y in num_drugs hhi_rx generic_rx_share {
    reghdfe `y' L1num_rx `evt', absorb(i.zipcode##i.year_month i.me_number##c.months_since_start) vce(cluster zipcode)
}

*paste estimates into stata by hand, then build figures
*NOTE(review): the figure commands below expect a hand-built dataset (var1, num_rx, se_num_rx,
*hhi, se_hhi, gen_rx_share, se_gen_rx_share); as written they follow directly on the regression
*data, so this stretch presumably cannot run unattended - confirm the intended manual workflow

save event_study_estimates.dta, replace

rename var1 adoptyear

label define adoptyear 1 "Adopt - 3" 2 "Adopt - 2" 3 "Adopt - 1" 4 "Adopt Year" 5 "Adopt + 1" 6 "Adopt + 2" 7 "Adopt + 3" 

label values adoptyear adoptyear

*point estimates with +/- 1.96 standard-error bars, one figure per outcome
serrbar num_rx se_num_rx adoptyear, scale (1.96) yline(0) xlab(1(1)7) xlabel(, valuelabel) graphregion(color(white)) 
graph export "event_num_rx.pdf", replace

serrbar hhi se_hhi adoptyear, scale (1.96) yline(0) xlab(1(1)7) xlabel(, valuelabel) graphregion(color(white)) 
graph export "event_hhi.pdf", replace

serrbar gen_rx_share se_gen_rx_share adoptyear, scale (1.96) yline(0) xlab(1(1)7) xlabel(, valuelabel) graphregion(color(white)) 
graph export "event_gen_rx_share.pdf", replace

*add/edit axis labels in acrobat pro
*add lines in preview, drawing the lines in excel and copying them over

*without doctor-specific time trends
local evt Z_pt_minus3yrs Z_pt_minus2yrs Z_pt_minus1yrs Z_pt_year Z_pt_plus1yrs Z_pt_plus2yrs Z_pt_plus3yrs_more
foreach y in num_drugs hhi_rx generic_rx_share {
    reghdfe `y' L1num_rx `evt', absorb(i.zipcode##i.year_month me_number) vce(cluster zipcode)
}

clear

*all doctors with trends

*lookup 1: one row per doctor with the database join date (join_year.dta)
*NOTE(review): join_year.dta and generic_rx_share.dta are also merged in the event-study code
*above this point; presumably they must exist before that code runs - confirm run order
use physician_characteristics_1.dta

summ max_join_yr, detail

keep me_number max_join_yr 
duplicates drop
sort me_number

save join_year.dta, replace

*extend to include generic rx share

*lookup 2: generic share of each doctor's monthly prescriptions (generic_rx_share.dta).
*zipcode is not loaded: the saved file keeps only me_number, year_month and generic_rx_share,
*so harmonising zipcode within doctor-month had no effect on the output.
use masterfile_1_new
keep me_number Rx year_month drug_name

*generic rx share
*total prescriptions per doctor-month, and the part written for the three generic statins
egen num_rx = total(Rx), by(me_number year_month)
gen generic_flag = inlist(drug_name, "SIMVASTATIN", "LOVASTATIN", "PRAVASTATIN")
egen num_rx_generic = total(Rx*generic_flag), by(me_number year_month)
*missing when the doctor wrote no prescriptions in the month (division by zero)
gen generic_rx_share = num_rx_generic / num_rx
summ generic_rx_share, detail

*one row per doctor-month, sorted for the old-style merges that use this file
keep me_number year_month generic_rx_share 
duplicates drop
sort me_number year_month

save generic_rx_share.dta, replace

use tempC2g.dta

*add the generic prescription share by doctor-month (matched rows only)
sort me_number year_month
merge me_number year_month using generic_rx_share.dta
tab _merge 
keep if _merge == 3
drop _merge

*add each doctor's database join date (unmatched using rows are discarded)
sort me_number
merge me_number using join_year.dta
tab _merge
drop if _merge == 2
drop _merge

*adoption date in months, from the first four (year) and next two (month) digits of max_join_yr;
*doctors without a join date get a value far beyond the sample so that every event dummy is 0
gen join_year = substr(string(max_join_yr),1,4)
gen join_month = substr(string(max_join_yr),5,2)
destring join_year join_month, replace
gen adopt_date_in_months = 12*join_year + join_month
replace adopt_date_in_months = 30000 if max_join_yr == .

*event-time dummies: twelve-month windows around the adoption month, last window open-ended
gen Z_pt_minus3yrs = (date_in_months >= adopt_date_in_months - 36 & date_in_months < adopt_date_in_months - 24)
gen Z_pt_minus2yrs = (date_in_months >= adopt_date_in_months - 24 & date_in_months < adopt_date_in_months - 12)
gen Z_pt_minus1yrs = (date_in_months >= adopt_date_in_months - 12 & date_in_months < adopt_date_in_months)
gen Z_pt_year = (date_in_months >= adopt_date_in_months & date_in_months < adopt_date_in_months + 12)
gen Z_pt_plus1yrs = (date_in_months >= adopt_date_in_months + 12 & date_in_months < adopt_date_in_months + 24)
gen Z_pt_plus2yrs = (date_in_months >= adopt_date_in_months + 24 & date_in_months < adopt_date_in_months + 36)
gen Z_pt_plus3yrs_more = (date_in_months >= adopt_date_in_months + 36)

*linear trend variable for the doctor-specific slopes
gen months_since_start = date_in_months - (12*2000 + 2)

*same three outcomes as the adopter-only event study, here on the sample loaded above without the adopter restriction
local evt Z_pt_minus3yrs Z_pt_minus2yrs Z_pt_minus1yrs Z_pt_year Z_pt_plus1yrs Z_pt_plus2yrs Z_pt_plus3yrs_more
foreach y in num_drugs hhi_rx generic_rx_share {
    reghdfe `y' L1num_rx `evt', absorb(i.zipcode##i.year_month i.me_number##c.months_since_start) vce(cluster zipcode)
}

clear

***************************************************************************************
***************************************************************************************



***************************************************************************************
*Table A.2: Prescription Diversity and Propensity, All U.S. Physicians, 2000—2010	
***************************************************************************************

*diversity: columns 1 and 2

use tempC2g.dta

*linear trend variable for the doctor-specific slopes
gen months_since_start = date_in_months - (12*2000 + 2)

*number of distinct drugs and prescription HHI on database use, no restriction to adopters
foreach y in num_drugs hhi_rx {
    reghdfe `y' L1num_rx Z_ptxUse, absorb(i.zipcode##i.year_month i.me_number#c.months_since_start me_number) vce(cluster zipcode#month)
}

clear

*column 3 

use tempor.dta

*drop zipcode-month cells with a single observation
egen nobs = count(Rx_flag), by(zipcode year_month)
drop if nobs == 1

*prescription propensity
reghdfe Rx_flag L1num_rx Z_ptxUse_x_NewGeneric_2 Z_ptxUse_x_NewBranded_2 Z_ptxUse_x_OldGeneric_2 Z_ptxUse_x_OldBranded_2, absorb(drug_month_num i.zipcode##i.year_month me_number##c.months_since_start) vce(cluster zipcode) 

clear

************************************************************************************



***************************************************************************************
*Table A.4: Prescription Outcomes, Intensity of Use, 2000—2010	
***************************************************************************************

*column 1

use tempC2.dta

*attach tempynew.dta by doctor-month, keeping every row of the master data
sort me_number year_month
merge me_number year_month using tempynew.dta
tab _merge
drop if _merge == 2
drop _merge

*three usage-intensity groups; the cutoffs are the 75th and 90th percentiles of Use
*(so the groups are not equal-sized terciles)
summ Use, detail
gen q12 = r(p75)
gen q23 = r(p90)
 
*N_q1: Use at or below the 75th percentile
gen N_q1 = !(Use > q12)
 
*N_q2: Use above the 75th and at or below the 90th percentile
gen N_q2 = !(Use <= q12) & !(Use > q23)
 
*N_q3: Use above the 90th percentile (a missing Use compares as larger than any number and lands here)
gen N_q3 = !(Use <= q23)
  
*doctor-level lookup of the group indicators
keep me_number N_q1-N_q3
duplicates drop
sort me_number
save lookupintensity2.dta, replace

*step 2: assemble the doctor-by-drug file with intensity-group interactions

use temp8j.dta

*merge the L1.num_rx variable (lagged count; notation N_{it-1} -- TODO confirm)
sort me_number drug_num
merge me_number drug_num using num_rx_drug_calc.dta
tab _merge
*keep master-only and matched rows
keep if _merge != 2
drop _merge

count

*only rows matched in tempynew2.dta are retained
sort me_number drug_num
merge me_number drug_num using tempynew2.dta
tab _merge
drop if _merge != 3
drop _merge

count

*attach the intensity-group flags; unmatched doctors are dropped
sort me_number
merge me_number using lookupintensity2.dta
tab _merge
drop if _merge != 3
drop _merge

count

*Z_drug_p split by intensity group (created in the order q1, q2, q3)
forvalues k = 1/3 {
    gen Z_drug_p_q`k' = Z_drug_p*N_q`k'
}

*the same three terms interacted with the generic indicator
forvalues k = 1/3 {
    gen Z_drug_p_q`k'_x_generic = Z_drug_p_q`k'*generic
}

count

save temp8k.dta, replace

*step 3: Table A.4 column 1 regressions on temp8k.dta

use temp8k.dta

*1 when the first prescription came within 12 months, 0 otherwise
*(a missing time_to_first_rx compares as > 12 in Stata and so yields 0)
gen rx_within_one_year = !(time_to_first_rx > 12)

*two rounds of singleton pruning: doctors with one observation, then
*zipcode-by-drug cells with one observation
drop num_obs
forvalues pass = 1/2 {
    egen num_obs = count(rx_within_one_year), by(me_number)
    drop if num_obs == 1
    egen num_obs_zip = count(rx_within_one_year), by(zipcode drug_num)
    drop if num_obs_zip == 1
    if `pass' < 2 {
        drop num_obs num_obs_zip
    }
}

*restrict sample to eventual adopters+users

egen max_Z_drug_p = max(Z_drug_p), by(me_number)

keep if max_Z_drug_p == 1

*repeat the two pruning rounds on the restricted sample
drop num_obs num_obs_zip
forvalues pass = 1/2 {
    egen num_obs = count(rx_within_one_year), by(me_number)
    drop if num_obs == 1
    egen num_obs_zip = count(rx_within_one_year), by(zipcode drug_num)
    drop if num_obs_zip == 1
    if `pass' < 2 {
        drop num_obs num_obs_zip
    }
}

*cluster by 5-digit zipcode

local rhs L1num_rx_drug Z_drug_p_q1 Z_drug_p_q2 Z_drug_p_q3 ///
    Z_drug_p_q1_x_generic Z_drug_p_q2_x_generic Z_drug_p_q3_x_generic

*zipcode-by-drug effects only
reghdfe rx_within_one_year `rhs', absorb(i.zipcode#i.drug_num) vce(cluster zipcode)

est sto a3

*adds doctor effects
reghdfe rx_within_one_year `rhs', absorb(me_number i.zipcode#i.drug_num) vce(cluster zipcode)

est sto a4

estimates table a3 a4, b(%7.4f) se(%7.4f) t(%7.4f) p(%7.4f) stfmt(%7.4f) stats(N r2_a r2)

clear

*columns 2 and 3: diversity outcomes by intensity group

use tempC2f4.dta

*first two characters of the zipcode string, stored as a number
gen zip2 = substr(string(zipcode),1,2)
destring zip2, replace

*numeric id built from zip2, a "00000" spacer and year_month
*(neither zip2 nor zipcode_mo_fe enters the regressions in this block)
gen zipcode_mo_fe = string(zip2) + "00000" + string(year_month)
destring zipcode_mo_fe, replace

gen months_since_start = date_in_months - 12*2000 - 2

*keep doctors whose Z_pt reaches 1 at some point
egen max_Z_pt = max(Z_pt), by(me_number)

keep if max_Z_pt == 1

*shared right-hand side, absorbed effects and clustering
local rhs L1num_rx Z_pt_q1-Z_pt_q3
local fe_spec me_number##c.months_since_start i.zipcode##i.year_month

reghdfe num_drugs_month `rhs', absorb(`fe_spec') vce(cluster zipcode)
est sto b3

reghdfe hhi_rx `rhs', absorb(`fe_spec') vce(cluster zipcode)
est sto b4

clear

************************************************************************************



***************************************************************************************
*Table A.5: Prescription Propensity, Intensity of Use, 2000—2010	
***************************************************************************************

*Table A.5: propensity regressions with intensity-group interactions

use tempB224.dta

gen months_since_start = date_in_months - 12*2000 - 2

sort me_number year_month
merge me_number year_month using num_rx_merge.dta
tab _merge

*matched rows only (the _merge variable itself is left in the data)
drop if _merge != 3

*discard zipcode-by-year_month cells holding exactly one non-missing Rx_flag
egen nobs = count(Rx_flag), by(zipcode year_month)

keep if nobs != 1

local rhs L1num_rx Z_pt_q1_x_NewGeneric_2-Z_pt_q3_x_OldBranded_2
local fe_trend me_number##c.months_since_start drug_month_num i.zipcode##i.year_month

*doctor effects without doctor-specific slopes
reghdfe Rx_flag `rhs', absorb(me_number drug_month_num i.zipcode##i.year_month) vce(cluster zipcode)

*doctor trends
reghdfe Rx_flag `rhs', absorb(`fe_trend') vce(cluster zipcode)

*keep doctors whose Z_pt reaches 1 at some point
egen max_Z_pt = max(Z_pt), by(me_number)

keep if max_Z_pt == 1

*doctor trends, restricted sample
reghdfe Rx_flag `rhs', absorb(`fe_trend') vce(cluster zipcode)

clear

************************************************************************************



***************************************************************************************
*Table A.6, A.7, A.8: Second-stage estimates, first-stage estimates, all outcomes	
***************************************************************************************

*column 1, time to adoption

use temp8j.dta

*merge the L1.num_rx variable (lagged count; notation N_{it-1} -- TODO confirm)
sort me_number drug_num
merge me_number drug_num using num_rx_drug_calc.dta
tab _merge
keep if _merge != 2
drop _merge

*adoption-window indicators: 1 if time_to_first_rx is at most the cutoff,
*0 otherwise (a missing time_to_first_rx compares as larger and yields 0)
gen rx_within_one_year = !(time_to_first_rx > 12)

gen rx_within_six_months = !(time_to_first_rx > 6)

gen rx_within_two_years = !(time_to_first_rx > 24)

*two rounds of singleton pruning: by doctor, then by zipcode-by-drug cell
drop num_obs
forvalues pass = 1/2 {
    egen num_obs = count(rx_within_one_year), by(me_number)
    drop if num_obs == 1
    egen num_obs_zip = count(rx_within_one_year), by(zipcode drug_num)
    drop if num_obs_zip == 1
    if `pass' < 2 {
        drop num_obs num_obs_zip
    }
}

*sample restrictions: max_Z_ptxUse equal to 1 and a non-missing lagged count
keep if max_Z_ptxUse == 1

drop if L1num_rx_drug == .

*prune again after the restrictions
drop num_obs num_obs_zip
forvalues pass = 1/2 {
    egen num_obs = count(rx_within_one_year), by(me_number)
    drop if num_obs == 1
    egen num_obs_zip = count(rx_within_one_year), by(zipcode drug_num)
    drop if num_obs_zip == 1
    if `pass' < 2 {
        drop num_obs num_obs_zip
    }
}

sort me_number drug_num
xtset me_number drug_num

count

local endog Z_drug_pxUse Z_drug_pxUse_x_generic
local instr L1oth_adoption_share_zip_drug IV_drug_pxUse_x_generic

*cluster by 5-digit zipcode
reghdfe rx_within_one_year L1num_rx_drug (`endog' = `instr'), absorb(drug_num me_number) vce(cluster zipcode) ffirst
est sto a4

*Table A6, column 1
*first stage regressions: each endogenous regressor on the control and both instruments

foreach depvar of local endog {
    reghdfe `depvar' L1num_rx_drug `instr', absorb(drug_num me_number)
}

clear

*columns 2 and 3, diversity

use tempC2g.dta

*the doctor-specific trends below need months_since_start; elsewhere in this
*file it is generated right after loading tempC2g.dta, so it is created here
*with the same formula whenever the loaded data do not already contain it
capture confirm variable months_since_start, exact
if _rc {
    gen months_since_start = date_in_months - 12*2000 - 2
}

*so that IV and OLS sample sizes match:
*drop zipcode-by-year_month cells with a single non-missing num_drugs
egen nobs = count(num_drugs), by(zipcode year_month)
drop if nobs == 1

*with instruments and doctor-specific time trends

*keep doctors whose Z_ptxUse reaches 1 at some point
egen max_Z_ptxUse = max(Z_ptxUse), by(me_number)
keep if max_Z_ptxUse == 1

*so that IV and OLS sample sizes match:
*the restriction above can create new singleton cells, so prune once more
drop nobs
egen nobs = count(num_drugs), by(zipcode year_month)
drop if nobs == 1

*a third, immediately repeated pruning step was removed here: it re-created
*nobs without dropping it first (Stata stops with "variable nobs already
*defined"), and with no change to the sample in between it could not drop
*any further observations

reghdfe num_drugs L1num_rx (Z_ptxUse = L1oth_adoption_share_zip), absorb(year_month i.me_number##c.months_since_start) vce(cluster zipcode#year_month) ffirst

reghdfe hhi_rx L1num_rx (Z_ptxUse = L1oth_adoption_share_zip), absorb(year_month i.me_number##c.months_since_start) vce(cluster zipcode#year_month) ffirst

*first stage
reghdfe Z_ptxUse L1num_rx L1oth_adoption_share_zip, absorb(i.year_month i.me_number##c.months_since_start)

clear

*column 4, propensity

use tempor.dta

*discard zipcode-by-year_month cells holding exactly one non-missing Rx_flag
egen nobs = count(Rx_flag), by(zipcode year_month)

keep if nobs != 1

*rebuild the doctor-level maximum of Z_ptxUse and keep doctors for whom it is 1
drop max_Z_ptxUse

egen max_Z_ptxUse = max(Z_ptxUse), by(me_number)

keep if max_Z_ptxUse == 1

*prune singleton cells again on the restricted sample
drop nobs
egen nobs = count(Rx_flag), by(zipcode year_month)

keep if nobs != 1

local instr Z_IV_x_NewGeneric_2 Z_IV_x_NewBranded_2 Z_IV_x_OldGeneric_2 Z_IV_x_OldBranded_2
local fe_spec i.me_number##c.months_since_start drug_month_num

*second stage: four endogenous interaction terms, four instruments
*NOTE(review): clusters on zipcode#month while the diversity IV regressions in
*this file cluster on zipcode#year_month -- confirm which variable "month" is
reghdfe Rx_flag L1num_rx (Z_ptxUse_x_NewGeneric_2 Z_ptxUse_x_NewBranded_2 Z_ptxUse_x_OldGeneric_2 Z_ptxUse_x_OldBranded_2 = `instr'), absorb(`fe_spec') vce(cluster zipcode#month) ffirst

*first stages: one regression per endogenous term, in the order
*NewGeneric, NewBranded, OldGeneric, OldBranded

foreach grp in NewGeneric NewBranded OldGeneric OldBranded {
    reghdfe Z_ptxUse_x_`grp'_2 L1num_rx `instr', absorb(`fe_spec')
}

clear

***********************************************************************************************



***************************************************************************************
*Table A.9: Mandatory Substitution Laws by Zipcode, Time to First Prescription 
***************************************************************************************

*Table A.9, pooled specification with mandatory-substitution interactions

use temp8j.dta

*attach the zipcode-level variable "mandatory"; rows found only in the law file are discarded
sort zipcode
merge zipcode using mandatory_sub_laws_2000.dta
tab _merge
keep if _merge != 2
drop _merge

*triple interaction
gen ZxUse_mand = Z_drug_pxUse*mandatory
gen ZxUsexgeneric_mand = Z_drug_pxUse_x_generic*mandatory
gen mandatory_generic = mandatory*generic

*rows where the interaction is missing are removed
drop if ZxUse_mand == .

*merge the L1.num_rx variable (lagged count; notation N_{it-1} -- TODO confirm)
sort me_number drug_num
merge me_number drug_num using num_rx_drug_calc.dta
tab _merge
keep if _merge != 2
drop _merge

*adoption-window indicators (a missing time_to_first_rx yields 0)
gen rx_within_one_year = !(time_to_first_rx > 12)

gen rx_within_two_years = !(time_to_first_rx > 24)

keep if max_Z_ptxUse == 1

*two rounds of singleton pruning: zipcode-by-drug cells, then doctors
forvalues pass = 1/2 {
    foreach grp in "zipcode drug_num" "me_number" {
        drop num_obs
        egen num_obs = count(rx_within_one_year), by(`grp')
        drop if num_obs == 1
    }
}

count

local rhs L1num_rx_drug Z_drug_pxUse ZxUse_mand Z_drug_pxUse_x_generic ZxUsexgeneric_mand mandatory_generic

reghdfe rx_within_one_year `rhs', absorb(me_number i.drug_num) vce(cluster zipcode)

*split-sample results

*sample with mandatory == 1; the full data are restored afterwards
preserve

keep if mandatory == 1

drop if L1num_rx_drug == .

*two rounds of singleton pruning: by drug, then by doctor
forvalues pass = 1/2 {
    foreach grp in drug_num me_number {
        drop num_obs
        egen num_obs = count(rx_within_one_year), by(`grp')
        drop if num_obs == 1
    }
}

reghdfe rx_within_one_year L1num_rx_drug Z_drug_pxUse Z_drug_pxUse_x_generic, absorb(me_number i.drug_num) vce(cluster zipcode)

restore

*sample with mandatory == 0 (no preserve: the data are cleared afterwards)
keep if mandatory == 0

drop if L1num_rx_drug == .

forvalues pass = 1/2 {
    foreach grp in drug_num me_number {
        drop num_obs
        egen num_obs = count(rx_within_one_year), by(`grp')
        drop if num_obs == 1
    }
}

reghdfe rx_within_one_year L1num_rx_drug Z_drug_pxUse Z_drug_pxUse_x_generic, absorb(drug_num me_number) vce(cluster zipcode)

clear

************************************************************************************************
*Table A.10: Medical Innovation by Zipcode, Time to First Prescription, U.S. Physicians, 2000—2010	
************************************************************************************************	

*patents granted between 1975 and 1999 were counted by 4-digit zipcode
*(pharma patents, all patents) and stored in "patents_by_zip4.dta"

use temp8j.dta

*first four characters of the zipcode string, stored as a number
*NOTE(review): string(zipcode) has no leading zeros, so a zipcode below 10000
*yields its full digits here rather than a 4-digit prefix -- confirm this
*matches how zip4 was built in patents_by_zip4.dta
gen zip4 = substr(string(zipcode),1,4)
destring zip4, replace

sort zip4
merge zip4 using "patents_by_zip4.dta"
tab _merge
keep if _merge != 2
drop _merge

*rows without a patent count are set to zero patents
foreach v in num_pharma_patents num_patents {
    replace `v' = 0 if `v' == .
}

*cutoffs: 95th and 5th percentiles of num_pharma_patents over the current rows
summ num_pharma_patents, detail
gen academic_cutoff = r(p95)
gen nonacademic_cutoff = r(p5)

*merge the L1.num_rx variable (lagged count; notation N_{it-1} -- TODO confirm)
sort me_number drug_num
merge me_number drug_num using num_rx_drug_calc.dta
tab _merge
keep if _merge != 2
drop _merge

*adoption-window indicators (a missing time_to_first_rx yields 0)
gen rx_within_one_year = !(time_to_first_rx > 12)

gen rx_within_two_years = !(time_to_first_rx > 24)

keep if max_Z_ptxUse == 1

*two rounds of singleton pruning: zipcode-by-drug cells, then doctors
forvalues pass = 1/2 {
    foreach grp in "zipcode drug_num" "me_number" {
        drop num_obs
        egen num_obs = count(rx_within_one_year), by(`grp')
        drop if num_obs == 1
    }
}

*split-sample results for high vs. low pharma patenting

*the same steps run on two subsamples: first rows above academic_cutoff,
*then rows at or below nonacademic_cutoff; the full data are restored after each
foreach sample_rule in "num_pharma_patents > academic_cutoff" "num_pharma_patents <= nonacademic_cutoff" {

    preserve

    keep if `sample_rule'

    keep if max_Z_ptxUse == 1

    drop if L1num_rx_drug == .

    *singleton pruning on non-missing log_T: doctor, drug, doctor
    foreach grp in me_number drug_num me_number {
        drop num_obs
        egen num_obs = count(log_T), by(`grp')
        drop if num_obs == 1
    }

    *singleton pruning on the outcome: drug, then doctor
    foreach grp in drug_num me_number {
        drop num_obs
        egen num_obs = count(rx_within_one_year), by(`grp')
        drop if num_obs == 1
    }

    reghdfe rx_within_one_year L1num_rx_drug Z_drug_pxUse Z_drug_pxUse_x_generic, absorb(drug_num me_number) vce(cluster zipcode)

    restore

}

*consider triple interactions with continuous measure

*log of the pharma patent count, set to 0 where the count is 0
gen log_pats = cond(num_pharma_patents == 0, 0, log(num_pharma_patents))

gen ZxUse_x_log_pats = Z_drug_pxUse*log_pats
gen ZxUse_x_generic_log_pats = Z_drug_pxUse_x_generic*log_pats
gen log_pats_x_generic = log_pats*generic

*estimate on a pruned copy, then restore the full data
preserve

keep if max_Z_ptxUse == 1

drop if L1num_rx_drug == .

*singleton pruning on non-missing log_T: doctor, zipcode-by-drug, doctor
foreach grp in "me_number" "zipcode drug_num" "me_number" {
    drop num_obs
    egen num_obs = count(log_T), by(`grp')
    drop if num_obs == 1
}

*singleton pruning on the outcome: zipcode-by-drug, then doctor
foreach grp in "zipcode drug_num" "me_number" {
    drop num_obs
    egen num_obs = count(rx_within_one_year), by(`grp')
    drop if num_obs == 1
}

local rhs L1num_rx_drug Z_drug_pxUse ZxUse_x_log_pats Z_drug_pxUse_x_generic ZxUse_x_generic_log_pats log_pats_x_generic

reghdfe rx_within_one_year `rhs', absorb(drug_num me_number) vce(cluster zipcode)

restore

*discrete categories

*high_pats: pharma patent count strictly above academic_cutoff
gen high_pats = !(num_pharma_patents <= academic_cutoff)
*low_pats: pharma patent count at or below nonacademic_cutoff
gen low_pats = (num_pharma_patents <= nonacademic_cutoff)

gen ZxUse_x_high_pats = Z_drug_pxUse*high_pats
gen ZxUse_x_generic_high_pats = Z_drug_pxUse_x_generic*high_pats
gen high_pats_x_generic = high_pats*generic

gen ZxUse_x_low_pats = Z_drug_pxUse*low_pats
gen ZxUse_x_generic_low_pats = Z_drug_pxUse_x_generic*low_pats
gen low_pats_x_generic = low_pats*generic

keep if max_Z_ptxUse == 1

drop if L1num_rx_drug == .

*singleton pruning on non-missing log_T: doctor, zip3-by-drug, doctor
*NOTE(review): zip3 is not created in this section, so it must already be in
*temp8j.dta; the other pruning steps in this table group on zipcode instead -- confirm
foreach grp in "me_number" "zip3 drug_num" "me_number" {
    drop num_obs
    egen num_obs = count(log_T), by(`grp')
    drop if num_obs == 1
}

*singleton pruning on the outcome: zip3-by-drug, then doctor
foreach grp in "zip3 drug_num" "me_number" {
    drop num_obs
    egen num_obs = count(rx_within_one_year), by(`grp')
    drop if num_obs == 1
}

local rhs L1num_rx_drug Z_drug_pxUse ZxUse_x_high_pats ZxUse_x_low_pats Z_drug_pxUse_x_generic ZxUse_x_generic_high_pats ZxUse_x_generic_low_pats high_pats_x_generic low_pats_x_generic

reghdfe rx_within_one_year `rhs', absorb(drug_num me_number) vce(cluster zipcode)

clear
	
***************************************************************************************
*Table A.11: Information and Prescribing Heterogeneity Among U.S. Physicians
***************************************************************************************	

*distance of each doctor's prescription-share vector from the group average,
*compared across 2010 database status and across the two months kept below

use masterfile_1_new

*only the first and last month are used
keep if inlist(year_month, 200001, 201012)
tab Z_pt if year_month == 200001
tab Z_pt if year_month == 201012

*drop doctors whose Z_pt was already 1 in 200001
gen Z2000 = Z_pt if year_month == 200001
egen Z_pt_2000 = max(Z2000), by(me_number)
drop if Z_pt_2000 == 1

*spread each doctor's 201012 value of Z_pt to all of that doctor's rows
gen Z2010 = Z_pt if year_month == 201012
egen Z_pt_2010 = max(Z2010), by(me_number)

*assign doctors based on their database status in 2010
*(Z_pt itself is not kept; Z_pt_2010 is the only status variable from here on)
keep me_number drug_name Rx Z_pt_2010 zipcode year_month

save temp.dta, replace

*each row's share of the doctor's total Rx in that month
egen num_rx = sum(Rx), by(me_number year_month)
gen rx_share = Rx / num_rx

*create average rx vector separately for Z = 0 and Z = 1
egen mean_rx_unit_vector = mean(rx_share), by(Z_pt_2010 drug_name year_month)

*compute the squared Euclidean distance (sum of squared differences) between
*each doctor's rx_share and this average
gen delta_rx_share = rx_share - mean_rx_unit_vector
egen rx_distance = sum(delta_rx_share*delta_rx_share), by(me_number year_month)

*collapse to distinct doctor-month rows
keep rx_distance Z_pt_2010 me_number zipcode num_rx year_month
duplicates drop

*distribution of the distance by 2010 status (1 then 0) and month (201012 then 200001)
foreach status in 1 0 {
    foreach ym in 201012 200001 {
        summ rx_distance if Z_pt_2010 == `status' & year_month == `ym', detail
    }
}

*is the difference in means across Z = 0 and Z = 1 significant?
*for each month: plain regression, then the same with zipcode effects absorbed

foreach ym in 201012 200001 {
    reg rx_distance Z_pt_2010 if year_month == `ym'
    areg rx_distance Z_pt_2010 if year_month == `ym', absorb(zipcode)
}

*is the difference in differences of means across Z = 0 and Z = 1 and t = 200001 vs 201012 significant?
gen post = (year_month != 200001)

gen Z_pt_x_post = Z_pt_2010*post

areg rx_distance Z_pt_x_post post, absorb(me_number)

clear

*difference in differences - lipitor:
*same comparison as the full-vector version, but using only the share of
*each doctor's prescriptions in a month that are LIPITOR
use masterfile_1_new

*only the first and last month are used
keep if year_month == 201012 | year_month == 200001
tab Z_pt if year_month == 200001
tab Z_pt if year_month == 201012

*drop doctors whose Z_pt was already 1 in 200001
gen Z2000 = Z_pt if year_month == 200001
egen Z_pt_2000 = max(Z2000), by(me_number)
drop if Z_pt_2000 == 1

*spread each doctor's 201012 value of Z_pt to all of that doctor's rows
gen Z2010 = Z_pt if year_month == 201012
egen Z_pt_2010 = max(Z2010), by(me_number)

*assign doctors based on their database status in 2010
*(Z_pt itself is not kept; Z_pt_2010 is the only status variable from here on)
keep me_number drug_name Rx Z_pt_2010 zipcode year_month

save temp.dta, replace

gen lipitor_flag = 0
replace lipitor_flag = 1 if (drug_name == "LIPITOR")

*total and LIPITOR prescriptions per doctor and month.
*The total was previously summed by(me_number) only, i.e. pooled over both
*months, while the LIPITOR count was per month; the ratio was therefore not a
*within-month share. Both are now per doctor-month, matching the
*full-vector calculation in this file.
egen num_rx = sum(Rx), by(me_number year_month)
egen lipitor_rx = sum(Rx*lipitor_flag), by(me_number year_month)
gen rx_share = lipitor_rx / num_rx

*doctor-months with a zero total have an undefined share
drop if rx_share == .

*collapse to distinct doctor-month rows
keep rx_share me_number Z_pt_2010 zipcode year_month
duplicates drop

*average LIPITOR share separately for Z = 0 and Z = 1, by month
egen mean_rx_unit_vector = mean(rx_share), by(Z_pt_2010 year_month)

*squared difference between each doctor's LIPITOR share and this average
gen delta_rx_share = rx_share - mean_rx_unit_vector
gen rx_distance = delta_rx_share*delta_rx_share

keep rx_distance mean_rx_unit_vector Z_pt_2010 me_number zipcode year_month
duplicates drop

summ rx_distance if Z_pt_2010 == 1 & year_month == 201012, detail
summ rx_distance if Z_pt_2010 == 1 & year_month == 200001, detail
summ rx_distance if Z_pt_2010 == 0 & year_month == 201012, detail
summ rx_distance if Z_pt_2010 == 0 & year_month == 200001, detail

*is the difference in means across Z = 0 and Z = 1 significant?
reg rx_distance Z_pt_2010 if year_month == 201012

areg rx_distance Z_pt_2010 if year_month == 201012, absorb(zipcode)

reg rx_distance Z_pt_2010 if year_month == 200001

areg rx_distance Z_pt_2010 if year_month == 200001, absorb(zipcode)

*is the difference in differences of means across Z = 0 and Z = 1 and t = 200001 vs 201012 significant?
gen post = 1
replace post = 0 if year_month == 200001

gen Z_pt_x_post = Z_pt_2010*post

areg rx_distance Z_pt_x_post post, absorb(me_number)

clear

***************************************************************************************
***************************************************************************************
