* Create data set for doing GT2017 at hospital level

* Load the aggregate raw file, retain (and put first, in this order) the
* variables listed in `aggvars', then attach them to the hospital-level raw
* file. The merge is 1:m: one aggregate row per manufacturer-product-month key,
* many rows per key in the using file.
use "data/EUdata_agg_raw", clear
local aggvars manufacturer product yyyy mm age clinical USlater pct_life_profit profit_lifetime pay USeventual DES enterEU
keep `aggvars'
order `aggvars'
sort manufacturer product yyyy mm
merge 1:m manufacturer product yyyy mm using "data/EUdata_h_raw"

* Diagnostics on what the merge fails to match.
* m2 flags rows present only in the using (hospital) file; qlost is their q.
* Qlost sums qlost within lab_id-year-month, and pctlost relates it to Q.
* NOTE(review): q and Q are not created in this script, so they presumably
* come from EUdata_h_raw -- confirm.
generate m2 = (_merge == 2)
generate qlost = m2 * q
by lab_id yyyy mm, sort: egen Qlost = total(qlost)
summarize Qlost if _merge == 3, detail
generate pctlost = Qlost / Q
summarize pctlost if _merge == 3, detail

* Sample selection. The statements below are order dependent: each total is
* computed on whatever rows survive the drops above it.

* Keep only rows found in both the aggregate and the hospital file.
keep if _merge==3
*drop if Qlost>0

* Replace the incoming Q with the lab_id-year-month total of q on the retained
* rows; the two summaries show Q before and after the rebuild.
su Q
drop Q
bysort lab_id yyyy mm: egen Q=total(q)
su Q
drop if Q==0 //implies all q=0 so nothing affected
drop if Q<=40 //sampling error in s0ht and sght for very small hospitals
* Qjt: total q per manufacturer-product-month over the remaining rows.
bysort manufacturer product yyyy mm: egen Qjt=total(q)
su Qjt
drop if Qjt==0 //implies all q=0 so nothing affected
* Hjt / Hj: sum of (1-zeros) within product-month / within product.
* NOTE(review): presumably zeros is a 0/1 indicator for q==0, which would make
* these counts of rows with positive quantity -- confirm against EUdata_h_raw.
bysort manufacturer product yyyy mm: egen Hjt=total(1-zeros)
bysort manufacturer product: egen Hj=total(1-zeros)
su Hjt Hj, detail
* Hdrop_Hjt: number of rows in the lab_id-year-month whose product has Hjt<=1.
* The drop that would use it is currently disabled.
bysort lab_id yyyy mm: egen Hdrop_Hjt=total(Hjt<=1)
*drop if Hdrop_Hjt==1 //identification problems as zeros get too many?
* Tj: sum over a product's rows of (1-zeros)/Hjt. Rows with Hjt==0 evaluate to
* missing and are ignored by total().
bysort manufacturer product: egen Tj=total((1-zeros)/Hjt)
su Tj, detail
* Hdrop_Tj: number of rows in the lab_id-year-month whose product has Tj<=2.
* The drop that would use it is currently disabled.
bysort lab_id yyyy mm: egen Hdrop_Tj=total(Tj<=2)
*drop if Hdrop_Tj==1 //identifying jFE 
* Tj2 repeats the Tj formula; with both drops above commented out, no rows
* change in between, so Tj2 equals Tj.
bysort manufacturer product: egen Tj2=total((1-zeros)/Hjt)
su Tj2

su Hjt Qjt Q

********************************************************************
* calculate shares and indices on final data set and save/export

* t: consecutive integer index over the (yyyy, mm) combinations in the data.
egen t=group(yyyy mm)

* Shares at the row level: sj = q/M, s0 = 1 - Q/M, sght = sum of sj within
* lab_id-t-DES, and sjg = sj relative to that sum.
* NOTE(review): M is not created in this script; presumably it comes from
* EUdata_h_raw -- confirm.
gen sj=q/M
gen s0=1-Q/M
bysort lab_id t DES: egen sght = total(sj)
gen sjg = sj/sght

* Jht: number of rows in the lab_id-t cell.
* Mt: sum over rows of M/Jht within t; this is the sum of M across lab_id
*     cells only if M is constant within a lab_id-t cell (not checked here).
* w_ht: M relative to Mt.
* Ht: sum over rows of 1/Jht within t, i.e. the number of lab_id cells in t.
bysort lab_id t: egen Jht = total(1)
bysort t: egen Mt=total(M/Jht)
gen w_ht = M/Mt
bysort t: egen Ht=total(1/Jht)

* aggregate shares

*---------------------
* old ages not where we want to do estimation
*drop if age>36
* old flags age>36; note Stata treats a missing age as larger than 36, so such
* rows would also get old==1.
gen old=(age>36)
*---------------------

*indices
*egen j=group(product)
* j indexes (product, old) pairs, so a product is split into two ids once it
* crosses the age threshold.
* NOTE(review): manufacturer is not part of j; this assumes product values are
* unique across manufacturers -- confirm.
egen j=group(product old)
egen tj=group(t j)
egen tg = group(t DES) // nest group
egen h=group(lab_id)
egen th=group(t h)
egen thg=group(th DES)
egen jh=group(j h)

* id: row number when sorted by (tj, h).
* id_p1: id of the next row of the same jh in t order; the last row of each jh
* points to itself.
sort tj h
gen id = _n
sort jh t
by jh: gen id_p1 = id[_n+1]
replace id_p1 = id if id_p1==.

* J_ght / J_ht: number of rows in the t-h-DES cell / in the th cell.
* J_*_Tot_tj: those counts summed over all rows sharing tj.
bysort t h DES: egen J_ght = total(1)
bysort th: egen J_ht = total(1)
bysort tj: egen J_ght_Tot_tj = total(J_ght)
bysort tj: egen J_ht_Tot_tj = total(J_ht)

* for QW hospital heterogeneity analysis
* Rows are written in (tj, h) order, matching the order used to define id.
* NOTE(review): outsheet writes tab-delimited text unless the comma option is
* given, so this .csv file is tab separated -- confirm downstream readers
* expect that.
sort tj h
outsheet t j h tj th Q w_ht s0 sght thg Ht DES jh old id_p1 id zeros M p J_ght J_ht using "data/EUdata_h.csv", replace 

* Save the full hospital-level data set (all variables still in memory).
drop _merge
su t h j th tj Q w_ht s0 
save "data/EUdata_h", replace
********************************************************************
* collapse to aggregate level

* Row-level inputs for the dispersion measures computed in the collapse:
* sj_gt0 is sj where sj>0 (missing otherwise), sj_p1 is the next sj_gt0 of the
* same jh in t order, and Dsjht_gt0 is the change between the two.
generate sj_gt0 = sj if sj > 0

bysort jh (t): generate sj_p1 = sj_gt0[_n+1]
generate Dsjht_gt0 = sj_p1 - sj if sj > 0 & sj_p1 > 0

* Revenue and w_ht-weighted versions of price and shares; these are summed
* over rows in the collapse below.
generate rev = p * q
generate p_w = p * w_ht
foreach s in sj s0 sjg {
	generate `s'_w = `s' * w_ht
}
generate sgt_w = sght * w_ht

* Collapse to one row per (tj tg t j manufacturer product); the counts before
* and after show how many rows go in and come out.
count
collapse (min)  enterEU ///
	(mean) J_ght_Tot_tj J_ht_Tot_tj age old clinical USlater pct_life_profit ///
	       profit_lifetime pay DES USeventual zeros Mt Ht pavg=p ///
	(sum)  q M rev w_ht p_w sj_w s0_w sjg_w sgt_w ///
	(sd)   Vs=sj_gt0 VDs=Dsjht_gt0, ///
	by(tj tg t j manufacturer product)
count

* A missing standard deviation is set to zero.
foreach v of varlist Vs VDs {
	replace `v' = 0 if `v' == .
}

* Quantity-weighted average price from summed revenue and quantity.
generate p = rev / q
*replace p_w=p

* Recompute shares from the summed q and M.
generate sj = q / M
by t, sort: egen Q = total(q)
generate s0 = 1 - Q / M
by t DES, sort: egen sght = total(sj)
generate sjg = sj / sght


* IV for p and NLogit
* Lags of p_w and sjg within j in t order; the first row of each j (or any row
* whose lag is missing) falls back to the current value. Jg counts the rows in
* each t-DES cell.
bysort j (t): generate p_m1 = p_w[_n-1]
by j: generate sjg_m1 = sjg[_n-1]
replace p_m1 = p_w if p_m1 == .
replace sjg_m1 = sjg if sjg_m1 == .
by t DES, sort: egen Jg = total(1)

* Export and save the aggregate data set in tj order.
sort tj
outsheet t j tj age clinical USlater USeventual profit_lifetime pay DES tg zeros ///
	s0 sj sjg p_w Mt p_m1 sjg_m1 Jg Ht Vs VDs old J_ght_Tot_tj J_ht_Tot_tj pct_life_profit ///
	using "data/EUdata_agg.csv", replace
save "data/EUdata_agg", replace

* Descriptives on the aggregate data. Tj is the number of rows per j; it is
* created after the save above, so it is not part of the saved file.
by j, sort: egen Tj = total(1)
summarize j t s0 sj p pavg p_w p_m1 age clinical zeros w_ht Tj
by DES, sort: summarize zeros Jg

* EU entry dates
* Write one row per manufacturer-product with the mean of enterEU, then return
* to the aggregate data held by preserve.
preserve
collapse (mean) enterEU, by(manufacturer product)
sort manufacturer product
save data/EUdata_entrydates, replace
restore


*simple aggregate regressions
* Dependent variable log(sj/s0) and the log within-nest share, split by DES so
* that each nest gets its own coefficient.
gen lnsjs0 = log(sj/s0)
gen lnsjg = log(sjg)
gen lnsjg_des = DES*lnsjg
gen lnsjg_bms = (1-DES)*lnsjg
* Instruments: log of the t-DES row count and its square, split by DES.
replace Jg = log(Jg)
gen Jg2 = Jg^2
gen Jg_des = DES*Jg
gen Jg2_des = DES*Jg2
gen Jg_bms = (1-DES)*Jg
gen Jg2_bms = (1-DES)*Jg2
* Drop the unsplit versions so the wildcards lnsjg* and Jg* used in the IV
* regression match only the DES-specific variables.
drop Jg Jg2 lnsjg
* Age dummies: tabulate creates one aFE# per DISTINCT value of age, numbered in
* sorted order, so aFE# is not necessarily the dummy for age==#.
quietly tab age, gen(aFE)
* Number of dummies actually created. The loop below must be bounded by this
* and not by max(age): with gaps in age, max(age) exceeds the number of
* dummies and the loop would reference a non-existent aFE#; with ages starting
* at 0 it would leave the last dummy without an interaction.
local n_age = r(r)
su age
forvalues i = 1/`n_age' { 
	gen aEUtrials_FE`i'=aFE`i'*USeventual 
	}
* NOTE(review): the regressions hard-code dummies 1-24 and restrict to
* age<=30; confirm these ranges line up with the dummy numbering.
* NOTE(review): the second call uses IV syntax inside reghdfe, which newer
* reghdfe releases may no longer accept (ivreghdfe is the usual replacement)
* -- confirm against the installed version.
reghdfe lnsjs0 lnsjg_des lnsjg_bms p_w aFE1-aFE24 aEUtrials_FE1-aEUtrials_FE24 if age<=30, a(j) cluster(t)
reghdfe lnsjs0 aFE1-aFE24 aEUtrials_FE1-aEUtrials_FE24 (lnsjg* p_w = Jg* p_m1) if age<=30, a(j) cluster(t)
* mc: 90% of the lowest p_w observed for each j.
bysort j: egen mc = min(p_w)
replace mc = .9*mc
* Ep uses the coefficients of the most recent estimation (the IV regression
* above), with the DES or non-DES nesting coefficient chosen by DES.
gen Ep = DES * _b[p_w] * (p_w-mc) * ( 1/(1-_b[lnsjg_des]) - _b[lnsjg_des]/(1-_b[lnsjg_des])*sjg - sj) ///
	+ (1-DES) * _b[p_w] * (p_w-mc) * ( 1/(1-_b[lnsjg_bms]) - _b[lnsjg_bms]/(1-_b[lnsjg_bms])*sjg - sj)
su Ep, detail



