## ======================================
## Table 5: Bias from omitting two-way interaction terms with CA 
## as the control group, with and without campylobacter in the disease selection.
## ======================================

  #Start from an empty global environment
  rm(list=ls())
  
  #Assume base R is running: when this script is source()'d with source
  #references kept, the directory of this file can be recovered from the
  #srcref of a function defined here.
  my.wd <- getSrcDirectory(function(x){x})
  
  #getSrcDirectory() yields "" or a zero-length vector when no source
  #reference is available (e.g. under Rscript), so test the length first
  #and use scalar || so the condition is always a single TRUE/FALSE.
  if(length(my.wd) != 1 || is.na(my.wd) || !nzchar(my.wd)){
    #Try to access working directory through R Studio API
    my.wd <- tryCatch(dirname(rstudioapi::getActiveDocumentContext()$path),
                      error = function(e) e)
    
  }
  
  #Set working directory
  if(is.character(my.wd) && length(my.wd) == 1 && !is.na(my.wd) && nzchar(my.wd)){
    setwd(my.wd)
  } else {
    #Neither method worked (my.wd is empty or a captured error condition).
    #Keep the current working directory; the relative paths used below
    #will fail loudly if it is not the directory containing this script.
    warning("Could not determine the script directory; keeping current working directory: ",
            getwd(), call. = FALSE)
  }


## ======================================
## Load packages and functions
## ======================================

  #Project helper scripts, sourced in this order from the Functions/ folder
  #(paths are relative to the working directory set above)
  for(helper.script in c("prepModelData.R",
                         "installPackageNotFound.R",
                         "formatSig.R",
                         "calculateFRatio.R")){
    source(paste0("Functions/", helper.script))
  }
  
  #Packages used for modeling and table output. installPackageNotFound() is
  #one of the helpers sourced above; presumably it installs the package if
  #missing and attaches it (the code below calls plm(), coeftest(),
  #vcovHC() and stargazer() unqualified) -- TODO confirm in the helper.
  installPackageNotFound("data.table")
  installPackageNotFound("plm")
  installPackageNotFound("stargazer")
  installPackageNotFound("lmtest")
  
## ======================================
## Read in data
## ======================================
  
  #Hospitalization data; going by the file name this is 3-digit-zip by
  #quarter data for 1983-2009 (TODO confirm against the data dictionary).
  #Strings are kept as character rather than converted to factors.
  hosp.zip3.qtr.1983.2009 <- read.csv(
    file = "../Data/hosp_zip3_quarter_1983_2009.csv",
    stringsAsFactors = FALSE
  )
  
## ==========================
## Run models with and without campylobacter in disease selection
## ==========================  

  #Observation windows (years) and disease selections to estimate.
  #Each disease selection is c(<foodborne column name>, <control column name>),
  #passed to prepModelData() as fbi.name / control.name.
  obs.window.list <- list(c(1995:1999), c(1993:2009))
  disease.list <- list("orig" = c("jl.codes.mdc6", "control.jl.codes"),
                       "camp" = c("jl.codes.camp", "control.jl.codes.camp"))
  
  #Loop through combinations of disease selections and observation windows.
  #Every fitted object is saved in the global environment under a name of the
  #form <object>.<disease label>.<first year>.<last year>; the F-ratio and
  #table sections below refer to those names directly.
  for(i in seq_along(disease.list)){
    for(obs.window in obs.window.list){
      
      ## Grab label for disease type ("orig" or "camp")
      disease.label <- names(disease.list)[i]
      
      ## Suffix shared by every object saved for this run
      window.suffix <- paste0(min(obs.window), ".", max(obs.window))
      run.suffix <- paste0(disease.label, ".", window.suffix)
      
      ## Reshape the hospitalization data for the current disease selection
      ## and observation window
      dta.agg.jl <- prepModelData(dta = hosp.zip3.qtr.1983.2009, 
                                  fbi.name = disease.list[[i]][1], 
                                  control.name = disease.list[[i]][2], 
                                  years = obs.window,
                                  control.grp = TRUE,
                                  filter = FALSE)
      
      ## ======================================
      ## Run all models
      ## ======================================
      
      #Number of observations (same for all models within this run)
      n.obs <- nrow(dta.agg.jl)
      
      #Restricted model: two-way (geo.type.pk and year.qtr) fixed effects,
      #without the disease.type x post-1998 interaction
      jl.model <- plm(logCount ~ I(disease.type*m) + I(disease.type*v) + m + v , 
                      index = c("geo.type.pk", "year.qtr"), 
                      model="within", effect="twoways", data= dta.agg.jl)
      
      ## Adjust standard errors to cluster robust standard errors
      ## (clustered on the group index, geo.type.pk)
      adjusted.se <- coeftest(jl.model, vcov=vcovHC(jl.model, type="HC0", cluster="group"))
      
      #Same specification with the fixed effects as explicit dummies in lm();
      #used for the R2 reported in the table and for the F ratio
      jl.lm <- lm(logCount ~ m + v + I(disease.type*m) + I(disease.type*v) +
                    factor(year.qtr) + factor(geo.type.pk), data = dta.agg.jl)
      
      #Calculate r2 with fixed effects
      r2 <- formatSig(summary(jl.lm)$r.squared, 2)
      
      #Triple-difference model: adds the disease.type x post-1998 interaction
      jl.model.triple <- plm(logCount ~ I(disease.type*m) + I(disease.type*v) + I(as.numeric(year>=1998)*disease.type) + m + v , 
                             index = c("geo.type.pk", "year.qtr"), 
                             model="within", effect="twoways", data= dta.agg.jl)
      
      ## Adjust standard errors to cluster robust standard errors
      adjusted.se.triple <- coeftest(jl.model.triple, vcov=vcovHC(jl.model.triple, type="HC0", cluster="group"))
      
      #lm() counterpart of the triple-difference model (explicit fixed effects)
      triple.lm <- lm(logCount ~ m + v + I(disease.type*m) + I(disease.type*v) +
                        I(as.numeric(year>=1998)*disease.type) +
                        factor(year.qtr) + factor(geo.type.pk), data = dta.agg.jl)
      
      #Calculate r2 with fixed effects
      r2.triple <- formatSig(summary(triple.lm)$r.squared, 2)
      
      #Save results
      assign(paste0("jl.model.", run.suffix), jl.model)
      assign(paste0("adjusted.se.", run.suffix), adjusted.se)
      assign(paste0("r2.", run.suffix), r2)
      
      assign(paste0("jl.model.triple.", run.suffix), jl.model.triple)
      assign(paste0("adjusted.se.triple.", run.suffix), adjusted.se.triple)
      assign(paste0("r2.triple.", run.suffix), r2.triple)
      
      #n.obs.<window> carries no disease label, so it is overwritten by each
      #disease selection and ends up holding the value from the last one.
      #It is kept for backward compatibility; the labelled copy below keeps
      #one value per run.
      assign(paste0("n.obs.", window.suffix), n.obs)
      assign(paste0("n.obs.", run.suffix), n.obs)
      
      assign(paste0("jl.lm.", run.suffix), jl.lm)
      assign(paste0("triple.lm.", run.suffix), triple.lm)
      
    }
  }


## ======================================
## Calculate F ratios
## ======================================
  
  #Compare each triple-difference lm() fit (first argument) with the
  #corresponding restricted lm() fit (second argument) for every disease
  #selection / observation window. calculateFRatio() is defined in
  #Functions/calculateFRatio.R; the table code below prints element [1] of
  #each result as the F ratio and uses element [2] as its p-value.
  
  #Original disease selection, 1995-1999
  f1 <- calculateFRatio(triple.lm.orig.1995.1999, jl.lm.orig.1995.1999)
  
  #Original disease selection, 1993-2009
  f2 <- calculateFRatio(triple.lm.orig.1993.2009, jl.lm.orig.1993.2009)
  
  #Camp disease selection, 1995-1999
  f3 <- calculateFRatio(triple.lm.camp.1995.1999, jl.lm.camp.1995.1999)
  
  #Camp disease selection, 1993-2009
  f4 <- calculateFRatio(triple.lm.camp.1993.2009, jl.lm.camp.1993.2009)
  
## ======================================
## Table output (stargazer; printed as plain text via type = "text")
## ======================================

  #Row labels, listed in the row order pinned by coef.order below
  cov.label <- c("Foodborne  x LA mandatory disclosure post-1998", 
                 "Foodborne x LA voluntary disclosure post-1998", 
                 "Foodborne x CA post-1998", 
                 "LA Mandatory disclosure post-1998 (Digestive)", 
                 "LA Voluntary disclosure post-1998 (Digestive)")
  title <- c("Table 5: Bias from omitting two-way interaction terms with CA as the control group, with and without campylobacter in the disease selection.")
  
  #Regular expressions matching the model coefficient names, one per entry
  #of cov.label and in the same order. Passing these as `order` fixes the
  #row order explicitly, so the labels do not depend on the order in which
  #the eight models happen to introduce their terms (the disease.type x
  #post-1998 term is absent from the restricted models).
  coef.order <- c("disease\\.type ?\\* ?m\\)",
                  "disease\\.type ?\\* ?v\\)",
                  "year ?>= ?1998",
                  "^m$",
                  "^v$")
  
  #Significance stars for a single p-value; empty string when the p-value
  #is missing or not significant at the 10% level.
  sigStars <- function(p.value){
    if(length(p.value) != 1 || is.na(p.value)){
      return("")
    }
    if(p.value < 0.01){
      "***"
    } else if(p.value < 0.05){
      "**"
    } else if(p.value < 0.10){
      "*"
    } else {
      ""
    }
  }
  
  #Table cell for one calculateFRatio() result: element [1] formatted to two
  #significant digits, followed by stars based on element [2]
  fRatioCell <- function(f.result){
    paste0(formatSig(f.result[1], 2), sigStars(f.result[2]))
  }
  
  #Fitted models and their cluster-robust coefficient tests, in column order:
  #restricted then triple-difference, for each window within each disease
  #selection
  table.models <- list(jl.model.orig.1995.1999,
                       jl.model.triple.orig.1995.1999,
                       jl.model.orig.1993.2009,
                       jl.model.triple.orig.1993.2009,
                       jl.model.camp.1995.1999,
                       jl.model.triple.camp.1995.1999,
                       jl.model.camp.1993.2009,
                       jl.model.triple.camp.1993.2009)
  table.tests <- list(adjusted.se.orig.1995.1999,
                      adjusted.se.triple.orig.1995.1999,
                      adjusted.se.orig.1993.2009,
                      adjusted.se.triple.orig.1993.2009,
                      adjusted.se.camp.1995.1999,
                      adjusted.se.triple.camp.1995.1999,
                      adjusted.se.camp.1993.2009,
                      adjusted.se.triple.camp.1993.2009)
  
  #R2 row: one value per column, in the same order as table.models
  r2.line <- c("R2", 
               r2.orig.1995.1999,
               r2.triple.orig.1995.1999,
               r2.orig.1993.2009, 
               r2.triple.orig.1993.2009,
               r2.camp.1995.1999,
               r2.triple.camp.1995.1999,
               r2.camp.1993.2009,
               r2.triple.camp.1993.2009)
  
  #F ratio row: reported under each triple-difference column only
  f.line <- c("F ratio", 
              "", fRatioCell(f1), 
              "", fRatioCell(f2), 
              "", fRatioCell(f3), 
              "", fRatioCell(f4))
  
  #Print table
  stargazer(jl.model.orig.1995.1999,
            jl.model.triple.orig.1995.1999,
            jl.model.orig.1993.2009,
            jl.model.triple.orig.1993.2009,
            jl.model.camp.1995.1999,
            jl.model.triple.camp.1995.1999,
            jl.model.camp.1993.2009,
            jl.model.triple.camp.1993.2009,
            #Coefficients rounded to two decimals
            coef = lapply(table.models, function(model) round(model$coefficients, 2)),
            #Column 2 of the coeftest output is used as the standard error
            se = lapply(table.tests, function(test) test[,2]),
            #Column 4 of the coeftest output is used as the p-value
            p = lapply(table.tests, function(test) test[,4]),
            order = coef.order,
            covariate.labels = cov.label,
            title = title,
            digits = 2,
            dep.var.labels = "",
            dep.var.caption = "",
            notes.append = FALSE,
            column.sep.width = "1pt",
            no.space = TRUE,
            omit.stat = c("rsq", "adj.rsq", "f"),
            notes.align = "l",
            type = "text",
            add.lines = list(r2.line, f.line))
