/*******************************************************/

*	This program provides an example of how the
	underlying data for the paper is created. The
	program will need modification depending on
	the format of the raw administrative tax data;

*	This program is written for SAS.;

/*******************************************************/
*	Tax forms:

	- T2 (Schedule 200)
	- Schedule 1
	- Schedule 31
	- Schedule 33
	- Schedule 34
	- Schedule 35
	- T661
	- Schedule 8
	- Schedule 100
	- Schedule 125;

/*	The statement-style comment above must end with a semicolon.
	Without it the comment runs on to the next semicolon and the
	LIBNAME FINAL statement below is never executed.               */

/*	Library that receives the final data set.                      */
libname FINAL "PATH-WHERE-FINAL-DATA-SET-WILL-BE-STORED";

/*	One input library per tax year. 1999 is only read for lagged
	values of firms whose first sample year is 2000.
	NOTE(review): SAS librefs are limited to 8 characters and
	TYEARnnnn has 9. Confirm, and if needed rename the librefs
	consistently here and at every TYEAR reference in the program. */
libname TYEAR1999 "PATH-1999";
libname TYEAR2000 "PATH-2000";
libname TYEAR2001 "PATH-2001";
libname TYEAR2002 "PATH-2002";
libname TYEAR2003 "PATH-2003";
libname TYEAR2004 "PATH-2004";
libname TYEAR2005 "PATH-2005";
libname TYEAR2006 "PATH-2006";
libname TYEAR2007 "PATH-2007";

/*******************************************************/
/*******************************************************/
/*******************************************************/
*	Part 1;
/*******************************************************/
/*******************************************************/
/*******************************************************/

%MACRO dat(y);
/*******************************************************/
/*	Builds the firm-level extract for one tax year and appends it
	to WORK.firm_dat, which must exist before the first call.
	Parameter y: four-digit tax year, selects library TYEAR(y).
	Work data sets firms, temp and t661 are overwritten.           */

/*******************************************************/
*	Retrieve data from T2.;

DATA firms;
	set TYEAR&y..T2;

	tyear			= &y;
	ccpc			= T2_40;
	constant_type		= T2_43;
	btyear			= T2_60;
	etyear			= T2_61;
	tyear_days		= (etyear - btyear)/(24*3600);	*	Dates initially in seconds;
	associated		= T2_160;
	T2_bl			= T2_410;
	large_corp_tax		= T2_415;
	T2_rbl			= T2_425;
	TY			= T2_360 - T2_370;
	prov			= T2_750;
	T2			= 1;
	if tyear <= 2005 then T2_710 = 0;

	keep 	firmid			app_date	tyear
			CCPC		constant_type	btyear
			etyear		tyear_days	associated		
			T2_bl		T2_rbl		TY
			prov		naics		T2
			large_corp_tax	T2_400		T2_405;
RUN;

/*	Drop observations where firm files multiple times on same day.	*/
PROC sort data = firms	out = firms;
	by firmid app_date;
RUN;
PROC freq data = firms;
	by firmid;
	tables app_date / noprint out = temp;
RUN;
DATA firms;
	merge firms (IN=x) temp (IN=y);
	by firmid app_date;
	if x=1 and y=1;
	if count = 1;			
	drop count percent;
RUN;

/*	multi_filers = number of remaining filings of the firm in this year.	*/
PROC freq data = firms;
	tables firmid / noprint out = temp;
RUN;
DATA firms;
	merge firms (IN=x) temp;
	by firmid;
	if x=1;
	multi_filers	= count;
	drop count percent;
RUN;

/*******************************************************/
/*	Retrieve data from Schedule 31		       */
DATA temp;
	set TYEAR&y..SCH31;

	TY_lag				= SCH31_390;
	association_check	= SCH31_400;
	_31					= 1;

	keep	firmid		app_date
			TY_lag		association_check		_31;
RUN;
PROC sort data = temp out = temp;
	by firmid app_date;
RUN;
DATA firms;
	merge firms (IN=x) temp;
	by firmid app_date;
	if x=1;
	_31 = (_31 = 1);	/* 1 if a Schedule 31 row matched, else 0 */
RUN;

/*******************************************************/
/*	Retrieve data from T661			       */

/*	Ensure data provided gives totals for the      */
/*	firm or for each project undertaken.           */
/*	Add up information to the firm level.          */
/*	Potentially depending on the data format.      */

PROC sort data = TYEAR&y..T661 (keep = firmid app_date T661_400 T661_460)
		out = temp (rename = (T661_400 = E T661_460 = ded_claimed));
	by firmid app_date;
RUN;

/*	Sum the T661 rows of each filing. The previous version ran the
	summary on the frequency table, which holds neither E nor
	ded_claimed, so the totals were never produced. _FREQ_ is the
	number of T661 rows in the filing and becomes multi_projects.  */
PROC means data = temp noprint;
	by firmid app_date;
	var E ded_claimed;
	output out = t661 (drop = _TYPE_ rename = (_FREQ_ = multi_projects))
		sum = E ded_claimed;
RUN;
DATA firms;
	merge firms (IN=x) t661;
	by firmid app_date;
	if x=1;
	if E 		= . then E = 0;
	if ded_claimed 	= . then ded_claimed = 0;
	/*	Test multi_projects itself. The old test on count was always
		true after the merge and reset every firm to zero.          */
	if multi_projects = . then multi_projects = 0;
RUN;

/*******************************************************/
/*	Find capital stocks: SCH 33		       */
DATA temp;
	set TYEAR&y..SCH33;
	gross_part3_33 = SCH33_820;
	_33 = 1;
	keep firmid app_date gross_part3_33 _33;
RUN;
/*	Sort before the BY merge, as is done for Schedule 31.	*/
PROC sort data = temp out = temp;
	by firmid app_date;
RUN;
DATA firms;
	merge firms (IN=x) temp;
	by firmid app_date;
	if x=1;
RUN;

/*******************************************************/
/*	Find capital stocks: SCH 34		       */
DATA temp;
	set TYEAR&y..SCH34;
	gross_part3_34 = SCH34_820;
	_34 = 1;
	keep firmid app_date gross_part3_34 _34;
RUN;
PROC sort data = temp out = temp;
	by firmid app_date;
RUN;
DATA firms;
	merge firms (IN=x) temp;
	by firmid app_date;
	if x=1;
RUN;

/*******************************************************/
/*	Find capital stocks: SCH 35		       */
DATA temp;
	set TYEAR&y..SCH35;
	gross_part3_35 = SCH35_820;
	_35 = 1;
	keep firmid app_date gross_part3_35 _35;
RUN;
PROC sort data = temp out = temp;
	by firmid app_date;
RUN;
DATA firms;
	merge firms (IN=x) temp;
	by firmid app_date;
	if x=1;
RUN;

/*	Join the data sets into one			*/
DATA firm_dat;
	set firm_dat firms;
RUN;

%MEND dat;

/*	Consolidate datasets				*/

/*	Seed an empty firm_dat so that each %DAT call can append to it.	*/
data firm_dat;
run;
%DAT(2000);
%DAT(2001);
%DAT(2002);
%DAT(2003);
%DAT(2004);
%DAT(2005);
%DAT(2006);
%DAT(2007);

/*	Remove the seed row, collapse the three capital-stock schedules
	into gross_part3 when exactly one of them was filed, and set
	missing R&D fields to zero.                                     */
data firm_dat;
	set firm_dat;
	if firmid eq . then delete;

	if      _33 eq 1 and _34 eq . and _35 eq . then gross_part3 = gross_part3_33;
	else if _33 eq . and _34 eq 1 and _35 eq . then gross_part3 = gross_part3_34;
	else if _33 eq . and _34 eq . and _35 eq 1 then gross_part3 = gross_part3_35;

	if ded_claimed    eq . then ded_claimed    = 0;
	if E              eq . then E              = 0;
	if multi_projects eq . then multi_projects = 0;

	drop gross_part3_33 gross_part3_34 gross_part3_35;
run;
proc sort data = firm_dat;
	by firmid;
run;

/*******************************************************/
/*******************************************************/
/*******************************************************/
*	Part 2;
/*******************************************************/
/*******************************************************/
/*******************************************************/

/*******************************************************/
/*******************************************************/
*	Often used MACROs;

/*******************************************************/
*	TYPE 1 MACRO:

	-	This macro identifies firms that
		satisfy the condition stated in the
		preceding %LET statement at least once
		during the entire sample period.;

%MACRO T1(infile,new_var,outfile);
	/*	Writes outfile = infile plus a 0/1 variable new_var that is 1
		on every row of a firm with at least one row satisfying the
		global macro variable condition1. infile must be sorted by
		firmid. WORK.temp is overwritten.                            */
	data temp (keep = firmid &new_var);
		set &infile;
		if not (&condition1) then delete;
		&new_var = 1;
	run;
	/*	One flag row per firm.	*/
	proc sort data = temp nodupkey;
		by firmid;
	run;
	data &outfile;
		merge &infile (in = in_base) temp;
		by firmid;
		if in_base;
		&new_var = (&new_var eq 1);	/* unmatched firms get 0 */
	run;
%MEND T1;

/*******************************************************/
*	TYPE 2 MACRO:

	-	This macro identifies firms that see
		the value of a key variable change.;

%MACRO T2(infile,key_var,new_var,outfile);
	/*	Writes outfile = infile plus new_var, the number of distinct
		values of key_var observed for the firm. infile must be
		sorted by firmid. WORK.temp is overwritten.                  */

	/*	One row per firmid and key_var value.	*/
	proc sort data = &infile out = temp nodupkey;
		by firmid &key_var;
	run;
	/*	Rows per firm = number of distinct key values.	*/
	proc freq data = temp;
		tables firmid / noprint out = temp;
	run;
	data temp (keep = firmid &new_var);
		set temp;
		&new_var = count;
	run;
	data &outfile;
		merge &infile (in = in_base) temp;
		by firmid;
		if in_base;
		if &new_var eq . then &new_var = 0;
	run;
%MEND T2;

/*******************************************************/
*	TYPE 3 MACRO:

	-	This macro eliminates any firms that
		have multiple tax years that have the
		same:
		-	app_date (filing date)
		-	btyear (tax year beginning)
		-	etyear (tax year ending);

%MACRO T3(infile,key_var,outfile);
	/*	Writes outfile = infile without any firm that has the same
		key_var value on more than one row. infile must be sorted by
		firmid. WORK.temp is overwritten.                            */
	proc sort data = &infile (keep = firmid &key_var) out = temp;
		by firmid &key_var;
	run;
	/*	Frequency of every key value within each firm.	*/
	proc freq data = temp;
		by firmid;
		tables &key_var / noprint out = temp;
	run;
	/*	Firms with a repeated key value.	*/
	data temp (keep = firmid);
		set temp;
		if count gt 1;
	run;
	proc sort data = temp nodupkey;
		by firmid;
	run;
	/*	Keep only firms that are not on the exclusion list.	*/
	data &outfile;
		merge &infile (in = in_base) temp (in = in_dup);
		by firmid;
		if in_base and not in_dup;
	run;
%MEND T3;

/*******************************************************/
/*******************************************************/

/*	Keep T2 filers in tax years 2000-2007 and derive the five-digit
	industry code from NAICS.                                       */
data firm_dat;
	set firm_dat;
	if tyear lt 2000 or tyear gt 2007 then delete;
	if T2 ne 1 then delete;
	NAICS5 = int(NAICS/10);
run;
proc sort data = firm_dat;
	by firmid app_date tyear;
run;

/*	Eliminate firms that have multiple tax years
	with the same beginning date.                  */
%T3(firm_dat,btyear,firm_dat);
/*	Eliminate firms that have multiple tax years
	with the same end date.                        */
%T3(firm_dat,etyear,firm_dat);
/*	Eliminate firms that have multiple tax years
	with the same app_date.                        */
%T3(firm_dat,app_date,firm_dat);

/*	id_count numbers the rows of each firm 1, 2, 3, ... in the
	current sort order of firm_dat.                 */
data firm_dat;
	set firm_dat;
	by firmid;
	if first.firmid then id_count = 0;
	id_count + 1;
run;

/*******************************************************/
/*******************************************************/
/*	Attach id_count_max, the largest id_count of each firm.	*/
proc univariate data = firm_dat (keep = firmid id_count) noprint;
	by firmid;
	var id_count;
	output out = temp
		max = id_count_max;
run;
data firm_dat;
	merge firm_dat (in = in_base) temp (in = in_max);
	by firmid;
	if in_base and in_max;
run;

/*******************************************************/
/*******************************************************/
/*	Obtain lagged values from the previous tax filing, if any:
	TY (TY_lag_T2), the length of the tax year in days
	(tyear_days_lag) and gross_part3 (gross_part3_lag).            */

/*	Shift every row forward by one id_count so that it lines up
	with the next filing of the same firm.                         */
DATA temp;
	set firm_dat;

	id_count		=  id_count + 1;
	TY_lag_T2		=  TY;
	tyear_days_lag		=  tyear_days;
	gross_part3_lag 	=  gross_part3;
	if id_count	   	<= id_count_max;

	keep firmid id_count TY_lag_T2 tyear_days_lag gross_part3_lag;
RUN;

/*******************************************************/
/*******************************************************/
*	Attach lagged taxable income from 1999.;

/*	The firms that could possibly need this extra
	piece of information.                          */
DATA temp2;
	set firm_dat;
	if tyear = 2000 and id_count = 1;
	keep firmid id_count;
RUN;
DATA temp3;
	/*	The 1999 library is declared as TYEAR1999. The libref SRED1999
		used before is not assigned anywhere in this program.       */
	set TYEAR1999.T2;
	if firmid		^= .;
	if T2_360 - T2_370	^= .;
	if T2_60		^= .;
	if T2_61		^= .;
	/*	Same definition as tyear_days in the yearly extract: end minus
		begin, converted from seconds to days. The previous version
		took begin minus end and left the result in seconds.        */
	tyear_days_lag 		 = (T2_61 - T2_60)/(24*3600);
	TY_lag_T2 		 = T2_360 - T2_370;
	id_count = 1;
	keep firmid id_count TY_lag_T2 tyear_days_lag;
RUN;
/*	The raw 1999 file is not known to be sorted, so sort it before
	the BY merge.
	NOTE(review): a firm with several 1999 filings yields several
	rows here and will duplicate its 2000 row. Confirm whether the
	1999 file needs de-duplication.                                */
PROC sort data = temp3 out = temp3;
	by firmid id_count;
RUN;
DATA temp2;
	merge temp2 (IN=x) temp3;
	by firmid id_count;
	if x=1;
RUN;
DATA temp;
	set temp2 temp;
RUN;
PROC sort data = temp out = temp;
	by firmid id_count;
RUN;
DATA firm_dat;
	merge firm_dat (IN=x) temp;
	by firmid id_count;
	if x=1;

	/*	A lagged tax year shorter than 7*51 - 0.25 days is annualised
		to 365 days, otherwise the lagged value is used as reported.
		All six conditions now use 7*51; four of them previously read
		7.51, which put the threshold at 7.26 days. The short-year
		branches also skip a zero-length lagged year so that they
		never divide by zero.                                        */

	/*	Non-CCPC: TY_lag always comes from the lagged T2.	*/
	if CCPC ^= 1 and tyear_days_lag ^= . and tyear_days_lag ^= 0 and tyear_days_lag < 7*51 - 0.25  
		then TY_lag = TY_lag_T2 * 365 / tyear_days_lag;
	if CCPC ^= 1 and tyear_days_lag ^= . and 7*51 - 0.25 <= tyear_days_lag 
		then TY_lag = TY_lag_T2;

	/*	CCPC: use the lagged T2 only when the Schedule 31 value is
		missing, no Schedule 31 was matched, or Schedule 31 reports
		zero while the lagged T2 value is positive.                 */
	if CCPC  = 1 and tyear_days_lag ^= . and tyear_days_lag ^= 0 and tyear_days_lag < 7*51 - 0.25  and (TY_lag = . or _31 ^= 1 or (TY_lag = 0 and 0 < TY_lag_T2)) 
		then TY_lag = TY_lag_T2 * 365 / tyear_days_lag;
	if CCPC  = 1 and tyear_days_lag ^= . and 7*51 - 0.25 <= tyear_days_lag and (TY_lag = . or _31 ^= 1 or (TY_lag = 0 and 0 < TY_lag_T2)) 
		then TY_lag = TY_lag_T2;

	/*	Fill a missing large_corp_tax from lagged gross_part3.	*/
	if large_corp_tax = . and gross_part3_lag ^= . and tyear_days_lag ^= . and tyear_days_lag ^= 0 and tyear_days_lag < 7*51 - 0.25
		then large_corp_tax = gross_part3_lag * 365 / tyear_days_lag;
	if large_corp_tax = . and gross_part3_lag ^= . and tyear_days_lag ^= . and 7*51 - 0.25 <= tyear_days_lag
		then large_corp_tax = gross_part3_lag;
RUN;

/*******************************************************/
/*******************************************************/
*	Create indicator variables;

/*******************************************************/
/*	full_years = number of rows of the firm whose tax year lasted
	between 7*51 and 366 days.                                     */

/*	Row-level 0/1 indicator.	*/
data temp (keep = firmid full_years);
	set firm_dat;
	full_years = (7*51 le tyear_days le 366);
run;
/*	Sum of the indicator per firm.	*/
proc univariate data = temp noprint;
	by firmid;
	var full_years;
	output out = temp
		sum = full_years;
run;
data firm_dat;
	merge firm_dat (in = in_base) temp;
	by firmid;
	if in_base;
	if full_years eq . then full_years = 0;
run;

/*******************************************************/
*	CP: number of distinct PROV values recorded for a
	firm in 2000-2007.;

%T2(firm_dat,PROV,CP,firm_dat);

/*******************************************************/
*	MJ: identify firms that operate in multiple
	jurisdictions (PROV equal to MJ) in at least one
	tax year during 2000-2007.;

%LET condition1 = PROV = "MJ";
%T1(firm_dat,MJ,firm_dat);

/*******************************************************/
*	CCPC_associated: identify CCPCs that are associated
	at some point during the 2000-2007 period, based on
	either the T2 flag or the Schedule 31 check.;

%LET condition1 = (CCPC = 1 and associated ^= 0) or (CCPC = 1 and association_check = 1);
%T1(firm_dat,CCPC_associated,firm_dat);

/*******************************************************/
*	mf: identify firms that file multiple times
	during one calendar year during the
	2000-2007 period.;

%LET condition1 = multi_filers ^= 1;
%T1(firm_dat,mf,firm_dat);

/*******************************************************/
*	ct: identify firms that change type during a 
	given tax year at some point during the 2000-2007
	period (constant_type is non-missing).;

%LET condition1 = constant_type ^= .;
%T1(firm_dat,ct,firm_dat);

/*******************************************************/
*	firm_types: counts the number of distinct CCPC
	values a given firm has had during the 2000-2007
	period.;

%T2(firm_dat,CCPC,firm_types,firm_dat);

/*******************************************************/
/*******************************************************/
*	Create variables in dataset.;

DATA firm_dat;
	set firm_dat;

	/*	Adds three expenditure-limit variables that share one formula
		and differ only in their constants:
			T2_EL0 uses 4000000 and a floor of 200000 on TY_lag,
			T2_EL1 uses 5000000 and a floor of 300000 on TY_lag,
			T2_EL2 uses 6000000 and a floor of 400000 on TY_lag.
		The scaling factor is T2_RBL / T2_BL for CCPCs with both
		values available, and max(0, 1 - large_corp_tax / 11250)
		otherwise. A variable stays missing when none of its
		conditions is met.                                           */

	*	Correct some problems with reported CCPC = 1 T2_BL;
	if CCPC = 1 and T2_BL ^= . and T2_BL < 200000 then T2_BL = T2_RBL;

/*******************************************************/

	*	Calculate expenditure limits for CCPCs with full information.;	
	if CCPC = 1 and 0 < T2_BL and T2_BL ^= . and T2_RBL ^= . and TY_lag ^= . and tyear <= 2007
		then T2_EL1 = max(0,5000000 - 10*max(300000,TY_lag)) * T2_RBL / T2_BL;
	if CCPC = 1 and 0 < T2_BL and T2_BL ^= . and T2_RBL ^= . and TY_lag ^= .
		then T2_EL0 = max(0,4000000 - 10*max(200000,TY_lag)) * T2_RBL / T2_BL;

	*	Calculate expenditure limits for CCPCs without all information.;	
	if CCPC = 1 and ( (T2_BL = 0 or T2_BL = .) or T2_RBL = . ) and TY_lag ^= . and large_corp_tax ^= . and tyear <= 2007
		then T2_EL1 = max(0,5000000 - 10*max(300000,TY_lag)) * max( 0 , 1 - large_corp_tax / 11250 );
	if CCPC = 1 and ( (T2_BL = 0 or T2_BL = .) or T2_RBL = . ) and TY_lag ^= . and large_corp_tax ^= .
		then T2_EL0 = max(0,4000000 - 10*max(200000,TY_lag)) * max( 0 , 1 - large_corp_tax / 11250 );

	*	Calculate expenditure limits for Non-CCPCs.;	
	if CCPC ^= 1 and TY_lag ^= . and large_corp_tax ^= . and tyear <= 2007
		then T2_EL1 = max(0,5000000 - 10*max(300000,TY_lag)) * max( 0 , 1 - large_corp_tax / 11250 );
	if CCPC ^= 1 and TY_lag ^= . and large_corp_tax ^= .
		then T2_EL0 = max(0,4000000 - 10*max(200000,TY_lag)) * max( 0 , 1 - large_corp_tax / 11250 );

	/*	T2_EL2: the same three cases with the 6000000 / 400000 constants.	*/
	if CCPC = 1 and 0 < T2_BL and T2_BL ^= . and T2_RBL ^= . and TY_lag ^= . and tyear <= 2007
		then T2_EL2 = max(0,6000000 - 10*max(400000,TY_lag)) * T2_RBL / T2_BL;
	if CCPC = 1 and ( (T2_BL = 0 or T2_BL = .) or T2_RBL = . ) and TY_lag ^= . and large_corp_tax ^= . and tyear <= 2007
		then T2_EL2 = max(0,6000000 - 10*max(400000,TY_lag)) * max( 0 , 1 - large_corp_tax / 11250 );
	if CCPC ^= 1 and TY_lag ^= . and large_corp_tax ^= . and tyear <= 2007
		then T2_EL2 = max(0,6000000 - 10*max(400000,TY_lag)) * max( 0 , 1 - large_corp_tax / 11250 );

	*	Firms that would see increase in EL with policy shock;
	/*	1 when T2_EL0 is non-missing and strictly below T2_EL1, else 0.	*/
	EL_increase				= 	(T2_EL0 ^= . and T2_EL0 < T2_EL1);
RUN;

/*******************************************************/
/*******************************************************/
*	Create more identifier variables.;

/*******************************************************/
*	Identify firms for which we cannot determine the effect
	of ITC cut after 2003.;

/*	Flags firms with a missing T2_EL0 in any tax year from 2004 on.
	NOTE(review): the flag is named missing_T2_EL2 but the test is
	on T2_EL0 - confirm this is the intended variable.              */
%LET condition1 = 2004 <= tyear and T2_EL0 = .;
%T1(firm_dat,missing_T2_EL2,firm_dat);

/*******************************************************/
*	Identify firms that did R&D prior to policy
	shock.;

/*	Flags firms with positive E in any tax year before 2004.	*/
%LET condition1 = tyear < 2004 and 0 < E;
%T1(firm_dat,E_2000_2003,firm_dat);

/*******************************************************/
/*******************************************************/
*	1.	ADD BALANCE SHEET AND INCOME SHEET INFO.;

*	This part of the program aggregates the balance
	and income sheet information for all firms
	before matching it to the firms in the sample.;

/*******************************************************/
/*	BSIS starts as the list of firmid / app_date keys in firm_dat,
	sorted by those keys. Balance sheet and income statement
	fields are merged onto it.                                     */
proc sort data = firm_dat (keep = firmid app_date) out = BSIS;
	by firmid app_date;
run;

/*******************************************************/
/*	General macros that stack one schedule across all tax years.	*/

%MACRO AGG(infile,outfile);
	/*	Stacks TYEAR2000.infile through TYEAR2007.infile into outfile
		and drops rows with a missing firmid or app_date.            */
	%local yr;
	data &outfile;
	run;
	%do yr = 2000 %to 2007;
		%SUBAGG(&yr);
	%end;
	data &outfile;
		set &outfile;
		if firmid ne . and app_date ne .;
	run;
%MEND AGG;
%MACRO SUBAGG(year);
	/*	Appends one year of the schedule. infile and outfile are the
		parameters of the calling AGG macro, so this macro is only
		meaningful when invoked from AGG. WORK.temp is overwritten.  */
	data temp;
		set TYEAR&year..&infile;
	run;
	data &outfile;
		set &outfile temp;
	run;
%MEND SUBAGG;

/*******************************************************/
/*	Balance sheet information (SC 100).	*/

/*	Stack Schedule 100 across years.	*/
%AGG(SCH100,BS);

/*	Keep only the desired balance sheet variables.	*/
data BS (keep = firmid app_date
		SCH100_1599 SCH100_3660 SCH100_2599 SCH100_3499 SCH100_1740);
	set BS;
run;
proc sort data = BS;
	by firmid app_date;
run;
data BSIS;
	merge BSIS (in = in_base) BS;
	by firmid app_date;
	if in_base;
run;

/*******************************************************/
/*	Income statement information (SC 125).	*/

/*	Stack Schedule 125 across years.	*/
%AGG(SCH125,IS);

/*	Keep only the desired income statement variables.
	total_interest is missing when any of its eight components is
	missing, because the components are added with the + operator. */
data IS (keep = firmid app_date
		SCH125_8299 SCH125_8456 SCH125_9277 SCH125_9278 total_interest);
	set IS;
	total_interest	= SCH125_8710 + SCH125_8711 + SCH125_8712 + SCH125_8713
			+ SCH125_8714 + SCH125_8715 + SCH125_8716 + SCH125_8717;
run;
proc sort data = IS;
	by firmid app_date;
run;
data BSIS;
	merge BSIS (in = in_base) IS;
	by firmid app_date;
	if in_base;
run;

/*	Join BSIS to FIRM_DAT on firmid and app_date.	*/
proc sql;
	create table temp as
	select *
	from firm_dat as a
		inner join BSIS as b
		on a.firmid = b.firmid and a.app_date = b.app_date;
quit;

proc sort data = temp out = firm_dat;
	by firmid app_date;
run;

/*******************************************************/
/*******************************************************/
*	2.	ADD R&D INFORMATION.;

/*******************************************************/
/*	A.	For each firm retrieve the information from the T661.	*/

/*	Distinct firmid / app_date keys of the sample.	*/
proc sort data = firm_dat (keep = firmid app_date) out = firms nodupkey;
	by firmid app_date;
run;

/*	Macro that obtains the T661 rows of one tax year for the sample
	keys in WORK.firms and appends the selected fields to WORK.BG.
	WORK.zztemp and WORK.temp are overwritten.                      */
%MACRO T661(yr);
	data zztemp;
		set TYEAR&yr..T661;
	run;
	proc sql;
		create table temp as
		select *
		from firms as a
			inner join zztemp as b
			on a.firmid = b.firmid and a.app_date = b.app_date;
	quit;
	data BG (keep = firmid app_date
			T661_300 T661_305 T661_310 T661_315 T661_320 T661_325
			T661_340 T661_345 T661_350 T661_355 T661_370 T661_390
			T661_400 T661_430 T661_570 T661_605);
		set BG temp;
	run;
%MEND T661;

/*	Seed BG, append every year, then drop the seed row.	*/
data BG;
run;
%T661(2000);
%T661(2001);
%T661(2002);
%T661(2003);
%T661(2004);
%T661(2005);
%T661(2006);
%T661(2007);
data BG;
	set BG;
	if firmid eq . then delete;
run;
proc sort data = BG;
	by firmid app_date;
run;

/*******************************************************/
*	B.	Clean data;

*	Eliminate duplicate information as a result of
	conducting multiple R&D projects.;

*	Focus on firms that conducted R&D;
/*	The sum is missing when any of the three fields is missing, and
	such rows are dropped as well.                                  */
DATA BG;
	set BG;
	if 0 < T661_400 + T661_570 + T661_605;
RUN;

*	Despite the above convention, a firm may 
	repeat information in multiple lines in the
	raw data.

	The following identifies firms with multiple observations
	in the raw data that result for example if multiple R&D 
	projects are conducted. In this case information
	should not add to totals.;

/*	Number of BG rows per firmid / app_date.	*/
PROC freq data = BG;
	by firmid;
	table app_date / noprint out = temp;
RUN;

*	Uncomplicated cases: Majority of observations are these;
DATA temp1;
	merge BG (IN=x) temp (IN=y);
	by firmid app_date;
	if x=1 and y=1;
	if count = 1;
RUN;

*	Complicated cases;
DATA temp;
	set temp;
	if 1 < count;
	keep firmid app_date count;
RUN;
DATA temp;
	merge temp (IN=x) BG (IN=y);
	by firmid app_date;
	if x=1 and y=1;
RUN;
/*	Standard deviation of the three totals within each filing. The
	output variables keep the input names but hold the deviations. */
PROC univariate data = temp noprint;
	by firmid app_date;
	var T661_400 T661_570 T661_605;
	output out = temp
		std = T661_400 T661_570 T661_605;
RUN;
/*	Filings whose repeated rows all carry identical totals. Filings
	with differing totals are not carried forward.                  */
DATA temp2;
	set temp;
	if T661_400 = 0 and T661_570 = 0 and T661_605 = 0;
	keep firmid app_date;
RUN;
PROC sort data = temp2 out = temp2 nodupkey;
	by firmid app_date;
RUN;
/*	The ORDER BY is required: PROC SURVEYSELECT needs its input
	sorted by the STRATA variables and PROC SQL does not guarantee
	any row order without it.                                      */
PROC SQL;
	create table temp3 as
	select*
	from temp2 as a,
		BG as b
	where a.firmid = b.firmid and a.app_date = b.app_date
	order by a.firmid, a.app_date;
QUIT;
/*	Keep one row per filing, chosen with a fixed seed.	*/
PROC surveyselect data = temp3 out = temp3 n = 1 seed = 123456 noprint;
	strata firmid app_date;
RUN;
*	Combine relatively uncomplicated cases.;
DATA BG;
	set temp1 temp3;
RUN;

PROC sort data = BG out = BG;
	by firmid app_date;
RUN;
DATA firm_dat;
	merge firm_dat (IN=x) BG;
	by firmid app_date;
	if x=1;
RUN;

/*******************************************************/
/*******************************************************/
*	Shape sample and create additional variables;

/*******************************************************/
/*	Sample criteria listed by the authors:
	- Tax years with length [7*51,366] days
	- Does not change corp. type during a given tax year.
	- Not an associated CCPC in a given year.
	- Does not file multiple times in a tax year.
	- Does not operate in multiple provinces
	- Firm operates 8 full years in the period
	- Only manufacturing firms.
	- Do not change corp. type from one year to the next.
	- Has no data missing to calculate EL after 2003.
	- Carried out R&D prior to 2004.
	- Remained in the same province throughout entire time
	NOTE(review): no industry filter appears in this step - confirm
	where the manufacturing restriction is applied.                 */

data full_period;
	set firm_dat;

	/*	Row-level and firm-level sample restrictions.	*/
	if 7*51 le tyear_days le 366;		/* tax year of full length */
	if associated eq 1 then delete;		/* not associated in this year */
	if ct              eq 0;		/* never changed type within a tax year */
	if multi_filers    eq 1;		/* exactly one filing in the year */
	if PROV            ne "MJ";		/* not multi-jurisdiction this year */
	if firm_types      eq 1;		/* a single firm type over the sample */
	if missing_T2_EL2  eq 0;		/* EL can be computed after 2003 */
	if E_2000_2003     eq 1;		/* carried out R&D in 2000 to 2003 */
	if CP              eq 1;		/* a single province over the sample */
	if CCPC_associated eq 0;		/* never an associated CCPC */
	if MJ              eq 0;		/* never multi-jurisdiction */
	if mf              eq 0;		/* never filed several times in a year */
	if CCPC            eq 1;		/* firm is a CCPC */

	/*	Additional variables.	*/
	tot_ass		= SCH100_2599;
	tot_liabilities	= SCH100_3499;
	cur_ass		= SCH100_1599;
	retain_e	= SCH100_3660;
	MNE		= SCH100_1740;
	tot_revs	= SCH125_8299;
	royalty_cost	= SCH125_8456 + SCH125_9277 + SCH125_9278;

	/*	rate: absolute total_interest as a percentage of SCH100_3499.	*/
	if SCH100_3499 gt 0 then rate = abs(total_interest) / SCH100_3499 * 100;
	if T661_390 eq . then T661_390 = 0;
	/*	KI: T661_390 divided by E.	*/
	if e gt 0 then KI = T661_390 / e;

	drop 	SCH100_1740	SCH100_1599	SCH100_3660	SCH100_2599
		SCH100_3499	SCH125_8456	SCH125_9277	SCH125_9278;
run;

/*******************************************************/
*	Count the number of consecutive years in operation;

/*	Keeps the firms whose number of year-to-year links (a row whose
	previous tax year is also present) plus one equals full_years,
	and writes them to samp1_full. WORK.temp1 and WORK.temp2 are
	overwritten.                                                    */

PROC sort data = full_period out = full_period;
	by firmid app_date tyear;
RUN;
DATA temp1;
	set full_period;
	keep firmid app_date tyear full_years;
RUN;
/*	Each row shifted forward one year, so that it matches the row
	of the following tax year.                                     */
DATA temp2;
	set full_period;
	tyear_lag = tyear;
	tyear = tyear + 1;
	keep firmid tyear tyear_lag;
RUN;
/*	The merge below is BY firmid tyear, while full_period is sorted
	by firmid app_date tyear. Sort both inputs on the merge keys so
	the step cannot fail when filing order and tax year disagree.  */
PROC sort data = temp1 out = temp1;
	by firmid tyear;
RUN;
PROC sort data = temp2 out = temp2;
	by firmid tyear;
RUN;
DATA temp1;
	merge temp1 (IN=x) temp2 (IN=y);
	by firmid tyear;
	if x=1 and y=1;
	dtyear = tyear - tyear_lag;
	keep firmid full_years dtyear;
RUN;
/*	Firms with a gap larger than one year. As temp2 is built,
	dtyear equals 1 on every matched row, so this list is expected
	to be empty.                                                   */
DATA temp2;
	set temp1;
	if 1 < dtyear;
	keep firmid;
RUN;
PROC sort data = temp2 out = temp2 nodupkey;
	by firmid;
RUN;
DATA temp1;
	merge temp1 (IN=x) temp2 (IN=y);
	by firmid;
	if x=1 and y^=1;
RUN;
/*	Number of year-to-year links per firm.	*/
PROC univariate data = temp1 noprint;
	by firmid full_years;
	var dtyear;
	output out = temp1
		sum = dtyear;
RUN;
DATA temp2;
	set temp1;
	if full_years = dtyear + 1;
	keep firmid;
RUN;
PROC sort data = temp2 out = temp2 nodupkey;
	by firmid;
RUN;
DATA full_period;
	merge full_period (IN=x) temp2 (IN=y);
	by firmid;
	if x=1 and y=1;
RUN;

PROC sort data = full_period out = samp1_full;
	by firmid;
RUN;

/*******************************************************/
*	Add summary statistic;

/*	Adds two variables to samp1_full, computed on tax years before
	2004 with negative values of x set to zero:
		x_z     = the statistic y of x for each firm,
		x_z_med = the median of x_z across firms.
	Parameters:
		x  analysis variable,
		y  PROC UNIVARIATE output keyword, for example mean,
		z  suffix used in the new variable names.
	samp1_full must be sorted by firmid. WORK.temp, WORK.temp2 and
	WORK.temp3 are overwritten.                                     */
%MACRO VARS(x,y,z);
DATA temp;
	set samp1_full;
	if tyear < 2004;
	if &x ^= .;
	if &x < 0 then &x = 0;
	i = 1;	/* constant key used to attach the overall median */
	keep firmid tyear &x i;
RUN;
/*	Firm-level statistic.	*/
PROC univariate data = temp noprint;
	by i firmid;
	var &x;
	output out = temp
		&y = &x._&z;
RUN;
/*	Median of the firm-level statistic.	*/
PROC univariate data = temp noprint;
	by i;
	var &x._&z;
	output out = temp2
		median = &x._&z._med;
RUN;
/*	ORDER BY makes the result safe for the BY merge below, since
	PROC SQL does not otherwise guarantee row order. Selecting the
	median column explicitly avoids the duplicate-column warning
	for i.                                                          */
PROC SQL;
	create table temp3 as
	select a.*, b.&x._&z._med
	from temp as a,
		temp2 as b
	where a.i = b.i
	order by a.firmid;
QUIT;
DATA samp1_full;
	merge samp1_full (IN=x) temp3;
	by firmid;
	if x=1;
	drop i;
RUN;
%MEND VARS;

*	4 year KI average 2000-2003;
%VARS(KI,mean,mean);

/*******************************************************/
/*	Replace missing T661 data with zero and create additional
	variables.

	Note: missing T661 information is assumed to equal 0. Firms
	have an incentive to claim expenditures when they are
	undertaken due to the potential tax benefits.                  */

data samp1_fullb;
	set samp1_full;

	/*	Zero-fill every T661 field used below.	*/
	if T661_300 eq . then T661_300 = 0;
	if T661_305 eq . then T661_305 = 0;
	if T661_310 eq . then T661_310 = 0;
	if T661_320 eq . then T661_320 = 0;
	if T661_325 eq . then T661_325 = 0;
	if T661_340 eq . then T661_340 = 0;
	if T661_345 eq . then T661_345 = 0;
	if T661_350 eq . then T661_350 = 0;
	if T661_355 eq . then T661_355 = 0;
	if T661_370 eq . then T661_370 = 0;
	if T661_390 eq . then T661_390 = 0;

	/*	Expenditure aggregates.	*/
	wages		= T661_300 + T661_305 + T661_310;
	capital		= T661_350 + T661_355 + T661_390;
	inputs		= T661_320 + T661_325;
	capital2	= T661_390;
	inputs2		= T661_320 + T661_325 + T661_350 + T661_355;
	contracts_paid	= T661_340 + T661_345 + T661_370;

	keep	firmid		tyear		naics5
		e		rate		tot_revs	retain_e
		tot_ass		tot_liabilities	mne		royalty_cost
		el_increase	ty_lag		ki_mean		ki_mean_med
		wages		capital		inputs		capital2
		inputs2		contracts_paid
		T661_300	T661_305	T661_310	T661_340
		T661_345	T661_370	T661_390	T661_400
		T661_570;
run;
proc sort data = samp1_fullb;
	by firmid;
run;

/*******************************************************/
/*******************************************************/
/*******************************************************/

*	Part 3;

/*******************************************************/
/*******************************************************/
/*******************************************************/

/*******************************************************/
*	This program is intended to provide a shock-control
	estimate of the marginal tax rate of sample firms.;

/*	This section builds WORK.collector for tax year 2000: one row
	per sample firm and 2000 filing, with the T2, Schedule 1 and
	Schedule 31 fields. Later years are appended by the DATMKR
	macro.                                                          */

*	Get a subsample of firmid;
DATA BN;
	set samp1_fullb;
	keep firmid;
RUN;
PROC sort data = BN out = BN nodupkey;
	by firmid;
RUN;

*	Make special program for 2000 MUCRD;
DATA BN;
	set BN;
	tyear = 2000;
RUN;

*	Get data from T2 for 2000;
DATA temp;
	/*	The 2000 library is declared as TYEAR2000, which is also what
		the Schedule 1 and Schedule 31 steps below read from. The
		libref SRED2000 used before is not assigned in this program. */
	set TYEAR2000.T2;
	keep 	firmid		app_date
			T2_60	T2_61
			T2_360	T2_370
			T2_410	T2_425	T2_415	T2_420	T2_430	T2_438
			T2_550
			T2_600	T2_602	T2_604	T2_608	T2_616	T2_620
			T2_628	T2_632	T2_636	T2_637	T2_638
			T2_639	T2_640	T2_644	T2_648	T2_652
			T2_700;
RUN;
PROC sort data=temp out=temp;
	by firmid app_date;
RUN;
/*	Sample firms only; a firm without a 2000 filing keeps one row
	with missing T2 fields.                                        */
DATA dat;
	merge BN (IN=x) temp;
	by firmid;
	if x=1;
RUN;
PROC sort data= dat out=dat;
	by firmid app_date;
RUN;

*	Get data from Schedule 1 for 2000;
DATA temp;
	set TYEAR2000.SCH1;
	keep firmid app_date SCH1_118 SCH1_231 SCH1_411;
RUN;
PROC sort data = temp out = temp;
	by firmid app_date;
RUN;
DATA dat;
	merge dat (IN=x) temp;
	by firmid app_date;
	if x=1;
RUN;

*	Get data from Schedule 31 for 2000;
DATA temp;
	set TYEAR2000.SCH31;
	keep	firmid 	app_date
			SCH31_101
			SCH31_350	SCH31_360 	SCH31_380
			SCH31_520	SCH31_530	SCH31_540	SCH31_550	SCH31_560	SCH31_580
			SCH31_610
			SCH31_911	SCH31_912	SCH31_913;
RUN;
PROC sort data = temp out = temp;
	by firmid app_date;
RUN;
DATA dat;
	merge dat (IN=x) temp;
	by firmid app_date;
	if x=1;
RUN;

/*	Start the stacked file with the 2000 rows.	*/
DATA collector;
	set dat;
RUN;

%MACRO DATMKR(tyear,tyear_lag);
	/*	Builds the rows of one tax year for the sample firms in
		WORK.BN and appends them to WORK.collector.
		Parameters:
			tyear      tax year to extract,
			tyear_lag  year whose Schedule 1 and Schedule 31 values
			           are attached as lag variables.
		WORK.BN, WORK.temp and WORK.dat are overwritten.             */
	DATA BN;
		set BN;
		tyear = &tyear;
	RUN;

	/*	Get data from T2				*/
	DATA temp;
		set TYEAR&tyear..T2;
		keep 	firmid		app_date
				T2_60	T2_61
				T2_360	T2_370
				T2_410	T2_425	T2_415 	T2_420	T2_430	T2_438
				T2_550
				T2_600	T2_602	T2_604	T2_608	T2_616	T2_620
				T2_628	T2_632	T2_636	T2_637	T2_638
				T2_639	T2_640	T2_644	T2_648	T2_652
				T2_700;
	RUN;
	PROC sort data=temp out=temp;
		by firmid app_date;
	RUN;
	DATA dat;
		merge BN (IN=x) temp;
		by firmid;
		if x=1;
	RUN;
	PROC sort data= dat out=dat;
		by firmid app_date;
	RUN;

	/*	Get data from Schedule 1		*/
	DATA temp;
		set TYEAR&tyear..SCH1;
		keep firmid app_date SCH1_118 SCH1_231 SCH1_411;
	RUN;
	PROC sort data = temp out = temp;
		by firmid app_date;
	RUN;
	DATA dat;
		merge dat (IN=x) temp;
		by firmid app_date;
		if x=1;
	RUN;

	/*	Get previous year data from Schedule 1.
		The lagged rows are matched on firmid only, in the same way
		as the lagged Schedule 31 values further down. Matching on
		app_date as well, as before, ties a prior-year filing to the
		current filing date, so the lag variables came out missing
		and were later set to zero.                                  */
	DATA temp;
		set TYEAR&tyear_lag..SCH1;
		lagSCH1_118	= SCH1_118;
		lagSCH1_231	= SCH1_231;
		lagSCH1_411	= SCH1_411;
		keep firmid lagSCH1_118 lagSCH1_231 lagSCH1_411;
	RUN;
	PROC sort data = temp out = temp;
		by firmid;
	RUN;
	DATA dat;
		merge dat (IN=x) temp;
		by firmid;
		if x=1;
	RUN;

	/*	Get data from Schedule 31		*/
	DATA temp;
		set TYEAR&tyear..SCH31;
		keep	firmid 	app_date
				SCH31_101
				SCH31_350	SCH31_360 	SCH31_380
				SCH31_520	SCH31_530	SCH31_540	
				SCH31_550	SCH31_560	SCH31_580
				SCH31_610
				SCH31_911	SCH31_912	SCH31_913;
	RUN;
	PROC sort data = temp out = temp;
		by firmid app_date;
	RUN;
	DATA dat;
		merge dat (IN=x) temp;
		by firmid app_date;
		if x=1;
	RUN;

	/*	Get previous year data from Schedule 31	*/
	DATA temp;
		set TYEAR&tyear_lag..SCH31;
		lagSCH31_350 = SCH31_350;
		lagSCH31_360 = SCH31_360;
		keep firmid lagSCH31_350 lagSCH31_360;
	RUN;
	PROC sort data = temp out = temp;
		by firmid;
	RUN;
	DATA dat;
		merge dat (IN=x) temp;
		by firmid;
		if x=1;
	RUN;

	/*	Append this year to the stacked file.	*/
	DATA collector;
		set collector dat;
	RUN;
%MEND DATMKR;

%DATMKR(2001,2000);
%DATMKR(2002,2001);
%DATMKR(2003,2002);
%DATMKR(2004,2003);
%DATMKR(2005,2004);
%DATMKR(2006,2005);
%DATMKR(2007,2006);

/*	Builds WORK.dat from the stacked collector file: drops rows
	without a firmid, sets missing schedule fields to zero and
	splits each tax year into the days that fall in each calendar
	year (time1999 to time2007).                                    */
DATA dat;
	set collector;

	if firmid ^= .;
	
	/*	Missing Schedule 1 and Schedule 31 fields are treated as zero.
		Lag fields are only zero-filled from tax year 2001 on, so they
		stay missing for 2000.                                       */
	if SCH1_118 = . then SCH1_118 = 0;
	if SCH1_231 = . then SCH1_231 = 0;
	if SCH1_411 = . then SCH1_411 = 0;
	if lagSCH1_118 = . and 2001 <= tyear then lagSCH1_118 = 0;
	if lagSCH1_231 = . and 2001 <= tyear then lagSCH1_231 = 0;
	if lagSCH1_411 = . and 2001 <= tyear then lagSCH1_411 = 0;
	if SCH31_350 = . then SCH31_350 = 0;
	if SCH31_360 = . then SCH31_360 = 0;
	if SCH31_380 = . then SCH31_380 = 0;
	if SCH31_520 = . then SCH31_520 = 0;
	if SCH31_530 = . then SCH31_530 = 0;
	if SCH31_540 = . then SCH31_540 = 0;
	if SCH31_550 = . then SCH31_550 = 0;
	if SCH31_560 = . then SCH31_560 = 0;
	if SCH31_580 = . then SCH31_580 = 0;
	if SCH31_610 = . then SCH31_610 = 0;
	if SCH31_911 = . then SCH31_911 = 0;
	if SCH31_912 = . then SCH31_912 = 0;
	if SCH31_913 = . then SCH31_913 = 0;

	if lagSCH31_350 = . and 2001 <= tyear then lagSCH31_350 = 0;
	if lagSCH31_360 = . and 2001 <= tyear then lagSCH31_360 = 0;

*	Time in each year;
	/*	T2_60 and T2_61 are divided by 60*60*24 before being compared
		with MDY dates, i.e. they are treated as counts of seconds.
		For tax year Y the first term of timeY is the part after
		31 December of Y-1 and the second term of time(Y-1) is the
		part up to that date; the two parts add up to the variable
		time.
		NOTE(review): for a tax year that starts after 1 January of
		year Y the earlier-year part is negative - confirm this
		cannot occur in the data.                                    */
	time1999	= (tyear = 2000)* (MDY(12,31,1999)+1-(T2_60)/60/60/24);
	time2000	= (tyear = 2000)*((T2_61+1)/60/60/24 - MDY(12,31,1999)-1) 
				+ (tyear = 2001)* (MDY(12,31,2000)+1-(T2_60)/60/60/24);

	time2001	= (tyear = 2001)*((T2_61+1)/60/60/24 - MDY(12,31,2000)-1) 
				+ (tyear = 2002)* (MDY(12,31,2001)+1-(T2_60)/60/60/24);
	time2002	= (tyear = 2002)*((T2_61+1)/60/60/24 - MDY(12,31,2001)-1) 
				+ (tyear = 2003)* (MDY(12,31,2002)+1-(T2_60)/60/60/24);
	time2003	= (tyear = 2003)*((T2_61+1)/60/60/24 - MDY(12,31,2002)-1) 
				+ (tyear = 2004)* (MDY(12,31,2003)+1-(T2_60)/60/60/24);
	time2004	= (tyear = 2004)*((T2_61+1)/60/60/24 - MDY(12,31,2003)-1) 
				+ (tyear = 2005)* (MDY(12,31,2004)+1-(T2_60)/60/60/24);
	time2005	= (tyear = 2005)*((T2_61+1)/60/60/24 - MDY(12,31,2004)-1) 
				+ (tyear = 2006)* (MDY(12,31,2005)+1-(T2_60)/60/60/24);
	time2006	= (tyear = 2006)*((T2_61+1)/60/60/24 - MDY(12,31,2005)-1) 
				+ (tyear = 2007)* (MDY(12,31,2006)+1-(T2_60)/60/60/24);
	time2007	= (tyear = 2007)*((T2_61+1)/60/60/24 - MDY(12,31,2006)-1) 
				+ (tyear = 2008)* (MDY(12,31,2007)+1-(T2_60)/60/60/24);
	/*	Days from 2004 on, and total length of the tax year in days.	*/
	time2004p	= time2004 + time2005 + time2006 + time2007;
	time		= (T2_61+1-T2_60)/60/60/24;

RUN;
PROC sort data =  dat out = dat;
	by firmid tyear;
RUN;

/*	Add lagged taxable income: pull TY_lag from samp1_fullb and
	attach it to dat by firm and tax year. Only rows already present
	in dat are retained.	*/
PROC sort data = samp1_fullb (keep = firmid tyear TY_lag) out = temp;
	by firmid tyear;
RUN;
DATA dat;
	merge dat (IN = in_dat) temp;
	by firmid tyear;
	if in_dat;
RUN;

/*******************************************************/
/*******************************************************/
*	Approximate tax at the margin;

/*	MUCRD: for every row of dat, recompute tax and investment-tax-
	credit (ITC) amounts under four scenarios and keep the scenario
	results plus the differences between scenarios 2 and 1.

	Layout of the step:
	  1. quantities computed once per row (other_deds, BL, EL, and
	     the baseline J-O items);
	  2. definition of macro TAXITC, which holds the per-scenario
	     calculation (the definition itself adds no statements to
	     the step; code is only generated by the four %TAXITC calls
	     further down);
	  3. scenario inputs DR1-DR4, CRD1-CRD4, KRD1-KRD4;
	  4. the four %TAXITC calls, the differences, and the KEEP list.

	NOTE(review): the tax rates and dollar thresholds below are hard
	coded; their statutory source is not shown in this file.	*/
DATA MUCRD;
	set dat;
	
	/*******************************************************/
	*	Variables that are given.;
	
	/*	Sum of the listed T2 items plus the non-negative part of
		T2_652 - SCH31_560. SUM() skips missing arguments.	*/
	other_deds = sum(	T2_616,T2_620,T2_628,T2_632,T2_636,
				T2_438,T2_640,T2_644,T2_648,
				max(0,T2_652-SCH31_560));

		/*	BL: average of the per-calendar-year amounts, weighted by
			the number of days of the tax year falling in each calendar
			year (time1999-time2007, computed upstream), rounded to the
			nearest 1000. Presumably the business limit in force -
			TODO confirm.	*/
		BL	= (
				200000 * (time1999 + time2000 + time2001 + time2002) +
				225000 *  time2003 +
				250000 *  time2004 +
				300000 * (time2005 + time2006) +
				400000 *  time2007
				)/time;
		BL	= round(BL,1000);
	/*	Two variants of the expenditure limit, both based on TY_lag and
		scaled by T2_425/BL clamped to the interval [0,1].	*/
	ELpre	= max(0,4000000-max(TY_lag,200000)*10)*min(1,max(0,T2_425/BL));
	ELpos	= max(0,5000000-max(TY_lag,300000)*10)*min(1,max(0,T2_425/BL));

	Ecred	= SCH31_380;

	/*	ELpre applies when tyear <= 2003, ELpos when 2004 <= tyear.	*/
	EL	= (tyear <= 2003)* ELpre + (2004 <= tyear)* ELpos;

	/*	Baseline credit items from the reported SCH31_350 / SCH31_360:
		35% on the part within EL, 20% on the part above it. L is the
		EL room left after SCH31_350. J, K, L, M and N are overwritten
		inside every TAXITC call; O (the part of SCH31_540 not
		explained by J+K+M+N) is computed only here and reused
		unchanged by all four calls.	*/
	J	= 0.35*min(SCH31_350,EL);
	K	= 0.20*max(0,SCH31_350-EL);
	L	= max(0,EL-SCH31_350);
	M	= 0.35*min(SCH31_360,L);
	N	= 0.20*max(0,SCH31_360-L);
	O	= max(0,SCH31_540-J-K-M-N);

	*	Calculate changes in taxable income;

/*	TAXITC(DR,CRD,KRD,z): one scenario of the tax / credit calculation.
	  DR  - name of the variable added to T2_360 - T2_370
	  CRD - name of the variable used in place of SCH31_350
	  KRD - name of the variable used in place of SCH31_360
	  z   - suffix appended to the stored results (..p&z, tot_taxes&z,
	        itc_refund&z)
	The unsuffixed work variables (y, tax, J, K, L, M, N, Q, AA-QQ,
	SCH31_560p, SCH31_610p, ...) are shared and overwritten by every
	call.	*/
%MACRO TAXITC(DR,CRD,KRD,z);
	/*******************************************************/
	*	Original tax calculation;
	y	= max(0,T2_360 - T2_370 + &DR);

	/*	General formula; the two time-weighted reduction terms use the
		share of the tax year falling in each calendar year.	*/
	tax	= 	0.2912 * max(0,y)
		  -	0.1600 * min(T2_425,max(0,y))
		  -	0.0700 * min(max(0,max(0,y)-T2_425),max(0,300000-T2_425)) 
					   * (time2001 + time2002 + time2003 + time2004p) / time
		  - max(0,max(0,y)-300000) 
			*( 0.01*time2001 + 0.03*time2002 
				+ 0.05*time2003 + 0.07*time2004p) / time;

	/*	For 2006 <= tyear the value above is replaced by this simpler
		formula without time weighting.	*/
	if 2006 <= tyear then
	tax	= 	0.2912 * max(0,y)
		  -	0.1600 * min(T2_425,max(0,y))
		  -	0.0700 * max(0,max(0,y)-T2_425);

	/*******************************************************/
	*	Expenditure limit;

	/*	Same split as the baseline J-N above, but using the scenario
		inputs &CRD and &KRD.	*/
	J	= 0.35*min( &CRD ,EL);
	K	= 0.20*max(0, &CRD -EL);
	L	= max(0,EL- &CRD );
	M	= 0.35*min( &KRD ,L);
	N	= 0.20*max(0, &KRD -L);

	/*	Credits available, net of the SCH31_911/912/913/580 amounts,
		and tax net of other_deds; both floored at 0.	*/
	ITC_onhand		= SCH31_520 + SCH31_530 + J + K + M + N + O + SCH31_550;
	NITC_onhandp&z	= max(0,sum(ITC_onhand,- sum(SCH31_911,SCH31_912,SCH31_913,SCH31_580)));
	ntaxp&z			= max(0,sum(tax,-other_deds));

	/*	Credit applied against tax: the smaller of net credits on hand
		and net tax.	*/
	SCH31_560p 	= max( 0, min( NITC_onhandp&z, ntaxp&z ) );

	/*	Q: net credits left after the amount applied against tax.	*/
	Q	= max(0,sum(NITC_onhandp&z,-SCH31_560p));

	/*	First variant (result II): remaining credit capped at the
		current-year credits BB; the part up to J counts in full, the
		rest at 40%.	*/
	AA	= Q;
	BB	= J + K + M + N + SCH31_550;
	CC	= min(AA,BB);
	DD	= J;
	EE	= min(CC,DD);
	FF  	= max(0,CC-EE);
	GG  	= 0.4*FF;
	HH	= EE;
	II	= GG + HH;

	/*	Second variant (result QQ): the part of Q up to J counts in
		full, plus 40% of the remainder capped at M.	*/
	JJ	= Q;
	KK	= J;
	LL	= min(JJ,KK);
	MM	= max(0,JJ-LL);
	NN	= M;
	OO	= 0.4*min(MM,NN);
	PP	= LL;
	QQ	= OO + PP;

	/*	SCH31_610p defaults to II and is QQ when SCH31_101 = 0. A
		missing SCH31_101 is filled with 0 when ty_lag > T2_410 and
		with 1 otherwise. The fill changes SCH31_101 itself, so it
		only has an effect in the first call for a row.	*/
	SCH31_610p = II;
	if SCH31_101 = . and ty_lag > T2_410 then SCH31_101 = 0;
	if SCH31_101 = . then SCH31_101 = 1;
	if SCH31_101 = 0 then SCH31_610p = QQ;

	tot_taxes&z		= max(0,ntaxp&z);
	itc_refund&z		= sum(SCH31_610p,SCH31_560p);

	/*	Store this scenario's intermediate values under the suffix &z.
		NOTE(review): LLp&z is assigned twice below with the same
		value.	*/
	SCH31_560p&z		= SCH31_560p;
	SCH31_610p&z		= SCH31_610p;

	BBp&z			= BB;

	Qp&z			= Q;
	Jp&z			= J;
	Kp&z			= K;
	Mp&z			= M;
	Np&z			= N;

	AAp&z			= AA;
	DDp&z			= DD;

	CCp&z			= CC;
	LLp&z			= LL;

	FFp&z			= FF;
	HHp&z			= HH;
	GGp&z			= GG;
	LLp&z			= LL;
	PPp&z			= PP;

	KKp&z			= KK;

	typ&z			= y;

%MEND TAXITC;

	/*	Scenario inputs.
		  1: reported values (DR1 = 0, SCH31_350, SCH31_360)
		  2: scenario 1 with DR lowered by 1 and CRD raised by 1
		  3: DR3 = SCH1_411 - lagSCH1_411, with the lagged SCH31_350 /
		     SCH31_360 as CRD / KRD
		  4: scenario 3 with DR lowered by 1 and CRD raised by 1
		KRD is not perturbed in scenarios 2 and 4.
		For tyear = 2000 a missing lag value falls back to the current
		value.	*/
	DR1	= 0;
	DR2	= DR1 - 1;
	if tyear = 2000 and lagSCH1_411 = . then lagSCH1_411 = SCH1_411;
	DR3	= SCH1_411 - lagSCH1_411;
	DR4	= DR3 - 1;

	CRD1 = SCH31_350;
	CRD2 = SCH31_350 + 1;

	if tyear = 2000 and lagSCH31_350 = . then lagSCH31_350 = SCH31_350;
	CRD3 = lagSCH31_350;
	CRD4 = lagSCH31_350 + 1;

	if tyear = 2000 and lagSCH31_360 = . then lagSCH31_360 = SCH31_360;
	KRD1 = SCH31_360;
	KRD2 = KRD1;
	KRD3 = lagSCH31_360;
	KRD4 = KRD3;

%TAXITC(DR1,CRD1,KRD1,1);
%TAXITC(DR2,CRD2,KRD2,2);
%TAXITC(DR3,CRD3,KRD3,3);
%TAXITC(DR4,CRD4,KRD4,4);

/*	Effect of the unit perturbation (scenario 2 versus scenario 1):
	ditc = change in itc_refund, dtax = change in tot_taxes; tao1 is
	dtax rounded to 4 decimals.	*/
ditc	= itc_refund2-itc_refund1;
dtax	= tot_taxes1-tot_taxes2;
tao1	= round(tot_taxes1-tot_taxes2,0.0001);

/*	NOTE(review): this fill runs after SCH31_540 was already used to
	compute O above; it only affects the value written to MUCRD.	*/
if SCH31_540 = . then SCH31_540 = 0;

/*	NOTE(review): EL, SCH31_380 and the LLp1/LLp2 names appear more than
	once in this list. T2_415, T2_420 and T2_700 are not referenced
	anywhere else in this step. For scenarios 3 and 4 only tot_taxes and
	itc_refund are kept.	*/
keep	firmid 		tyear 		app_date		SCH31_350	SCH31_360 SCH31_380 EL SCH31_101 SCH31_540
		SCH31_610p1	SCH31_610p2	SCH31_560p1	SCH31_560p2
		tot_taxes1	tot_taxes2	tot_taxes3	tot_taxes4
		itc_refund1	itc_refund2	itc_refund3	itc_refund4
		ditc		dtax		tao1
		T2_415 	T2_420 	T2_425	AAp1	AAp2	DDp1	DDp2	KKp1	KKp2
		EL			SCH31_380		T2_700
		NITC_onhandp1 ntaxp1 NITC_onhandp2 ntaxp2
		Jp1 Jp2 Kp1 Kp2 Mp1 Mp2 Np1 Np2 CCp1 CCp2 LLp1 LLp2 FFp1 FFp2 Qp1 Qp2 LLp1 LLp2
		HHp1 GGp1 HHp2 GGp2 LLp1 PPp1 LLp2 PPp2 BBp1 BBp2 typ1 ELpre ELpos Ecred;
RUN;

*	Disaggregate MUCRD into credit effects;
/*	disag: split the effect of the unit perturbation (scenario 2 versus
	scenario 1 in MUCRD) into a credit part (ditc3) and a tax part
	(dtax3). Which formula applies depends on whether net tax covers
	the net credits on hand in each scenario and on how the credits not
	earned in the current year (NITC_onhand - BB) compare with net tax.

	The cases are evaluated in the order below and a later case
	overwrites an earlier one when both apply (cases 3 and 4 are not
	mutually exclusive). Only rows for which ditc3 + dtax3 reproduces
	dtax + ditc to 4 decimals are written out.

	Changes relative to the earlier version (results are unchanged):
	  - every condition used to be written twice, once for ditc3 and
	    once for dtax3; each pair is now one DO block;
	  - case 4 had separate DDp1 < DDp2 and DDp1 > DDp2 branches that
	    re-assigned exactly the values already set by the general
	    case-4 branch; they are folded into it;
	  - the KEEP list no longer names "check" (never created, produced
	    a log warning) and lists BBp2 once.
	The helper variables below are not in the KEEP list and therefore
	never reach the output data set.	*/
DATA disag;
	set mucrd;

	/*	short1 / short2: net tax is smaller than net credits on hand in
		scenario 1 / 2.	*/
	short1		= (ntaxp1 < nitc_onhandp1);
	short2		= (ntaxp2 < nitc_onhandp2);
	/*	Net credits on hand in excess of the current-year credits BB.	*/
	excess1		= max(0,NITC_onhandp1 - BBp1);
	excess2		= max(0,NITC_onhandp2 - BBp2);
	/*	Credit change used by cases 4, 5 and 6: the change in J when J
		rises, otherwise 40% of the rise in K (times SCH31_101).	*/
	jk_credit	= round(max(0,Jp2 - Jp1),0.0001)*(Jp1<Jp2)
				+ round(max(0,0.4*(Kp2-Kp1)),0.0001)*(Jp2 <= Jp1)*SCH31_101;

	/*	1.	Perfect scenario: net tax covers all credits in scenario 2.	*/
	if not short2 then do;
		ditc3 = round(nitc_onhandp2 - nitc_onhandp1,0.0001);
		dtax3 = round(ntaxp1 - ntaxp2,0.0001);
	end;

	/*	2.	(a) Original validation note: 3 wrong - 106120959 2001,
			121720270 2000, 139426001 2005.	*/
	if short2 and not short1 and excess2 <= ntaxp2 then do;
		share = (Jp1<Jp2) + 0.4*(Jp1=Jp2)*SCH31_101;
		ditc3 = round((nitc_onhandp2 - nitc_onhandp1)*share,0.0001);
		dtax3 = round((ntaxp1 - ntaxp2)*share,0.0001);
	end;

	/*	3.	(I) Original validation note: 8 wrong.	*/
	if short2 and short1 and excess1 => ntaxp1 then do;
		ditc3 = round(max(0,Jp2 - Jp1),0.0001)
			  + round(max(0,0.4*(Kp2-Kp1)),0.0001)*SCH31_101;
		dtax3 = round(max(0,0.4*(FFp1 - FFp2)),0.0001)*(SCH31_101 = 1);
	end;

	/*	4.	(II) Perfect. DDp1 = DDp2 has its own formulas; all other
			rows use the general split.	*/
	if short2 and short1 and excess2 < ntaxp2 then do;
		if DDp1 = DDp2 then do;
			ditc3 = round(min(max(BBp2-BBp1,0),max(SCH31_610p2-SCH31_610p1,0)),0.0001);
			dtax3 = round(max(0,SCH31_610p2-SCH31_610p1)-ditc3,0.0001);
		end;
		else do;
			ditc3 = jk_credit;
			dtax3 = round(SCH31_610p2 - SCH31_610p1 - ditc3,0.0001);
		end;
	end;

	/*	A credit part strictly between 0.08 and 0.2 is capped at 0.08 and
		the remainder is moved to the tax part. This applies to whatever
		cases 1-4 produced; cases 5 and 6 below are assigned afterwards
		and are not adjusted. dtax3 must be set before ditc3 is
		overwritten.	*/
	if 0.08 < ditc3 < 0.2 then do;
		dtax3 = ditc3 - 0.08;
		ditc3 = 0.08;
	end;

	/*	5.	(III) Perfect.	*/
	if short2 and short1 and excess1 < ntaxp1 and excess2 => ntaxp2 then do;
		ditc3 = jk_credit;
		dtax3 = round(SCH31_610p2 - SCH31_610p1 - ditc3,0.0001);
	end;

	/*	6.	(IV)	*/
	if short2 and not short1 and excess2 > ntaxp2 then do;
		ditc3 = jk_credit;
		dtax3 = round((ntaxp1 - ntaxp2)*( (Jp1<Jp2) + 0.4*(Jp1=Jp2)*SCH31_101 ),0.0001);
	end;

	/*	Keep only rows where the decomposition adds up to the total.	*/
	if round(1 - (tot_taxes1-tot_taxes2) - (itc_refund2-itc_refund1),0.0001) = round(1 - ditc3 - dtax3,0.0001);

	keep firmid tyear Jp1 Jp2 Kp1 Kp2 SCH31_101 ntaxp1 ntaxp2 ditc3 dtax3 NITC_onhandp2 SCH31_540
			AAp1 AAp2 BBp1 BBp2 DDp1 DDp2 dtax ditc;
RUN;

/*	disagout: the decomposition results from disag, reduced to the
	merge keys and the four effect variables, sorted by firm and tax
	year.	*/
PROC sort data = disag (keep = firmid tyear ditc3 dtax3 dtax ditc)
		  out  = disagout;
	by firmid tyear;
RUN;

/*******************************************************/
/*******************************************************/
*	Join data to the main dataset;

/*	Attach the expenditure-limit variables, Ecred and tao1 from MUCRD to
	every row of samp1_fullb (left join on firmid and tyear; rows that
	exist only in MUCRD are dropped).	*/
PROC sort data = MUCRD (keep = firmid tyear ELpre ELpos Ecred tao1)
		  out  = temp;
	by firmid tyear;
RUN;
PROC sort data = samp1_fullb out = temp2;
	by firmid tyear;
RUN;
DATA temp;
	merge temp2 (IN = in_sample) temp;
	by firmid tyear;
	if in_sample;
RUN;

/*******************************************************/
/*******************************************************/
/*******************************************************/

/*	PK(x): total SCH8_203 per firm for tax year x, appended to pkout.
	  x - four-digit year; selects library TYEAR&x and is stored in
	      the variable tyear.
	Result: one row per firmid with physicali = sum of SCH8_203 over
	all of that firm's rows in TYEAR&x..SCH8, added to the end of
	WORK.pkout. The work data set pk is overwritten on every call.

	The sort key is now the same as the BY statement of the summary
	step (tyear firmid). Previously the data were sorted by firmid
	only, which worked solely because tyear is constant within a call.
	If pkout does not exist yet it is created from this year's rows, so
	the macro no longer depends on the caller seeding pkout first. An
	existing pkout, including an empty seed, is appended to exactly as
	before.	*/
%MACRO PK(x);
	DATA pk;
		set TYEAR&x..SCH8;
		tyear = &x;
		keep tyear firmid SCH8_203;
	RUN;
	PROC sort data= pk out=pk;
		by tyear firmid;
	RUN;
	PROC univariate data=pk noprint;
		by tyear firmid;
		var SCH8_203;
		output out=pk
			sum= physicali;
	RUN;
	%if %sysfunc(exist(pkout)) %then %do;
		DATA pkout;
			set pkout pk;
		RUN;
	%end;
	%else %do;
		DATA pkout;
			set pk;
		RUN;
	%end;
%MEND PK;

/*	Collect physicali for 2000-2007 and attach it to temp.
	pkout is seeded with an empty step (one row, no variables) so that
	each PK call can append to it; the seed row has a missing firmid
	and is removed again below.	*/
DATA pkout;
RUN;

%PK(2000);	%PK(2001);	%PK(2002);	%PK(2003);
%PK(2004);	%PK(2005);	%PK(2006);	%PK(2007);

DATA pkout;
	set pkout;
	if firmid = . then delete;
RUN;
PROC sort data = pkout;
	by firmid tyear;
RUN;
PROC sort data = temp;
	by firmid tyear;
RUN;

/*	Left join: firm-years without any SCH8 rows get physicali = 0.	*/
DATA temp;
	merge temp (IN = in_main) pkout;
	by firmid tyear;
	if in_main;
	if physicali = . then physicali = 0;
RUN;

/*	keep_firmid: for every firm, the number of its rows in temp on which
	all of ty_lag, rate, tot_revs, tot_ass, tot_liabilities, mne and
	royalty_cost are non-missing (0 when the firm has no such row).	*/
PROC sort data = temp;
	by firmid;
RUN;

/*	Rows with a complete set of the seven variables.	*/
DATA temp_iv;
	set temp;
	if		ty_lag			^= .
		and	rate			^= .
		and	tot_revs		^= .
		and	tot_ass			^= .
		and	tot_liabilities	^= .
		and	mne				^= .
		and	royalty_cost	^= .;
RUN;

/*	Count the complete rows per firm.	*/
PROC freq data = temp_iv noprint;
	table firmid / out = check_temp_iv;
RUN;
DATA check_temp_iv;
	set check_temp_iv;
	keep_firmid = count;
	keep firmid keep_firmid;
RUN;

/*	Copy the count onto every row of the firm, then restore the
	firm-year sort order.	*/
DATA temp;
	merge temp (IN = in_main) check_temp_iv;
	by firmid;
	if in_main;
	if keep_firmid = . then keep_firmid = 0;
RUN;
PROC sort data = temp;
	by firmid tyear;
RUN;

/*	Create final data for analysis: every row of temp, with the
	decomposition variables from disagout attached where a matching
	firmid / tyear exists.	*/
DATA FINAL_DATA;
	merge temp (IN = in_main) disagout;
	by firmid tyear;
	if in_main;
RUN;