function output = rddmmsef(X,Y,D,c) %-------------------------------------------------------------------------- % rddmmse.m: compute the sharp regression discontinuity estimator %-------------------------------------------------------------------------- % % DESCRIPTION: % Estimate the fuzzy regression discontinuity estimator with the % bandwidths proposed by Arai and Ichimura (2015). % % REFERENCE: % Arai and Ichimura (2015). "Optimal Bandwidth Selection for the % Fuzzy Regression Discontinuity Estimator," GRIPS Discussion Paper, % 15-13. % % % INPUT ARGUMENTS % X: Imput vector (n by 1) of the forcing variable % Y: Vector of the outcome variable (n by 1) % D: Treat mentstatus (n by 1) % c: Evaluation (discontinuity) point (scalar) % % OUTPUT ARGUMENTS: % tau: fuzzy RD estimate % se: standard error % h: bandwidths, the first element for the right of the cut-off % the second element for the left of the cut-off % tval: bias-corrected t-value % ci: bias-corrected confidence interval, See Section 4.4 of % Fan and Gijbels (1996) % n: the number of observations with nonzero weight, % the first element for the right of the cut-off, % the second element for the left of the cut-off % % AUTHOR: % This code was written by Yoichi Arai. % I will appreciate any feedbacks. % % Note: % Optimization toolbox is necessary. % % % % DEVELOPMENT: % 4 Septermber 2015: Original version of rddmmse.m written. %-------------------------------------------------------------------------- % Check input arguments if nargin < 4 error('more input arguments needed.'); end if nargin > 4 error('Too many arguments.'); end % Step 1. Estimation of density and its first derivative % Density Estimation n = size(X,1); lambda = sqrt(var(X)); a_pilot = 2.34*lambda/n^(1/5); f_k = kdensity_est(X,c,a_pilot); % kernel = 2, Epanechnikov h1_pilot = lambda*(105*16*sqrt(pi)/15/n)^(1/7); f1_pilot = density1d_est(X,c,h1_pilot); % Step 2: Get pilot bandwidths to estimate derivatives for m_y % 4th order Polynomial regression Xbin_lt_x = (X=c); % bigger than x num_X_bt_x = sum(Xbin_bt_x); % # of observations of X bigger than x, N+ num_X_lt_x = sum(Xbin_lt_x); % # of observations of X less than x, N- in p.16 Xx = X-c; Xx2 = Xx.^2; Xx3 = Xx.*Xx2; Xx4 = Xx2.^2; nones = ones(n,1); data_mat = [Y nones Xx Xx2 Xx3 Xx4]; data_mat_p = data_mat(Xbin_bt_x==1,:); % data_mat only for positive X data_mat_m = data_mat(Xbin_lt_x==1,:); % data_mat only for negative X YT_pc = data_mat_p(:,1); TT_pc = data_mat_p(:,2:6); lambda_pc_4_4g = TT_pc\YT_pc; m41_4 = 24*lambda_pc_4_4g(5,1); YT_mc = data_mat_m(:,1); TT_mc = data_mat_m(:,2:6); lambda_mc_4_4g = TT_mc\YT_mc; m42_4 = 24*lambda_mc_4_4g(5,1); vy_p_plugin_4g = sum((YT_pc-TT_pc*lambda_pc_4_4g).^2)/(num_X_bt_x-5); vy_m_plugin_4g = sum((YT_mc-TT_mc*lambda_mc_4_4g).^2)/(num_X_lt_x-5); % Pilot bandwidths for 3rd order LPR to estimate 3rd derivative with uniform kernel band_comp_p = (vy_p_plugin_4g/f_k/m41_4^2/num_X_bt_x)^(1/9); band_comp_m = (vy_m_plugin_4g/f_k/m42_4^2/num_X_lt_x)^(1/9); C33K_U = Const_LPR_U_est(3,3); h3_p = C33K_U*band_comp_p; h3_m = C33K_U*band_comp_m; % Pilot bandwidths for 3rd order LPR to estimate 2nd derivative with uniform kernel C23K_U = Const_LPR_U_est(2,3); h23_p = C23K_U*band_comp_p; h23_m = C23K_U*band_comp_m; % Step 3: Estimate the second and the third derivatives of m_y % 3rd order LPR for the third derivatives Xbin_bet_xxh = Xbin_bt_x.*(X<=c+h3_p); % index for X in [x, x+h3_p] data_mat_4_p = data_mat(Xbin_bet_xxh==1,:); YT_p = data_mat_4_p(:,1); T_p = data_mat_4_p(:,2:5); lambda_p_all_33_4 = T_p\YT_p; Xbin_bet_xhx = (X>=c-h3_m).*Xbin_lt_x; % index for X in [x-h3_m, x] data_mat_4_m = data_mat(Xbin_bet_xhx==1,:); YT_m = data_mat_4_m(:,1); T_m = data_mat_4_m(:,2:5); lambda_m_all_33_4 = T_m\YT_m; m3c_p = 6*lambda_p_all_33_4(4,1); m3c_m = 6*lambda_m_all_33_4(4,1); % 3rd order LPR for the second derivatives of m_y Xbin_bet_xxh = Xbin_bt_x.*(X<=c+h23_p); % index for X in [x, x+h3_p] data_mat_4_p = data_mat(Xbin_bet_xxh==1,:); YT_p = data_mat_4_p(:,1); T_p = data_mat_4_p(:,2:5); lambda_p_all_23 = T_p\YT_p; m2c_p_2 = 2*lambda_p_all_23(3,1); YT_p_hat = T_p*lambda_p_all_23; vy_p_4_2 = sum((YT_p - YT_p_hat).^2)/(size(YT_p,1)-4); Xbin_bet_xhx = (X>=c-h23_m).*Xbin_lt_x; % index for X in [x-h3_m, x] data_mat_4_m = data_mat(Xbin_bet_xhx==1,:); YT_m = data_mat_4_m(:,1); T_m = data_mat_4_m(:,2:5); lambda_m_all_23 = T_m\YT_m; m2c_m_2 = 2*lambda_m_all_23(3,1); YT_m_hat = T_m*lambda_m_all_23; vy_m_4_2 = sum((YT_m - YT_m_hat).^2)/(size(YT_m,1)-4); m2_pilot_y = [m2c_p_2 m2c_m_2]; m3_pilot_y = [m3c_p m3c_m]; v_pilot_y = [vy_p_4_2 vy_m_4_2]; % Step 4: Get pilot bandwidths to estimate derivatives of m_D % 4th order Polynomial regression data_mat2 = [D nones Xx Xx2 Xx3 Xx4]; data_mat2_p = data_mat2(Xbin_bt_x==1,:); % data_mat only for positive X data_mat2_m = data_mat2(Xbin_lt_x==1,:); % data_mat only for negative X DT_pc = data_mat2_p(:,1); lambda_pc_4_4g_d = TT_pc\DT_pc; m41_4_d = 24*lambda_pc_4_4g_d(5,1); DT_mc = data_mat2_m(:,1); lambda_mc_4_4g_d = TT_mc\DT_mc; m42_4_d = 24*lambda_mc_4_4g_d(5,1); vy_p_plugin_4g_d = sum((DT_pc-TT_pc*lambda_pc_4_4g_d).^2)/(num_X_bt_x-5); vy_m_plugin_4g_d = sum((DT_mc-TT_mc*lambda_mc_4_4g_d).^2)/(num_X_lt_x-5); % Pilot bandiwidths for 3rd order LPR to estimate 3rd derivative with uniform kernel band_comp_p_d = (vy_p_plugin_4g_d/f_k/m41_4_d^2/num_X_bt_x)^(1/9); band_comp_m_d = (vy_m_plugin_4g_d/f_k/m42_4_d^2/num_X_lt_x)^(1/9); h3_p_d = C33K_U*band_comp_p_d; h3_m_d = C33K_U*band_comp_m_d; % Pilot bandwidths3rd order LPR to estimate 2nd derivative with uniform kernel h23_p_d = C23K_U*band_comp_p_d; % possible to use either v_pilot_24 or v_pilot_23 h23_m_d = C23K_U*band_comp_m_d; % Step 5: Estimate the second and the third derivatives of m_D % 3rd order LPR for the third derivatives Xbin_bet_xxh_d = Xbin_bt_x.*(X<=c+h3_p_d); % index for X in [x, x+h3_p] data_mat_4_p_d = data_mat2(Xbin_bet_xxh_d==1,:); DT_p = data_mat_4_p_d(:,1); T_p_d = data_mat_4_p_d(:,2:5); lambda_p_all_33_4_d = T_p_d\DT_p; m3c_p_d = 6*lambda_p_all_33_4_d(4,1); Xbin_bet_xhx_d = (X>=c-h3_m_d).*Xbin_lt_x; % index for X in [x-h3_m, x] data_mat_4_m_d = data_mat2(Xbin_bet_xhx_d==1,:); DT_m = data_mat_4_m_d(:,1); T_m_d = data_mat_4_m_d(:,2:5); lambda_m_all_33_4_d = T_m_d\DT_m; m3c_m_d = 6*lambda_m_all_33_4_d(4,1); Xbin_bet_xxh_d = (X>=c).*(X<=c+h23_p_d); % index for X in [x, x+h3_p] data_mat_4_p_d = data_mat2(Xbin_bet_xxh_d==1,:); DT_p = data_mat_4_p_d(:,1); T_p_d = data_mat_4_p_d(:,2:5); lambda_p_all_23_d = T_p_d\DT_p; m2c_p_2_d = 2*lambda_p_all_23_d(3,1); DT_p_hat = T_p_d*lambda_p_all_23_d; vy_p_4_2_d = sum((DT_p - DT_p_hat).^2)/(size(DT_p,1)-4); Xbin_bet_xhx_d = (X>=c-h23_m_d).*(X<=c); % index for X in [x-h3_m, x] data_mat_4_m_d = data_mat2(Xbin_bet_xhx_d==1,:); DT_m = data_mat_4_m_d(:,1); T_m_d = data_mat_4_m_d(:,2:5); lambda_m_all_23_d = T_m_d\DT_m; m2c_m_2_d = 2*lambda_m_all_23_d(3,1); DT_m_hat = T_m_d*lambda_m_all_23_d; vy_m_4_2_d = sum((DT_m - DT_m_hat).^2)/(size(DT_m,1)-4); v_pilot_t = [vy_p_4_2_d vy_m_4_2_d]; % Compute covariances Xbin_p_ai = Xbin_bet_xxh.*Xbin_bet_xxh_d; TT = data_mat(:,2:5); res_y_p = Y - TT*lambda_p_all_23; res_d_p = D - TT*lambda_p_all_23_d; cov_p_ai = (res_y_p.*Xbin_p_ai)'*res_d_p/(num_X_bt_x - 4); Xbin_m_ai = Xbin_bet_xhx.*Xbin_bet_xhx_d; res_y_m = Y - TT*lambda_m_all_23; res_d_m = D - TT*lambda_m_all_23_d; cov_m_ai = (res_y_m.*Xbin_m_ai)'*res_d_m/(num_X_lt_x - 4); cov_pilot = [cov_p_ai cov_m_ai]; m2_pilot_t = [m2c_p_2_d m2c_m_2_d]; m3_pilot_t = [m3c_p_d m3c_m_d]; % Step 6: Get the pilot estimate for tau options = optimset('Algorithm','interior-point','Display','off'); Qn_value = @(theta)Qn(theta,f_k,f1_pilot,m2_pilot_y,m3_pilot_y,v_pilot_y,n); % Lower bound for AI search area NN_min_AI = 3; X_lt_x = data_mat_m(:,3); X_bt_x = data_mat_p(:,3); sorted_X_bt_x = sort(X_bt_x); % ascending order sorted_X_lt_x_des_abs = abs(sort(X_lt_x,'descend')); NN_p_AI = sorted_X_bt_x(NN_min_AI,1); NN_m_AI = sorted_X_lt_x_des_abs(NN_min_AI,1); h_lb = [NN_p_AI,NN_m_AI]; % Upper bound for AI search area hu1 = sorted_X_bt_x(num_X_bt_x,1); hu2 = sorted_X_lt_x_des_abs(num_X_lt_x,1); h_ub = [hu1,hu2]; h_mat_local = zeros(10,2); min_val = zeros(10,1); h1_range = sorted_X_bt_x(num_X_bt_x,1) - sorted_X_bt_x(3,1); h2_range = sorted_X_lt_x_des_abs(num_X_lt_x,1) - sorted_X_lt_x_des_abs(3,1); for k=1:10 for j=1:10 h_ini_1 = sorted_X_bt_x(3,1) + 0.1*h1_range*j; h_ini_2 = sorted_X_lt_x_des_abs(3,1) + 0.1*h2_range*k; h_ini = [h_ini_1 h_ini_2]; [theta_2, fval_2] = fmincon(Qn_value,h_ini,[],[],[],[],h_lb,h_ub,[],options); h_mat_local(10*(k-1)+j,:) = theta_2; min_val(10*(k-1)+j,1) = fval_2; end end [~, min_index] = min(min_val); h_sharp = h_mat_local(min_index,:); [tau_pilot,~,~,~,~] = frdd_est(Y,D,X,c,h_sharp(1),h_sharp(2)); % Step 7: Get bandwidths that minimize MMSE m2_pilot = [m2_pilot_y m2_pilot_t]; m3_pilot = [m3_pilot_y m3_pilot_t]; v_pilot = [v_pilot_y v_pilot_t cov_pilot]; Qn_value_f = @(theta)Qn_FRDD(theta,f_k,f1_pilot,m2_pilot,m3_pilot,v_pilot,tau_pilot,n); h_mat_local_f = zeros(10,2); min_val_f = zeros(10,1); for k=1:10 for j=1:10 h_ini_1 = sorted_X_bt_x(3,1) + 0.1*h1_range*j; h_ini_2 = sorted_X_lt_x_des_abs(3,1) + 0.1*h2_range*k; h_ini = [h_ini_1 h_ini_2]; [theta_f, fval_f] = fmincon(Qn_value_f,h_ini,[],[],[],[],h_lb,h_ub,[],options); h_mat_local_f(j,:) = theta_f; min_val_f(j,1) = fval_f; end end [~, min_index] = min(min_val_f); h_opt = h_mat_local_f(min_index,:); [tau,se,n_p,n_m,tau_d] = frdd_est(Y,D,X,c,h_opt(1),h_opt(2)); % Compute bias corrected t-stat % Compute bias correction terms b11 and b12 Xbin_bt_xh_opt = (X >= c-h_opt(2)); Xbin_lt_xh_opt = (X <= c+h_opt(1)); Xbin_bet_xxh_opt = Xbin_bt_x.*Xbin_lt_xh_opt; % index for X in [x,x+h] Xbin_bet_xhx_opt = Xbin_bt_xh_opt.*Xbin_lt_x; % index for X in [x-h,x] data_trim_mat_p_opt = data_mat(Xbin_bet_xxh_opt==1,:); T_p_opt = data_trim_mat_p_opt(:,2:3); Kvec_p_opt = (1-abs(data_trim_mat_p_opt(:,3))/h_opt(1)).*(abs(data_trim_mat_p_opt(:,3))=c); Xbin_bet_xh = Xbin_bt_x.*(X<=c+h_p); % index for X in [x, x+h2_p] n_p = sum(Xbin_bet_xh); data_trim_mat_p = data_mat(Xbin_bet_xh==1,:); YT_p = data_trim_mat_p(:,1); T_p = data_trim_mat_p(:,2:3); Kvec_p = (1-abs(data_trim_mat_p(:,3))/h_p).*(abs(data_trim_mat_p(:,3))=c-h_m); % index for X in [x, x+h2_p] n_m = sum(Xbin_bet_hx); data_trim_mat_m = data_mat(Xbin_bet_hx==1,:); YT_m = data_trim_mat_m(:,1); T_m = data_trim_mat_m(:,2:3); Kvec_m = (1-abs(data_trim_mat_m(:,3)/h_m)).*(abs(data_trim_mat_m(:,3))