#delimit ;

/*
   Wild cluster bootstrap-t example.

   Regresses carton_market_share on real_tax plus state, month and year
   dummies, clustering at the state level, and then computes a wild
   bootstrap p-value for the null hypothesis ho: beta(real_tax)=0.

   Inputs : carton_sales_taxes (Stata dataset in the working directory)
   Outputs: wild_bs_example_1.log (log file)
            bs_results            (one bootstrap t-statistic per replication)

   Every command below is terminated by a semicolon because of the
   delimit setting above.
*/

* close any log left open by an earlier interrupted run and then
  open the log file;
capture log close ;
log using wild_bs_example_1.log , replace ;

* set stata parameters;
set mem 5m ;
set more off ;

* fix seed for replication purposes and
  set the number of bootstrap replications;
set seed 365476247 ;
global bootreps = 999 ;

tempfile main bootsave ;

use carton_sales_taxes ;
drop if year<2004 ;

/*
   the raw data contains monthly market share of cigarette sales
   by carton (compared to pack) for 29 states over the 2001-2006
   period, i.e. 29*12*6 = 2088 observations.  The drop command above
   keeps only 2004 onward, so the estimation sample is smaller
   (29*12*3 = 1044 observations if the panel is balanced).

   I regress the market share on real taxes (state+federal in
   dollars/pack) and add state, year and month dummies.  Because
   taxes are at the state level, you cluster at the state level.

   The parameter we will generate bootstrap p-values for is on
   real_tax and the null hypothesis we will impose is
   ho: beta(real_tax)=0
*/

* means of key covariates;
sum carton_market_share real_tax ;

* construct the dummies used in analysis;
xi i.state i.month i.year ;
di ;

* run ols without clustered std errors, just for comparison;
reg carton_market_share _I* real_tax ;

* now run ols and cluster at the state level;
reg carton_market_share _I* real_tax, cluster(state) ;

* save t-test as a global variable;
global maint = _b[real_tax] / _se[real_tax] ;

* now run OLS and impose null that real_tax=0;
reg carton_market_share _I* ;

* output the restricted residuals and fitted values;
predict epshat , resid ;
predict yhat , xb ;

* sort by state and temp save data;
sort state ;
qui save `main' , replace ;

* get the number of states (one row per state is kept and counted);
qui by state: keep if _n == 1 ;
qui summ ;
global numstates = r(N) ;

* output the t-statistics for real_tax to a file.
  A post handle left open by an earlier interrupted run is released
  first so that postfile does not fail on a rerun;
capture postclose bskeep ;
postfile bskeep t_wild using bs_results, replace ;

* iterate over the bootstrap replications;
forvalues b = 1/$bootreps { ;

    /* wild bootstrap: reload the saved estimation data.
       clear is the documented option of use for discarding
       the data currently in memory */
    use `main', clear ;

    * with 50% probability construct dummy
      that adds or subtracts the Rademacher error.
      The draw of the first observation in each state is applied
      to the whole state so every cluster shares one sign;
    qui by state: gen temp = uniform() ;
    qui by state: gen pos = (temp[1] < .5) ;
    gen wildresid = epshat * (2*pos - 1) ;

    * now construct y;
    gen wildy = yhat + wildresid ;

    * now regress y on all x variables;
    qui reg wildy _I* real_tax, cluster(state) ;

    * generate the t-stat;
    local bst_wild = _b[real_tax] / _se[real_tax] ;

    * add to the bottom of the post file;
    post bskeep (`bst_wild') ;

} ;
/* end of bootstrap reps */

* save the post file;
postclose bskeep ;

* clear the current data set;
clear ;

* load up the wild t-stats;
use bs_results ;

* figure out where the main-t is in the
  synthetic distribution.  The tail that is counted is the one
  on the same side as the sign of the main t-statistic;
gen positive=$maint>0 ;
gen pos=t_wild>$maint ;
gen neg=t_wild<$maint ;
gen reject=positive*pos + (1-positive)*neg ;
sum reject ;
local sumreject=r(sum) ;

* two-sided p-value as twice the one-tail fraction.  It is capped at 1
  because doubling can exceed 1 when more than half of the bootstrap
  draws lie beyond the main t-statistic;
local p_value_wild=min(1, 2*`sumreject'/$bootreps) ;

* conventional p-value from the clustered t-statistic using
  (number of states - 1) degrees of freedom;
local p_value_main=2*(ttail(($numstates-1),abs($maint))) ;

di "Number BS reps = $bootreps" ;
di "P-value from clustered standard errors = `p_value_main'" ;
di "P-value from wild boostrap = `p_value_wild'" ;

log close ;