* the data for this program are a random sample;
* of 10k observations from the data used in;
* evans, farrelly and montgomery, aer, 1999;
* the data are indoor workers in the 1991 and 1993;
* national health interview survey. the survey;
* identifies whether the worker smoked and whether;
* the worker faces a workplace smoking ban;
* use a semicolon as the command delimiter so long commands can wrap;
#delimit ;
* ask it NOT to pause when output fills the screen;
set more off;
* close any log left open by an earlier run that stopped part way;
* capture suppresses the error raised when no log is open;
capture log close;
* open log file, overwriting any previous copy;
log using workplace1.log, replace;
* use the workplace data set;
* clear drops the data currently in memory so the script can be rerun;
* in the same session without stopping on unsaved data;
use workplace1, clear;
* print out variable names, storage types and labels;
describe;
* get summary statistics for every variable;
summarize;
* run a linear probability model for comparison purposes;
* robust requests white standard errors to control for heteroskedasticity;
reg smoker age incomel male black hispanic
    hsgrad somecol college worka, robust;
* run probit model with the same covariates;
probit smoker age incomel male black hispanic
    hsgrad somecol college worka;
* ask for marginal effects/treatment effects after the probit;
mfx compute;
* the same type of results can be produced with prchange;
* this command is however more flexible in that you can change;
* the reference individual;
* NOTE(review): prchange is not an official command. it presumably comes;
* from the user-written spost package -- confirm it is installed;
prchange, help;
* get marginal effect/treatment effects for specific person;
* male, age 40, college educ, white, without workplace smoking ban;
* if a variable is not specified, its value is assumed to be;
* the sample mean. in this case, the only variable i am not;
* listing is log income, which is therefore held at its mean;
* college=1 is set explicitly. leaving it out would evaluate the;
* effects at the mean of college and not at a college graduate;
prchange, x(male=1 age=40 black=0 hispanic=0 hsgrad=0 somecol=0 college=1 worka=0);
* get marginal effects using dprobit;
dprobit smoker age incomel male black hispanic
    hsgrad somecol college worka;
* predict probability of smoking;
predict pred_prob_smoke;
* get detailed descriptive data about predicted prob;
* the full variable name is spelled out so that this line does not;
* depend on variable abbreviation being enabled and unambiguous;
sum pred_prob_smoke, detail;
* predict binary outcome with 50% cutoff;
* stata treats a missing value as larger than any number, so without;
* the if condition an observation with no prediction would be coded;
* as a predicted smoker. with it, pred_smoke1 stays missing there;
gen pred_smoke1=pred_prob_smoke>=.5 if !missing(pred_prob_smoke);
label variable pred_smoke1 "predicted smoking, 50% cutoff";
* compare actual values with the predicted classification;
tab smoker pred_smoke1, row col cell;
* using a wald test, test the null hypothesis that;
* all the education coefficients are zero;
test hsgrad somecol college;
* how to run the same test with a -2 log likelihood test;
* estimate the unrestricted model and save the estimates;
* in urmodel;
probit smoker age incomel male black hispanic
    hsgrad somecol college worka;
estimates store urmodel;
* estimate the restricted model, dropping the education dummies;
* if e(sample) holds it to the observations used by the unrestricted;
* model just estimated. the two likelihoods are only comparable when;
* both models are fit to the same observations;
* save results in rmodel;
probit smoker age incomel male black hispanic
    worka if e(sample);
estimates store rmodel;
* likelihood ratio test of the restricted against the unrestricted model;
lrtest urmodel rmodel;
* covariates shared by every model below, apart from worka;
local controls age incomel male black hispanic hsgrad somecol college;
* logit model, with worka listed last;
logit smoker `controls' worka;
* marginal effects/treatment effects for the logit model;
mfx compute;
* dprobit model, now with worka listed first;
dprobit smoker worka `controls';
* probit model with the same ordering, worka first;
probit smoker worka `controls';
* this subroutine generates the marginal effects for a probit model;
* the notation follows greene so beta is (k x 1) and xbar is (k x 1);
* so beta'xbar is a scalar. k counts the constant term as well;
probit smoker worka age incomel male black hispanic hsgrad somecol college;
matrix betat=e(b); * get beta from probit (1 x k);
matrix beta=betat'; * transpose beta to (k x 1);
matrix covp=e(V); * get v-c matrix from probit (k x k);
* get means of x -- call it xbar (k x 1);
* must be the same order as in the probit statement;
* the if e(sample) condition restricts the means to the observations;
* used by the probit above, so xbar describes the estimation sample;
* matrix accum adds a constant by default. its mean of 1 lines up;
* with the _cons entry at the end of beta;
matrix accum zz = worka age incomel male black hispanic hsgrad somecol college if e(sample), means(xbart);
matrix xbar=xbart'; * transpose the means to (k x 1);
matrix xbeta=beta'*xbar; * get xbeta (scalar);
matrix pdf=normalden(xbeta[1,1]); * evaluate std normal pdf at xbeta;
matrix k=rowsof(beta); * get number of coefficients;
matrix Ik=I(k[1,1]); * construct I(k);
* G is the derivative of the marginal effects with respect to beta;
* apart from the pdf factor, which is applied in the next line;
matrix G=Ik-xbeta*beta*xbar'; * construct G;
matrix v_c=(pdf*pdf)*G*covp*G'; * get v-c matrix of marginal effects;
matrix me= beta*pdf; * get marginal effects;
* cholesky of a diagonal matrix is the element-wise square root;
matrix se_me1=cholesky(diag(vecdiag(v_c))); * get square root of main diag;
matrix se_me=vecdiag(se_me1)'; * take diagonal values;
* dividing through diagonal matrices gives the element-wise ratio me/se;
matrix z_score=vecdiag(diag(me)*inv(diag(se_me)))'; * get z score;
matrix results=me,se_me,z_score; * construct results matrix;
matrix colnames results=marg_eff std_err z_score; * define column names;
matrix list results; * list results;
* example of a marginal effect for a dichotomous regressor;
* the 1st element of the x vector, worka, is switched between 1 and 0;
* every other element keeps the value copied from xbar;
* case 1 -- worka set to 1;
matrix x1=xbar;
matrix x1[1,1]=1;
matrix xbeta1=beta'*x1;
matrix prob1=normal(xbeta1[1,1]);
matrix pdf1=normalden(xbeta1[1,1]);
* case 0 -- worka set to 0;
matrix x0=xbar;
matrix x0[1,1]=0;
matrix xbeta0=beta'*x0;
matrix prob0=normal(xbeta0[1,1]);
matrix pdf0=normalden(xbeta0[1,1]);
* marginal effect is the difference in the two normal cdf values;
matrix me_1=prob1-prob0;
* G1 is the derivative of that difference with respect to beta;
* the variance of the effect is then G1'*covp*G1;
matrix G1=pdf1*x1 - pdf0*x0;
matrix v_c1=G1'*covp*G1;
matrix se_me_1=sqrt(v_c1[1,1]);
* marginal effect of workplace bans;
matrix list me_1;
* standard error of the workplace ban effect;
matrix list se_me_1;
* close the log file;
log close;