* this line defines the semicolon as the line delimiter; # delimit ; * set memory for 10 meg; set memory 10m; * write results to a log file; log using c:\bill\econ626\stata\tobit.log,replace; *read in STATA data; use c:\bill\econ626\stata\tobit; *describe what is in data set; desc; * construct some new variables then label them; * after you construct new variables, compress the data; gen age2=age*age; gen earnwkl=ln(earnwke); gen union=unionm==1; gen topcode=earnwke==999; gen black=race==2; gen hispanic=race==3; label var age2 "age squared"; label var earnwkl "log earnings per week"; label var topcode "=1 if earnwkl is topcoded"; label var union "1=in union, 0 otherwise"; label var black "=1 if black, =0 otherwise"; label var hispanic "=1 if hispanic, =0 otherwise"; * get frequencie of topcode; tabulate topcode; *run simple regression on topcoded data; reg earnwkl age age2 educ black hispanic union; * run tobit model; * here, ul specifies that the dependent variable is; * topcoded above (upper censoring); tobit earnwkl age age2 educ black hispanic union, ul; * construct quick fix for topcoded wages; * replace ln(999) with ln(e[y|y>=999]); * estimate e[y|y>=999] assuming tail of income; * distribution is pareto. if income above A is; * pareto and q is the fraction of wages above T; * then the pareto parameter is ln(q)/(ln(A) - ln(T)); * and e[y|y>=t] = alpha x T/(alpha-1); * in this case, A=750; * fraction of people with income>=750 with topcoded; * wages -- attach mean to all topcoded wages; egen q=mean(topcode) if earnwke>=750; gen alpha=ln(q)/(ln(750) - ln(999)); gen ey_y999=999*alpha/(alpha-1); sum q alpha ey_y999; gen earnwkl2=earnwkl; replace earnwkl2=ln(ey_y999) if topcode==1; * run regression on model with quick fix for top coded wages; reg earnwkl2 age age2 educ black hispanic union; * artifically topcode wages at 750; gen top750=earnwke>=750; gen earnwkl3=top750*ln(750) + (1-top750)*ln(earnwke); * run regression on model with artifically topcoded wages; reg earnwkl3 age age2 educ black hispanic union; * run tobit model on data artifically topcoded at \$750; tobit earnwkl3 age age2 educ black hispanic union, ul; * do quick fix. set A=600, calculate the; * fraction of people with income>=600 with topcoded; * wages -- attach mean to all topcoded wages; egen q1=mean(top750) if earnwke>=600; gen alpha1=ln(q1)/(ln(600) - ln(750)); gen ey_y750=750*alpha1/(alpha1-1); sum q1 alpha1 ey_y750; gen earnwkl4=earnwkl3; replace earnwkl4=ln(ey_y750) if top750==1; * run regression on model with quick fix for top coded wages; reg earnwkl4 age age2 educ black hispanic union; * close log file; log close;