clear
clc
echo on
%This script illustrates some elementary properties
%of the normal or Gaussian distribution.  The normal
%distribution is defined by the -probability density-
%the probability of a measurement being in the range
%[x,x+dx] divided by the width of the interval.  It
%is equal to the derivative of the cumulative
%probability.  For a Gaussian distribution, the density
%has the form:
%
%p(x) = 1/((2*pi)^.5*sigma)*exp(-(x-mu)^2/(2*sigma^2))
%
%Which is characterized by both the population mean mu 
%and the population standard deviation sigma.  Let's plot
%this distribution:

mu=0;       %population mean
sigma=1;    %population standard deviation
x=[-3:.01:3];   %evaluation grid: 3 sigma either side of the mean
p = 1/((2*pi)^.5*sigma)*exp(-(x-mu).^2/(2*sigma^2));   %the Gaussian density above
figure(1)
plot(x,p)
xlabel('x')
ylabel('probability density')

%We can also use Matlab to generate this probability distribution
%using the randn command.  This produces an array of random 
%numbers normally distributed around zero, with standard deviation
%of one.
n=400;          %sample size
z=randn(n,1);   %n independent draws from N(0,1)
%Using the histogram plot we can get the usual bell curve:
figure(2)
dx=.2;              %histogram bin width
xbins=[-3:dx:3];    %bin centers passed to hist
%NOTE(review): hist is superseded by histogram in newer MATLAB
%releases; kept here for compatibility.
hist(z,xbins)
xlabel('x')
ylabel('Number of Observations in evenly spaced bins')
hold on
%Overlay the theoretical density scaled by n*dx, the expected
%count per bin (density * sample size * bin width):
plot(x,p*n*dx,'r')
hold off
title(['probability density * ',num2str(n*dx)])


%As you can see, the probability of a given measurement
%occurring falls off rapidly as we move away from the mean.
%We can look at the cumulative probability, which is either
%obtained by integrating p(x), or (more conveniently) using
%the function erf, defined as the integral of:
%2/sqrt(pi) * exp(-t^2) from zero to x.  Thus:
P = 0.5 + erf((x-mu)/sigma/sqrt(2))/2;   %cumulative distribution function on the grid x

figure(3)
plot(x,P)
xlabel('x')
ylabel('cumulative probability')
%From this plot you can see that there is about a 32% probability
%of a measurement lying 1 sigma above the mean or 1 sigma below
%the mean.  That reduces to about 5% for 2 sigma, and so on.
pause   %wait for a keypress before continuing
%Even better than a histogram, the cumulative probability provides
%a very easy way to look at an experimental probability distribution.  
%All we have to do is to sort the data from lowest to highest:
zs=sort(z);
%The cumulative probability is just the fraction below each value
%(the -.5 places each sorted point at the middle of its step):
cumprob=([1:n]-.5)/n;
%And we can plot it up:


figure(3)
plot(zs,cumprob)
xlabel('x')
ylabel('cumulative probability')
%You can see that this looks just like the cumulative probability
%we had before, except it is a lot more noisy.  Let's compare the
%two:

figure(3)
hold on
plot(x,P,'r')
hold off
%which demonstrates the equivalence.
%We can calculate the mean of the sample:
%NOTE(review): this assignment shadows MATLAB's builtin mean()
%function for the rest of the session -- consider renaming (that
%would also require updating any later use of this variable).
mean=sum(z)/n
%and the variance (the n-1 denominator gives the unbiased sample variance):
variance = sum((z-mean).^2)/(n-1)
%Note that (usually) the variance is much less accurate than the mean.
%This is because we are trying to estimate a higher order moment,
%and that in general takes a lot more data!

%We can see the effect of more data by increasing n:
new=10000;                      %larger sample size
znew=randn(new,1);              %fresh draws from N(0,1)
zsnew=sort(znew);
cumprobnew=([1:new]-.5)/new;    %empirical CDF positions, midpoint convention
figure(3)
hold on
plot(zsnew,cumprobnew,'g')
hold off
axis([-2.5 2.5 0 1])
%We can calculate the mean of the sample:
meannew=sum(znew)/new
%and the variance:
%(bug fix: subtract meannew, the mean of THIS sample -- the original
%code subtracted 'mean', the old 400-point sample's mean, which biases
%the variance estimate of the new sample)
variancenew = sum((znew-meannew).^2)/(new-1)
%Which is (usually) an order of magnitude closer to the correct values of
%zero and one.  The code will produce a different result every time you run
%it!

echo off