clear;
clc;
echo on;
%Tutorial 7

%% Linear regression to get parameters

%Activity coefficient models are used in Thermodynamics to account for
%deviations from ideal behaviour in mixture of chemical substances. The
%Margules model is extensively used to correlate activity coefficients in
%binary mixtures. It is mathematically simple and it requires two
%parameters. The Margules equation is represented as follows:

% (1) x1*Ln(gamma1) + x2*Ln(gamma2) = x1*x2*(A*x2+B*x1)

%where x1, x2, gamma1, gamma2 are the concentration and activity
%coefficients of species 1 and 2, respectively. A and B are constants
%independent of temperature or  composition.

%The equation for gamma1 is:

% (2) Ln(gamma1) = x2^2*(2B-A) + 2*x2^3*(A-B)

%The following experimental data were obtained from literature:

x1=[0.0246
0.0496
0.0648
0.0996
0.1498
0.2999
0.3718
0.4548
0.5549
0.6397
0.8031
0.8698
0.9478];

x2=1-x1;

gamma1=[5.2313
4.9516
5.0295
4.6721
3.959
2.4264
2.0725
1.781
1.549
1.3695
1.1698
1.0745
0.9998];

gamma2=[1.0036
1.0084
1.0153
1.0283
1.0502
1.1865
1.2918
1.4699
1.7619
2.1246
3.2094
4.6223
6.5437];

%Let's get first the values of A and B using the normal equations. Setting
%as y the lhs of equation 1 and rearranging the rhs:

y = x1.*log(gamma1)+x2.*log(gamma2);
a=[x1.*x2.^2 x1.^2.*x2];
k=inv(a'*a)*a';
x=k*y;

%The following parameters are obtained:

A=x(1)
B=x(2)

%Let's obtain the activity coefficient for substance 1

loggamma1=x2.^2*(2*B-A)+2*x2.^3*(A-B);

figure(1)
plot(x1,log(gamma1),'x',x1,loggamma1);
legend('Experiments','Margules')
xlabel('Concentration')
ylabel('Log(\gamma)')

%% Infinite dilution activity coefficient

%In Thermodynamics, it's useful to obtain the infinite dilution activity
%coefficient of species 1 (i.e. x2=1)):

%loggammainf = 2*B-A+2*(A-B);
loggammainf = A;
gammainf = exp(loggammainf);

%% Error in A and B

%Let's calculate now the standard deviation of the parameters. First we
%need to compute the residual.

resid = a*x-y;

%Assuming that the residuals are all normally distributed with the same
%standard deviation, we get:

vary = norm(resid)^2/(length(resid)-2); %The two comes from the loss of two
                                        %degrees of freedom (A and B)

%The matrix of covariance is:                                        
varx = k*diag(vary)*k'

%The diagonal elements of the matrix are the variances in x(1) and x(2).
%We are interested in the uncertainty in A and B, so we have to look at the
%off diagonal elements (covariances) matter too.

sigA=varx(1,1)^0.5
sigB=varx(2,2)^0.5


%% Error in infinite dilution activity coefficient

%We can also calculate the uncertainty of the activity coefficient at 
%infinite dilution

gradf = [exp(A) 0]; 

siggammainf = sqrt(gradf * varx * gradf'); %This is the error formula


%We can get the uncertainty of the activity coefficient at any other
%concentration


newa = [2*x2.^3 - x2.^2 2*(x2.^2 - x2.^3)]; %This is eq1 rearranged
myloggamma1 = newa * x;
varplot = sqrt(diag(newa*varx*newa'));
figure(2)
plot (x1,log(gamma1),'x',x1,myloggamma1,x1,myloggamma1+2*varplot,'--r',x1,myloggamma1-2*varplot,'--r')
legend('Experiments','Margules','2-sigma error')
xlabel('Concentration')
ylabel('Log(\gamma)')

%% Analysis of Residuals

%Looking at this graph, we note that very few of the data points fall
%within the 2 sigma limits!  There are two reasons for this: First, the
%error limits are for model predictions, and don't include the scatter in
%the data points.  Effectively it is the error in the mean, rather than the
%envelope of population standard deviation of the data.  We could recover
%the latter by adding the variance of the model prediction to the variance
%of the data points and taking the square root.

%A more serious error is that the residuals aren't really random - there is
%something wrong with either the model or the experiments!  This can be
%seen if we plot up the residuals:

figure(3)
plot(x1,resid,'x')
grid on
xlabel('Concentration')
ylabel('Residual')

%We can quantify this by looking at the covariance:
n=length(resid);
covariance=resid(1:n-1)'*resid(2:n)/vary/(n-1)
%Which is really high!  This means that we have overestimated the random
%error in the data points, and underestimated the error in the fitted
%coefficients.  To resolve this, we would need to go back and re-evaluate
%both the model used, and the experiments to determine the source of the
%discrepancy.

%Remember: Always Plot Your Residuals!
echo off