Home | Download Zip
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% In this simple example, we assume there is a mixture of 2 dimensional
% gaussian variables, where the means and the covariances are unknown. But we know that the
% covariances are diagonal and isotropic. Therefore what we don't know
% are the mean, mu, and the scalar factor sigma. We use the dirichlet process to model
% the problem and do clustering. We assume it has a conjugate prior for
% mu and sigma. Since the likelihood is gaussian, the conjugate prior
% should have a normal-gamma distribution. More specifically, sigma has gamma
% distribution and mu has multivariate student-t distribution. The purpose
% of using dirichlet process is that we do not want to specify the number
% of components in the mixture, but instead give a prior over 1 to
% infinite. Then the Gibbs sampler is used to draw sample from the posterior
% distribution based on the observations. See [2] for detail about the
% algorithm.
%
% most implementation is encapsulated in a DirichMix class (DirichMix.m)
%
% distributable under GPL
% written by Zhiyuan Weng, Nov 26 2011
%
%
% Reference:
% [1] D Fink, "A Compendium of Conjugate Priors"
% [2] R Neal, "Markov Chain Sampling Methods for Dirichlet Process Mixture Models"
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

clear
clc
dirich = DirichMix; % construct an object of the class
K0 = 5;  % number of clusters
Ms = 30; % number of observations associated with each cluster
xp = []; % observations
% generate observations
for k = 1:K0
    [mu,sig] = dirich.PriorSampleGaussGamma; % sample from prior
    xp = [xp;[randn(Ms,2)*sig+repmat(mu,Ms,1),repmat(k,Ms,1)]];
end
subplot(1,2,1)
scatter(xp(:,1),xp(:,2),25,xp(:,3));
title('true clusters');
axis([-5,5,-5,5]);
axis square;
dirich.InputData(xp(:,1:2));
dirich.DoIteration(100); % 100 iterations
subplot(1,2,2)
dirich.PlotData
title('clustering results');
axis([-5,5,-5,5]);
axis square;
[#iter]	# of clusters	|	# of data in each cluster
[1]	K=47	|	22	8	1	1	1	1	7	11	1	10	1	1	2	1	16	1	2	1	1	1	2	4	4	4	1	3	13	5	3	1	1	1	1	1	1	3	1	1	2	1	1	1	1	1	1	1	1	
[2]	K=22	|	32	2	2	1	1	5	3	25	3	5	2	1	3	6	21	16	2	12	3	2	1	2	
[3]	K=15	|	36	3	5	1	1	12	14	27	8	3	3	2	8	5	22	
[4]	K=16	|	41	4	3	1	1	18	11	26	10	3	1	1	8	1	20	1	
[5]	K=11	|	39	1	2	19	14	16	12	29	11	6	1	
[6]	K=9	|	36	8	5	19	17	13	11	30	11	
[7]	K=9	|	37	7	3	17	20	10	13	30	13	
[8]	K=11	|	30	10	1	21	22	8	19	30	7	1	1	
[9]	K=10	|	35	9	1	24	18	12	14	30	6	1	
[10]	K=11	|	42	7	1	26	17	13	8	30	4	1	1	
[11]	K=12	|	37	6	2	27	19	11	11	30	3	2	1	1	
[12]	K=9	|	37	6	2	27	17	13	15	30	3	
[13]	K=9	|	29	8	2	17	7	23	21	30	13	
[14]	K=9	|	20	11	3	25	5	25	26	30	5	
[15]	K=9	|	12	12	2	25	4	26	34	30	5	
[16]	K=8	|	12	10	2	30	4	26	36	30	
[17]	K=8	|	10	11	1	30	12	18	38	30	
[18]	K=8	|	11	9	30	30	16	14	38	2	
[19]	K=10	|	15	7	30	30	15	15	33	3	1	1	
[20]	K=10	|	13	7	29	30	7	23	35	3	2	1	
[21]	K=11	|	7	12	29	30	6	24	36	2	2	1	1	
[22]	K=8	|	9	5	30	30	6	24	39	7	
[23]	K=8	|	7	14	30	30	5	25	35	4	
[24]	K=8	|	13	10	30	30	4	26	33	4	
[25]	K=9	|	8	5	30	30	11	19	38	8	1	
[26]	K=13	|	7	1	30	28	12	18	42	5	1	2	2	1	1	
[27]	K=13	|	5	2	29	29	14	16	43	4	1	4	1	1	1	
[28]	K=10	|	14	4	30	30	23	7	32	6	3	1	
[29]	K=8	|	13	2	30	30	23	7	37	8	
[30]	K=9	|	16	2	30	29	23	7	36	5	2	
[31]	K=9	|	17	1	30	30	23	7	32	9	1	
[32]	K=10	|	22	1	30	30	25	5	25	10	1	1	
[33]	K=8	|	18	1	30	30	30	2	25	14	
[34]	K=8	|	17	12	30	30	30	4	26	1	
[35]	K=8	|	17	11	30	30	30	4	27	1	
[36]	K=9	|	20	10	30	30	30	4	24	1	1	
[37]	K=8	|	22	7	29	30	30	8	23	1	
[38]	K=11	|	15	3	28	30	30	15	23	2	2	1	1	
[39]	K=10	|	22	3	29	30	30	14	12	1	5	4	
[40]	K=9	|	28	1	30	30	30	19	10	1	1	
[41]	K=8	|	31	2	30	30	30	19	6	2	
[42]	K=8	|	30	2	30	30	30	18	8	2	
[43]	K=8	|	32	2	29	30	30	17	7	3	
[44]	K=11	|	28	2	28	30	30	16	10	3	1	1	1	
[45]	K=11	|	29	1	29	30	30	17	5	1	5	2	1	
[46]	K=9	|	31	1	30	30	30	18	6	3	1	
[47]	K=11	|	26	2	30	30	30	19	5	3	3	1	1	
[48]	K=8	|	35	2	30	30	30	17	3	3	
[49]	K=8	|	39	1	30	30	30	17	1	2	
[50]	K=7	|	42	9	30	30	30	8	1	
[51]	K=7	|	43	9	29	30	30	8	1	
[52]	K=6	|	43	10	30	30	30	7	
[53]	K=5	|	50	10	30	30	30	
[54]	K=5	|	49	11	30	30	30	
[55]	K=5	|	48	12	30	30	30	
[56]	K=8	|	46	12	30	29	30	1	1	1	
[57]	K=7	|	47	10	30	30	30	2	1	
[58]	K=7	|	47	10	30	29	30	3	1	
[59]	K=6	|	46	11	30	30	30	3	
[60]	K=6	|	45	12	30	30	30	3	
[61]	K=6	|	40	14	30	30	30	6	
[62]	K=7	|	40	15	29	30	30	5	1	
[63]	K=6	|	40	15	30	30	30	5	
[64]	K=8	|	34	16	30	30	30	8	1	1	
[65]	K=7	|	44	11	30	30	30	4	1	
[66]	K=9	|	43	10	29	30	30	4	1	2	1	
[67]	K=8	|	47	9	29	30	30	2	1	2	
[68]	K=9	|	47	7	27	30	30	4	3	1	1	
[69]	K=9	|	43	12	29	30	30	3	1	1	1	
[70]	K=7	|	42	13	30	30	30	3	2	
[71]	K=9	|	35	10	30	30	30	4	9	1	1	
[72]	K=11	|	34	8	30	30	29	1	13	1	1	1	2	
[73]	K=8	|	37	4	30	30	30	4	8	7	
[74]	K=9	|	38	4	29	30	30	4	7	7	1	
[75]	K=9	|	38	6	30	30	30	4	5	6	1	
[76]	K=9	|	31	6	30	30	30	4	8	10	1	
[77]	K=9	|	24	7	30	30	30	4	11	13	1	
[78]	K=8	|	23	6	30	30	30	4	13	14	
[79]	K=10	|	21	8	30	29	30	3	18	8	1	2	
[80]	K=12	|	29	5	30	28	30	4	9	7	1	2	1	4	
[81]	K=9	|	26	9	30	29	30	5	11	6	4	
[82]	K=10	|	35	10	29	30	30	4	7	1	3	1	
[83]	K=10	|	46	5	30	30	30	2	3	1	2	1	
[84]	K=7	|	48	9	30	30	30	2	1	
[85]	K=8	|	44	8	30	30	30	6	1	1	
[86]	K=7	|	43	10	30	30	30	6	1	
[87]	K=7	|	45	11	30	30	30	3	1	
[88]	K=6	|	47	10	30	30	30	3	
[89]	K=5	|	50	10	30	30	30	
[90]	K=5	|	54	6	30	30	30	
[91]	K=6	|	48	11	30	30	30	1	
[92]	K=7	|	50	7	30	30	30	2	1	
[93]	K=7	|	51	6	30	30	30	2	1	
[94]	K=7	|	50	7	30	30	30	2	1	
[95]	K=7	|	48	11	29	30	30	1	1	
[96]	K=5	|	51	9	30	30	30	
[97]	K=5	|	50	10	30	30	30	
[98]	K=5	|	48	12	30	30	30	
[99]	K=7	|	47	12	29	30	30	1	1	
[100]	K=7	|	50	9	29	30	30	1	1