function drawAxes() %label the axes xlabel('x') ylabel('y') zlabel('z') %plot the axes hold on plot3([-100 100], [0 0], [0 0], 'r', 'LineWidth', 3) plot3([0 0], [-100 100], [0 0], 'g', 'LineWidth', 3) plot3([0 0], [0 0], [-100 100], 'b', 'LineWidth', 3) axis image num_pts = 100; %create some data o = [75 -60 -85]; m = [2 -3 1]; t = 200*rand(num_pts, 1)-100; p = repmat(o, [num_pts 1]) + repmat(t, [1 3]) .* repmat(m, [num_pts 1]); %plot the data figure plot3(p(:, 1), p(:, 2), p(:, 3), 'k.') drawAxes(); %remove the mean - PCA is always performed on mean-centered data i.e. there %is an assumptions that the data is centered around the origin. p_mean = mean(p, 1); p1 = p - repmat(p_mean, [num_pts 1]); %plot the mean-centered data figure plot3(p1(:, 1), p1(:, 2), p1(:, 3), 'k.') drawAxes(); %compute the principal component(s) [u s v] = svd(p1); %look at the sizes of u, s, and v size(u) size(s) size(v) %we can reconstruct p1 from u, s, and v WITHOUT losing any information recon_p1 = u * s * v'; plot3(recon_p1(:, 1), recon_p1(:, 2), recon_p1(:, 3), 'ro') %look at s - its all zeros except for one entry s(1:3, 1:3) %this means that we don't actually need the entire u and v matrices %we can actually reduce u and v also and reconstruct the %data from the smaller matrices recon_p1 = u(:, 1) * s(1, 1) * v(:, 1)'; figure plot3(p1(:, 1), p1(:, 2), p1(:, 3), 'k.') drawAxes(); plot3(recon_p1(:, 1), recon_p1(:, 2), recon_p1(:, 3), 'ro') %now look at v v1 = v(:, 1); %corresponds to s(1,1) v2 = v(:, 2); %corresponds to s(2,2) v3 = v(:, 3); %corresponds to s(3,3) figure plot3(p1(:, 1), p1(:, 2), p1(:, 3), 'k.') drawAxes(); %plot the v1s on the data plot3([0 50*v1(1)], [0 50*v1(2)], [0 50*v1(3)], 'm', 'LineWidth', 3); plot3([0 50*v2(1)], [0 50*v2(2)], [0 50*v2(3)], 'c', 'LineWidth', 3); plot3([0 50*v3(1)], [0 50*v3(2)], [0 50*v3(3)], 'y', 'LineWidth', 3); %v1 is the most important axis since s(1,1) is the largest value in the s %matrix --> v1 is the LARGEST PRINCIPAL COMPONENT v1 / v1(3) %now what would happen if the data did not lie exactly along a line? This %happens all the time in the real world and could be due to noise or could %just be some features in the data p_noise = p + 5*randn(size(p)); %plot the data figure plot3(p_noise(:, 1), p_noise(:, 2), p_noise(:, 3), 'k.') drawAxes(); p_mean = mean(p_noise, 1); p1 = p_noise - repmat(p_mean, [num_pts 1]); [u s v] = svd(p1); %as before we can reconstruct the data perfectly if we use all of u, s, and %v recon_p1 = u * s * v'; plot3(recon_p1(:, 1), recon_p1(:, 2), recon_p1(:, 3), 'ro') % now the new s matrix is different from what we had before s(1:3, 1:3) %as are the new data axes v v1 = v(:, 1); v2 = v(:, 2); v3 = v(:, 3); v1 / v1(3) figure plot3(p1(:, 1), p1(:, 2), p1(:, 3), 'k.') drawAxes(); plot3(recon_p1(:, 1), recon_p1(:, 2), recon_p1(:, 3), 'ro') plot3([0 50*v1(1)], [0 50*v1(2)], [0 50*v1(3)], 'm', 'LineWidth', 3); plot3([0 50*v2(1)], [0 50*v2(2)], [0 50*v2(3)], 'c', 'LineWidth', 3); plot3([0 50*v3(1)], [0 50*v3(2)], [0 50*v3(3)], 'y', 'LineWidth', 3); %what if we reconstruct using just the 1st component recon_p1 = u(:, 1) * s(1, 1) * v(:, 1)'; figure plot3(p1(:, 1), p1(:, 2), p1(:, 3), 'k.') hold on plot3(recon_p1(:, 1), recon_p1(:, 2), recon_p1(:, 3), 'ro') drawAxes(); %and if we reconstruct using 2 components recon_p1 = u(:, 1:2) * s(1:2, 1:2) * v(:, 1:2)'; figure plot3(p1(:, 1), p1(:, 2), p1(:, 3), 'k.') hold on plot3(recon_p1(:, 1), recon_p1(:, 2), recon_p1(:, 3), 'ro') drawAxes(); %MusicBox demo %http://thesis.flyingpudding.com/videos/demo/index.html