from sklearn.datasets import load_digits
import seaborn as sns
from sklearn.decomposition import PCA
import pandas as pd
import numpy as np
digits = pd.DataFrame(load_digits()['data'])
classes = load_digits(return_X_y=True)[1]
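As a quick sanity check (assuming the standard scikit-learn digits dataset), digits is a 1797 x 64 table of flattened 8 x 8 pixel images and classes holds the matching labels 0-9:

print(digits.shape)         # (1797, 64): one row per image, 64 pixel columns
print(np.unique(classes))   # [0 1 2 3 4 5 6 7 8 9]: the digit labels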
PCA
The principal components are the eigenvectors of the covariance matrix of our data, and the corresponding eigenvalues tell us how much variance lies along each of them.
This is because we are looking for the directions in which the data is stretched and for how much stretching happens, i.e. the directions of largest variance.
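Stated as formulas (a minimal restatement of the claim above, writing X for the centered N x 64 data matrix):

$$
C = \frac{1}{N} X^\top X, \qquad C\, v_i = \lambda_i\, v_i
$$

Each eigenvector v_i is a principal direction and its eigenvalue lambda_i is the variance of the data along that direction; sorting the eigenvalues in decreasing order ranks the components by how much variance they explain.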
# PCA with scikit-learn: project onto the first two principal components
low_dim_digits = PCA(n_components=2).fit_transform(digits)
sns.scatterplot(x=low_dim_digits[:, 0], y=low_dim_digits[:, 1], hue=classes)
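To check how much of the total variance those two components actually capture, a fitted PCA exposes explained_variance_ratio_ (a quick check, reusing digits from above):

pca = PCA(n_components=2).fit(digits)
print(pca.explained_variance_ratio_)         # variance fraction per component
print(pca.explained_variance_ratio_.sum())   # total variance kept in 2D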
digits_normed = digits - digits.mean()  # center every pixel column at zero mean
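The covariance computation below assumes zero-mean columns, so it is worth confirming that the centering worked (reusing digits_normed):

assert np.allclose(digits_normed.mean(), 0.0)   # column means are (numerically) zero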
# compute the covariance matrix
cov_matrix = digits_normed.T @ digits_normed / len(digits_normed)  # like digits_normed.cov(), but with 1/N instead of 1/(N-1)
eigen_values, eigen_vectors = np.linalg.eig(cov_matrix)
eigen_values, eigen_vectors  # inspect the spectrum and the eigenvectors
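Because the covariance matrix is symmetric, np.linalg.eigh is the more robust routine here: it guarantees real output and returns the eigenvalues in ascending order (a small variation on the cell above, not what the original code uses):

eigen_values_h, eigen_vectors_h = np.linalg.eigh(cov_matrix)
eigen_values_h = eigen_values_h[::-1]            # eigh sorts ascending, so reverse
eigen_vectors_h = eigen_vectors_h[:, ::-1]       # keep columns aligned with their eigenvalues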
# Sort eigenvalues and eigenvectors by decreasing eigenvalue
sorted_index = np.argsort(eigen_values)[::-1]
sorted_eigenvalue = eigen_values[sorted_index]
sorted_eigenvectors = eigen_vectors[:, sorted_index]
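The sorted eigenvalues give the same explained-variance picture as scikit-learn's explained_variance_ratio_, since the 1/N versus 1/(N-1) factor cancels in the ratio (a quick comparison, reusing sorted_eigenvalue):

explained_ratio = sorted_eigenvalue / sorted_eigenvalue.sum()
print(explained_ratio[:2])   # should match pca.explained_variance_ratio_ above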
# Select the 2 eigenvectors with the largest eigenvalues
eigenvector_subset = sorted_eigenvectors[:, 0:2]
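The low-dimensional representation is then just the centered data projected onto those two eigenvectors:

$$
X_{\text{reduced}} = \tilde{X} V_2
$$

where X-tilde is the centered N x 64 data matrix and V_2 is the 64 x 2 matrix of selected eigenvectors; the next line computes exactly this, just written as a double transpose.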
X_reduced = np.dot(eigenvector_subset.transpose(), digits_normed.transpose()).transpose()  # same values as digits_normed @ eigenvector_subset
sns.scatterplot(x=X_reduced[:, 0], y=X_reduced[:, 1], hue=classes)
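Up to sign flips of individual components (eigenvector signs are arbitrary), this hand-rolled projection should reproduce the scikit-learn result from the start; a rough check, reusing low_dim_digits, that may fail on tolerance even when the two scatterplots look identical:

print(np.allclose(np.abs(X_reduced), np.abs(low_dim_digits), atol=1e-6))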