import pandas as pd
import numpy as np
from scipy.linalg import eig
# Load the MovieLens "latest-small" ratings table from GitHub (needs network
# access); the pivot below relies on its userId, movieId and rating columns.
raw = pd.read_csv(
    "https://raw.githubusercontent.com/smanihwr/ml-latest-small/master/ratings.csv"
)

# Reshape into a dense user x movie matrix: one row per userId, one column
# per movieId, cell = rating. User/movie pairs without a rating come out of
# the pivot as NaN and are replaced by 0 here.
user_item_interactions = raw.pivot(
    index="userId", columns="movieId", values="rating"
).fillna(0)
# Small 5 x 4 example matrix used to work through the SVD by hand
# (presumably rows are users and columns are items -- confirm with the author).
A = np.array([
    [5, 5, 0, 1],
    [5, 5, 0, 0],
    [0, 1, 5, 5],
    [0, 0, 5, 5],
    [0, 0, 3, 5],
])

# The right singular vectors of A are the eigenvectors of the Gram matrix
# A.T @ A. That matrix is symmetric, so use eigh: it is the solver meant for
# symmetric input and always returns real eigenvalues with orthonormal
# eigenvectors.
V_eigen_values, V_unordered = np.linalg.eigh(A.T @ A)

# Sort the eigenpairs by decreasing eigenvalue so that the columns of V line
# up with the singular values from largest to smallest.
idx_V = np.argsort(V_eigen_values)[::-1]
V = V_unordered[:, idx_V]

# Left singular vectors. They could also be taken from the eigenvectors of
# A @ A.T, but each eigenvector is only defined up to sign, so an independent
# decomposition can give directions that do not match V. Deriving them from V
# (u_i = A v_i / ||A v_i||) keeps the signs consistent.
# NOTE: this divides by the column norms of A @ V, i.e. by the singular
# values, so it assumes none of them is zero (true for this A).
AV = A @ V
U = AV / np.linalg.norm(AV, axis=0)

# The singular values are the square roots of the eigenvalues of A.T @ A.
# Clip at 0 first so that tiny negative round-off cannot produce NaN.
D = np.sqrt(np.clip(V_eigen_values[idx_V], 0, None))

# Sanity check: U @ diag(D) @ V.T should reproduce A.
A_reconstructed = np.around(U @ np.diag(D) @ V.T, decimals=1)
print(A_reconstructed)
# Output recorded from the original run:
# array([[ 5., 5., -0., 1.],
# [ 5., 5., 0., -0.],
# [ 0., 1., 5., 5.],
# [-0., 0., 5., 5.],
# [ 0., -0., 3., 5.]])
# Cross-check the hand-rolled decomposition against NumPy's built-in SVD.
U_, D_, Vt_ = np.linalg.svd(A)

# With the default full_matrices=True, U_ is (m, m) and Vt_ is (n, n), so the
# singular values have to be embedded in an (m, n) matrix before multiplying.
# Building it with A's shape works for any m and n, not only m == n + 1.
Sigma_ = np.zeros(A.shape)
Sigma_[:len(D_), :len(D_)] = np.diag(D_)

A_from_svd = np.around(U_ @ Sigma_ @ Vt_, decimals=1)
print(A_from_svd)
# Output recorded from the original run:
# array([[ 5., 5., -0., 1.],
# [ 5., 5., -0., -0.],
# [-0., 1., 5., 5.],
# [-0., 0., 5., 5.],
# [-0., 0., 3., 5.]])
# Truncated SVD
# Truncate the SVD to 2 components by keeping only the two largest singular values.
# Keep only the first two left singular vectors (the columns of U that belong
# to the two largest singular values).
U_truncated = U[:, :2]
print(U_truncated)
# Output recorded from the original run (singular vectors are only defined up
# to sign, so a column may come out negated on another run or solver):
# matrix([[-0.23093819, -0.66810948],
# [-0.16863574, -0.68636674],
# [-0.59892473, 0.13274366],
# [-0.57986295, 0.20070102],
# [-0.47252267, 0.15693514]])
# Rank-2 approximation of A: rebuild the matrix from only the two leading
# singular triplets (first two columns of U and V, first two entries of D).
A_rank2 = np.around(U[:, :2] @ np.diag(D[:2]) @ V[:, :2].T, decimals=1)
print(A_rank2)
# Output recorded from the original run:
# array([[ 5. , 5. , 0.3, 0.8],
# [ 5. , 5. , -0.2, 0.2],
# [ 0.3, 0.7, 4.7, 5.3],
# [-0.2, 0.2, 4.7, 5.3],
# [-0.1, 0.2, 3.8, 4.3]])