SVD

Singular value decomposition from scratch
ai
Published

September 16, 2021

import pandas as pd
import numpy as np
from scipy.linalg import eig

# Download the ratings table and reshape it into a dense matrix with one row
# per userId and one column per movieId; (user, movie) pairs that have no
# rating are filled with 0.
raw = pd.read_csv(
    "https://raw.githubusercontent.com/smanihwr/ml-latest-small/master/ratings.csv"
)
user_item_interactions = raw.pivot(
    index="userId",
    columns="movieId",
    values="rating",
).fillna(0)
# Small 5 x 4 example matrix used to illustrate the decomposition A = U D V^T.
A = np.array([
    [5, 5, 0, 1],
    [5, 5, 0, 0],
    [0, 1, 5, 5],
    [0, 0, 5, 5],
    [0, 0, 3, 5],
])

# The right singular vectors (columns of V) are the eigenvectors of the Gram
# matrix A^T A. That matrix is symmetric, so use `eigh`: it returns real
# eigenvalues and orthonormal eigenvectors, which the general-purpose `eig`
# does not guarantee (e.g. for repeated eigenvalues).
V_eigen_values, V_unordered = np.linalg.eigh(A.T @ A)
# Singular values are conventionally listed from largest to smallest, so
# reorder the eigenpairs by decreasing eigenvalue.
idx_V = np.argsort(V_eigen_values)[::-1]
V = V_unordered[:, idx_V]

# Left singular vectors: u_i = A v_i / ||A v_i||.
# Taking the eigenvectors of A A^T instead would give the same vectors only up
# to sign, because each eigenvector's direction is arbitrary; deriving U from V
# keeps the signs of the two factors consistent.
# NOTE: this divides by ||A v_i||, which equals the i-th singular value, so it
# requires every singular value to be non-zero (A of full column rank).
AV = A @ V
U = AV / np.linalg.norm(AV, axis=0)

# The singular values are the square roots of the eigenvalues of A^T A.
# Round-off can leave an eigenvalue that should be 0 slightly negative; clamp
# at zero so the square root never yields NaN.
D = np.sqrt(np.maximum(V_eigen_values[idx_V], 0.0))

# Sanity check: U diag(D) V^T should reproduce A.
np.around(U @ np.diag(D) @ V.T, decimals=1)
array([[ 5.,  5., -0.,  1.],
       [ 5.,  5.,  0., -0.],
       [ 0.,  1.,  5.,  5.],
       [-0.,  0.,  5.,  5.],
       [ 0., -0.,  3.,  5.]])
# Reference decomposition from NumPy, used to cross-check the manual result.
# With the default full_matrices=True, U_ is (m, m) and Vt_ is (n, n), while
# D_ holds only the min(m, n) singular values as a 1-D array.
U_, D_, Vt_ = np.linalg.svd(A)
# Embed the singular values in an (m, n) zero matrix so the product is defined
# for any shape of A, not only when A has exactly one more row than columns.
Sigma_ = np.zeros(A.shape)
Sigma_[:len(D_), :len(D_)] = np.diag(D_)
np.around(U_ @ Sigma_ @ Vt_, decimals=1)
array([[ 5.,  5., -0.,  1.],
       [ 5.,  5., -0., -0.],
       [-0.,  1.,  5.,  5.],
       [-0.,  0.,  5.,  5.],
       [-0.,  0.,  3.,  5.]])

Truncated SVD

Truncate the SVD to 2 components by keeping only the two largest singular values, together with the corresponding columns of U and V.

# Display the first two columns of U, i.e. the left singular vectors that the
# rank-2 truncation keeps.
leading_left_vectors = U[:, :2]
np.matrix(leading_left_vectors)
matrix([[-0.23093819, -0.66810948],
        [-0.16863574, -0.68636674],
        [-0.59892473,  0.13274366],
        [-0.57986295,  0.20070102],
        [-0.47252267,  0.15693514]])
# Rank-k approximation of A: keep the first k columns of U and V and the first
# k singular values (D is sorted in decreasing order), then multiply back.
k = 2
np.around(U[:, :k] @ np.diag(D[:k]) @ V[:, :k].T, decimals=1)
array([[ 5. ,  5. ,  0.3,  0.8],
       [ 5. ,  5. , -0.2,  0.2],
       [ 0.3,  0.7,  4.7,  5.3],
       [-0.2,  0.2,  4.7,  5.3],
       [-0.1,  0.2,  3.8,  4.3]])