Skip to content

Commit b952e9f

Browse files
committed
Update ddasp_exercise_slides.tex
PCA mods
1 parent 6e92061 commit b952e9f

File tree

1 file changed

+54
-24
lines changed

1 file changed

+54
-24
lines changed

slides/ddasp_exercise_slides.tex

Lines changed: 54 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -2678,14 +2678,16 @@ \subsection{Exercise 07}
26782678

26792679
\end{frame}
26802680

2681-
\begin{frame}{Ex07: Audio Features}
2681+
%\begin{frame}{Ex07: Audio Features}
26822682
% empty slide for clever touch
2683-
\end{frame}
2684-
2683+
%\end{frame}
26852684

2685+
\subsection{Exercise 08}
26862686

2687+
%\begin{frame}{Ex08: Principal Component Analysis (PCA)}
2688+
% empty slide for clever touch
2689+
%\end{frame}
26872690

2688-
\subsection{Exercise 08}
26892691

26902692
\begin{frame}{Ex08: Principal Component Analysis (PCA)}
26912693
Objectives
@@ -2697,27 +2699,54 @@ \subsection{Exercise 08}
26972699
\end{frame}
26982700

26992701

2702+
\begin{frame}[t]{Recap Diagonalization / SVD}
2703+
2704+
$\cdot$ Assume $N \times F$ \underline{full-column rank} matrix $\bm{X}$ with \underline{mean-free columns}
2705+
2706+
$\cdot$ Then $\bm{C}_X = \frac{1}{N-1}\bm{X}^\mathrm{T}\bm{X}$ is symmetric and positive definite (i.e. all \underline{eigenvalues are positive})
2707+
2708+
$\cdot$ Eigendecomposition (cf. Spectral Theorem)
2709+
2710+
$$\bm{C}_X \bm{v}_1 = \lambda_1 \bm{v}_1$$
2711+
2712+
$\bm{v}_1$ is an eigenvector of $\bm{C}_X$ corresponding to the largest eigenvalue denoted by $\lambda_1$
2713+
2714+
$\cdot$ SVD
2715+
2716+
$$\bm{X} \bm{v}_1 = \sigma_1 \bm{u}_1$$
2717+
2718+
the same $\bm{v}_1$ is a right singular vector of $\bm{X}$ corresponding to the largest singular value $\sigma_1$
2719+
2720+
$\cdot$ Then it is simple to show
2721+
2722+
$$\text{variance related:}\,\,\frac{\sigma_1^2}{N-1} = \lambda_1 \qquad\qquad \text{std dev related:}\,\,\frac{\sigma_1}{\sqrt{N-1}} = \sqrt{\lambda_1}$$
2723+
2724+
$\cdot$ This also holds for all other sorted pairs $\lambda_f$ vs. $\sigma_f$, $1\leq f \leq F$, and their corresponding $\bm{v}_f$
2725+
2726+
2727+
\end{frame}
2728+
27002729
\begin{frame}[t]{Principal Component Analysis (PCA)}
27012730

2702-
PCA is typically applied on mean-free data
2731+
PCA is typically applied on \textbf{mean-free} data
27032732

27042733
$\cdot$ for an $N \times F$ full-column rank matrix $\bm{X}$ we ensure that each column is mean-free by
27052734
$$\bm{X}_{N \times F} \leftarrow \bm{X}_{N \times F} - \frac{1}{N} \bm{1}_{N \times N} \bm{X}_{N \times F}$$
27062735

27072736
$\cdot$ for an $F \times N$ full-row rank matrix $\bm{X}$ we ensure that each row is mean-free by
27082737
$$\bm{X}_{F \times N} \leftarrow \bm{X}_{F \times N} - \frac{1}{N} \bm{X}_{F \times N} \bm{1}_{N \times N}$$
27092738

2710-
PCA is often additionally performed on unit-variance preprocessed data, cf. function zscore()
2739+
PCA is often additionally (especially if features have different physical units!) performed on \textbf{unit-variance} preprocessed data, cf. function zscore()
27112740

2712-
this then yields a total variance of $$\mathrm{trace}(\mathrm{cov}(\mathrm{zscore}(\bm{X})))= F$$
2741+
$\cdot$ this normalization yields a total variance of $$\mathrm{trace}(\mathrm{cov}(\mathrm{zscore}(\bm{X})))= F$$
27132742

27142743
which the PCA spreads over the principal component (PC) scores
27152744

27162745
\end{frame}
27172746

27182747

27192748

2720-
\begin{frame}[t]{Ex08: Principal Component Analysis (PCA) via SVD}
2749+
\begin{frame}[t]{PCA via SVD}
27212750

27222751
$\cdot$ for $\bm{X}_c \in\mathbb{R}^{N \times F}$, $\bm{X}_c = \bm{X}_r^\mathrm{T}$, $\bm{F}_c = \bm{F}_r^\mathrm{T}$, SVD matrices $\bm{U} \bm{\Sigma} \bm{V}^\mathrm{T}$ for $\bm{X}_c$
27232752

@@ -2805,9 +2834,10 @@ \subsection{Exercise 08}
28052834

28062835

28072836

2808-
\begin{frame}[t]{Ex08: Principal Component Analysis (PCA) via Covariance}
2837+
\begin{frame}[t]{PCA via Covariance}
28092838

2810-
$\cdot$ for $\bm{X}_c \in\mathbb{R}$, $\bm{X}_c = \bm{X}_r^\mathrm{T}$, $\bm{F}_c = \bm{F}_r^\mathrm{T}$, SVD matrices $\bm{U} \bm{\Sigma} \bm{V}^\mathrm{T}$ for $\bm{X}_c$
2839+
$\cdot$ full-column rank $\bm{X}_c \in\mathbb{R}^{N \times F}$,\,$\bm{X}_c = \bm{X}_r^\mathrm{T}$, $\bm{F}_c = \bm{F}_r^\mathrm{T}$;\,
2840+
symmetric positive definite covariance matrix $\bm{C}_X$
28112841

28122842
$\cdot$ PC scores are ortho\underline{gonal} and variance-sorted, PC loadings are ortho\underline{normal}
28132843

@@ -2845,7 +2875,7 @@ \subsection{Exercise 08}
28452875
\begin{minipage}[t]{0.49\textwidth}
28462876
covariance matrix $\bm{C}_X = \frac{1}{N-1}\bm{X}_c^\mathrm{T}\bm{X}_c$
28472877

2848-
diagonalization (with SVD) $\bm{C}_X = \bm{V} \bm{\Lambda} \bm{V}^\mathrm{T}$
2878+
(sorted!) diagonalization $\bm{C}_X = \bm{V} \bm{\Lambda} \bm{V}^\mathrm{T}$
28492879

28502880
PC scores $\bm{F}_c = \bm{X}_c \bm{V}$
28512881

@@ -2856,7 +2886,7 @@ \subsection{Exercise 08}
28562886
\begin{minipage}[t]{0.49\textwidth}
28572887
covariance matrix $\bm{C}_X = \frac{1}{N-1}\bm{X}_r\bm{X}_r^\mathrm{T}$
28582888

2859-
diagonalization (with SVD) $\bm{C}_X = \bm{V} \bm{\Lambda} \bm{V}^\mathrm{T}$
2889+
(sorted) diagonalization $\bm{C}_X = \bm{V} \bm{\Lambda} \bm{V}^\mathrm{T}$
28602890

28612891
PC scores $\bm{F}_r = \bm{V}^\mathrm{T} \bm{X}_r$
28622892

@@ -2867,17 +2897,17 @@ \subsection{Exercise 08}
28672897
\vspace{0.5em}
28682898

28692899
%\small
2870-
$\cdot$ an SVD-based diagonalization inherently sorts the eigenvalues in $\bm{\Lambda}$, making the orthogonal PC scores \underline{variance-sorted} (i.e. covariance matrix of $\bm{F}$ is a sorted diagonal matrix)
2900+
$\cdot$ the diagonalization of $\bm{C}_X$ uses sorted eigenvalues in $\bm{\Lambda}$ and correspondingly ordered eigenvectors in $\bm{V}$, making the orthogonal PC scores \underline{variance-sorted} (i.e. covariance matrix of $\bm{F}$ is a sorted diagonal matrix)
28712901

2872-
$\cdot$ $\bm{F} / \bm{L}$ might exhibit reflections compared to $\bm{F} / \bm{L}$ from SVD-based approach
2902+
$\cdot$ the $\bm{F} / \bm{L}$ here might exhibit reflections compared to the $\bm{F} / \bm{L}$ from SVD-based approach
28732903

2874-
$\cdot$ SVD / covariance approaches are consistent by itself as calculation of $\bm{F}$ and $\bm{L}$ is linked
2904+
$\cdot$ but the SVD and covariance approaches are each consistent in itself, as the calculation of $\bm{F}$ and $\bm{L}$ is linked
28752905

28762906
\end{frame}
28772907

28782908

28792909

2880-
\begin{frame}[t]{Ex08: Principal Component Analysis (PCA) Feature Representation}
2910+
\begin{frame}[t]{PCA Feature Representation}
28812911
\vspace{-1.5em}
28822912
$\cdot$ for $\bm{X}_c \in\mathbb{R}^{N \times F}$, $\bm{X}_c = \bm{X}_r^\mathrm{T}$, $\bm{F}_c = \bm{F}_r^\mathrm{T}$, SVD matrices $\bm{U} \bm{\Sigma} \bm{V}^\mathrm{T}$ for $\bm{X}_c$
28832913

@@ -2968,7 +2998,7 @@ \subsection{Exercise 08}
29682998

29692999

29703000

2971-
\begin{frame}[t]{Ex08: Principal Component Analysis (PCA) 2D-Data Example}
3001+
\begin{frame}[t]{PCA 2D-Data Example}
29723002
\begin{minipage}[t]{0.49\textwidth}
29733003
\includegraphics[width=\textwidth]{pca_2d_original_data.pdf}
29743004
\end{minipage}
@@ -2978,7 +3008,7 @@ \subsection{Exercise 08}
29783008
\end{minipage}
29793009
\end{frame}
29803010
%%
2981-
\begin{frame}[t]{Ex08: Principal Component Analysis (PCA) 2D-Data Example}
3011+
\begin{frame}[t]{PCA 2D-Data Example}
29823012
\begin{minipage}[t]{0.49\textwidth}
29833013
\includegraphics[width=\textwidth]{pca_2d_pc_data.pdf}
29843014
\end{minipage}
@@ -2988,7 +3018,7 @@ \subsection{Exercise 08}
29883018
\end{minipage}
29893019
\end{frame}
29903020
%%
2991-
\begin{frame}[t]{Ex08: Principal Component Analysis (PCA) 2D-Data Example}
3021+
\begin{frame}[t]{PCA 2D-Data Example}
29923022
\begin{minipage}[t]{0.49\textwidth}
29933023
\includegraphics[width=\textwidth]{pca_2d_truncated_svd.pdf}
29943024
\end{minipage}
@@ -2998,7 +3028,7 @@ \subsection{Exercise 08}
29983028
\end{minipage}
29993029
\end{frame}
30003030
%%
3001-
\begin{frame}[t]{Ex08: Principal Component Analysis (PCA) 2D-Data Example}
3031+
\begin{frame}[t]{PCA 2D-Data Example}
30023032
\begin{minipage}[t]{0.49\textwidth}
30033033
\includegraphics[width=\textwidth]{pca_2d_pc_data.pdf}
30043034
\end{minipage}
@@ -3014,7 +3044,7 @@ \subsection{Exercise 08}
30143044

30153045

30163046

3017-
\begin{frame}[t]{Ex08: Principal Component Analysis (PCA) 3D-Data Example}
3047+
\begin{frame}[t]{PCA 3D-Data Example}
30183048
\begin{minipage}[t]{0.49\textwidth}
30193049
original data cloud in 3D space
30203050

@@ -3028,7 +3058,7 @@ \subsection{Exercise 08}
30283058
\end{minipage}
30293059
\end{frame}
30303060
%%
3031-
\begin{frame}[t]{Ex08: Principal Component Analysis (PCA) 3D-Data Example}
3061+
\begin{frame}[t]{PCA 3D-Data Example}
30323062
\begin{minipage}[t]{0.49\textwidth}
30333063
PC data cloud in 3D space
30343064

@@ -3042,7 +3072,7 @@ \subsection{Exercise 08}
30423072
\end{minipage}
30433073
\end{frame}
30443074
%%
3045-
\begin{frame}[t]{Ex08: Principal Component Analysis (PCA) 3D-Data Example}
3075+
\begin{frame}[t]{PCA 3D-Data Example}
30463076
\begin{minipage}[t]{0.49\textwidth}
30473077
data \underline{plane} in 3D space
30483078

@@ -3056,7 +3086,7 @@ \subsection{Exercise 08}
30563086
\end{minipage}
30573087
\end{frame}
30583088
%%
3059-
\begin{frame}[t]{Ex08: Principal Component Analysis (PCA) 3D-Data Example}
3089+
\begin{frame}[t]{PCA 3D-Data Example}
30603090
\begin{minipage}[t]{0.49\textwidth}
30613091
data cloud in 3D space
30623092

0 commit comments

Comments
 (0)