Skip to content

Commit 4579dc3

Browse files
committed
numerical linear algebra - first working svd version
1 parent 1ec6f1d commit 4579dc3

File tree

1 file changed

+167
-42
lines changed
  • projects/math/numerical-linalg/notebooks

1 file changed

+167
-42
lines changed
Lines changed: 167 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
1-
;; # Background Removal with SVD
1+
;; # Background Removal with SVD - DRAFT 🛠
22

3-
;; [original Fast.ai notebook](https://nbviewer.org/github/fastai/numerical-linear-algebra-v2/blob/master/nbs/02-Background-Removal-with-SVD.ipynb)
3+
;; based on: [original Fast.ai notebook](https://nbviewer.org/github/fastai/numerical-linear-algebra-v2/blob/master/nbs/02-Background-Removal-with-SVD.ipynb)
4+
5+
;; ## Setup
6+
7+
;; We use a few of the [Noj underlying libraries](https://scicloj.github.io/noj/noj_book.underlying_libraries),
8+
;; [clj-media](https://github.com/phronmophobic/clj-media),
9+
;; and [Apache Commons Math](https://commons.apache.org/proper/commons-math/).
410

511
(ns svd
612
(:require [tablecloth.api :as tc]
@@ -10,18 +16,42 @@
1016
[tech.v3.tensor :as tensor]
1117
[tech.v3.libs.buffered-image :as bufimg]
1218
[scicloj.kindly.v4.kind :as kind]
13-
[fastmath.matrix :as mat])
19+
[fastmath.matrix :as mat]
20+
[tech.v3.datatype.functional :as dfn]
21+
[tech.v3.datatype.statistics :as dstats])
1422
(:import (org.apache.commons.math3.linear
1523
SingularValueDecomposition)))
1624

25+
;; ## Reading a video file
26+
27+
;; We downloaded the following file from the
28+
;; original notebook.
29+
;; It seems to be a shorter version of the
30+
;; full original video (just the first 50 seconds).
31+
1732
(def video-path
1833
"notebooks/movie/Video_003.mp4")
1934

2035
(kind/video
2136
{:src video-path})
2237

38+
;; Let us explore it with clj-media:
39+
2340
(clj-media/probe video-path)
2441

42+
;; ## Converting the video to tensor structures
43+
44+
;; Using clj-media, we can reduce over frames:
45+
46+
(clj-media/frames
47+
(clj-media/file video-path)
48+
:video
49+
{:format (clj-media/video-format
50+
{:pixel-format
51+
:pixel-format/rgba})})
52+
53+
;; For example, let us extract the first
54+
;; frame and convert it to an image:
2555

2656
(def first-image
2757
(reduce (fn [_ frame] (clj-media.model/image
@@ -34,71 +64,166 @@
3464
{:pixel-format
3565
:pixel-format/rgba})})))
3666

37-
3867
first-image
3968

40-
(def first-tensor
41-
(bufimg/as-ubyte-tensor
42-
first-image))
43-
44-
first-tensor
69+
;; When converting to a tensor, we have the four
70+
;; color components of `rgba` format:
71+
72+
(bufimg/as-ubyte-tensor first-image)
73+
74+
;; In our case, the first component (a) is fixed:
75+
(-> (let [t (bufimg/as-ubyte-tensor first-image)]
76+
(tensor/compute-tensor [240 320]
77+
(fn [i j]
78+
(t i j 0))
79+
:uint8))
80+
dtype/->buffer
81+
distinct)
82+
83+
;; The rgb components are the other three.
84+
85+
;; We wish to process all frames, but resize
86+
;; the images to a lower resolution, and
87+
;; turn them to gray-scale.
88+
89+
;; See [Luma](https://en.wikipedia.org/wiki/Luma_(video)
90+
;; for discussion of the gray-scale forumla:
91+
;; 0.299 ∙ Red + 0.587 ∙ Green + 0.114 ∙ Blue
92+
93+
(defn image->small-tensor [image]
94+
(let [w 160
95+
h 120
96+
t (-> image
97+
(bufimg/resize w h {})
98+
bufimg/as-ubyte-tensor)]
99+
(tensor/compute-tensor [h w]
100+
(fn [i j]
101+
(+ (* 0.299 (t i j 1))
102+
(* 0.587 (t i j 2))
103+
(* 0.113 (t i j 3))))
104+
:uint8)))
45105

106+
(-> first-image
107+
image->small-tensor
108+
bufimg/tensor->image)
46109

110+
;; Now let us collect the small tensors:
47111

48-
(def images
112+
(def small-tensors
49113
(time
50114
(into []
51-
(map clj-media.model/image)
115+
(map (comp image->small-tensor
116+
clj-media.model/image))
52117
(clj-media/frames
53118
(clj-media/file video-path)
54119
:video
55120
{:format (clj-media/video-format
56121
{:pixel-format
57122
:pixel-format/rgba})}))))
58123

59-
(count images)
60-
61-
62-
(def tensors
63-
(mapv bufimg/as-ubyte-tensor images))
124+
(count small-tensors)
64125

126+
;; ## Reshaping the data
65127

66-
(count tensors)
128+
;; Now we will reshape the data as one matrix
129+
;; with row per pixel and column per frame.
67130

131+
(def flat-tensors
132+
(->> small-tensors
133+
(mapv dtype/->buffer)))
68134

69-
(def all-frames-as-one-rectangular-tensor
70-
(let [row-size (->> tensors
71-
first
72-
dtype/shape
73-
(apply *))]
74-
(tensor/compute-tensor [row-size
75-
(count tensors)]
76-
(fn [j i]
77-
(-> (tensors i)
78-
(tensor/reshape [row-size])
79-
(get j)))
80-
:uint8)))
81-
135+
(def long-tensor
136+
(tensor/compute-tensor [(-> flat-tensors first count)
137+
(count flat-tensors)]
138+
(fn [j i]
139+
((flat-tensors i) j))
140+
:uint8))
82141

83-
(def all-frames-as-one-image
84-
(time
85-
(bufimg/tensor->image
86-
all-frames-as-one-rectangular-tensor)))
142+
;; For visual conveniene, we will display it transposed:
143+
(-> long-tensor
144+
(tensor/transpose [1 0])
145+
bufimg/tensor->image)
87146

147+
;; ## Singular value decomposition
88148

89-
all-frames-as-one-image
149+
;; Let us now compute the [SVD](https://en.wikipedia.org/wiki/Singular_value_decomposition).
90150

151+
;; We can use Fastmath's matrix API to convert out
152+
;; structures to the [RealMatrix](https://commons.apache.org/proper/commons-math/javadocs/api-3.6.1/org/apache/commons/math3/linear/RealMatrix.html) type of Apache Commons Math.
91153

92-
(def all-frames-as-one-matrix
93-
(->> all-frames-as-one-rectangular-tensor
94-
(take 10000)
154+
(def matrix
155+
(->> long-tensor
95156
(map double-array)
96157
(mat/rows->RealMatrix)))
97-
;; 10000x350
98-
99158

100159
(def svd
101-
(SingularValueDecomposition.
102-
all-frames-as-one-matrix))
160+
(SingularValueDecomposition. matrix))
103161

104162
(.getSingularValues svd)
163+
164+
(def shape
165+
(juxt mat/nrow
166+
mat/ncol))
167+
168+
(shape (.getU svd))
169+
(shape (.getS svd))
170+
(shape (.getVT svd))
171+
172+
;; To visualize different parts of the matrix decomposition,
173+
;; we will need to normalize tensors to the [0,1] range:
174+
(defn tensor-normalize
175+
[t]
176+
(let [{:keys [min max]} (dstats/descriptive-statistics
177+
t
178+
#{:min :max})]
179+
(prn [min max])
180+
(-> (dfn/- t min)
181+
(dfn// (- (double max) (double min))))))
182+
183+
;; For example:
184+
(-> [[1 2 3]
185+
[4 5 6]]
186+
tensor/->tensor
187+
tensor-normalize)
188+
189+
;; Now let us visualize the main component of our matrix.
190+
(def component0
191+
(let [i 0]
192+
(-> (.getColumnMatrix (.getU svd) i)
193+
(mat/muls (nth (.getSingularValues svd)
194+
i))
195+
(mat/mulm (.getRowMatrix (.getVT svd) i)))))
196+
197+
(shape component0)
198+
199+
;; This is the first order approximation of the
200+
;; pixel-by-frame matrix by the SVD method.
201+
202+
;; Let us take its first column, which is the first
203+
;; frame, and show it as an image:
204+
205+
(defn matrix->first-frame [m]
206+
(-> m
207+
(.getColumn 0)
208+
dtype/->array-buffer
209+
tensor-normalize
210+
(dfn/* 255)
211+
(dtype/->int-array)
212+
(tensor/reshape [120 160])
213+
bufimg/tensor->image))
214+
215+
(matrix->first-frame component0)
216+
217+
;; We see it is the background image of the video.
218+
219+
220+
;; Now let us compute the remainder after removing
221+
;; the first component.
222+
223+
(def residual
224+
(mat/sub matrix
225+
component0))
226+
227+
(matrix->first-frame residual)
228+
229+
;; We see these are the people.

0 commit comments

Comments
 (0)