numerical linear algebra - first working svd version

daslu · daslu · commit 4579dc3a55d4 · 2024-12-27T23:47:22.000+02:00
diff --git a/projects/math/numerical-linalg/notebooks/svd.clj b/projects/math/numerical-linalg/notebooks/svd.clj
@@ -1,6 +1,12 @@
-;; # Background Removal with SVD
+;; # Background Removal with SVD - DRAFT 🛠
 
-;; [original Fast.ai notebook](https://nbviewer.org/github/fastai/numerical-linear-algebra-v2/blob/master/nbs/02-Background-Removal-with-SVD.ipynb)
+;; based on: [original Fast.ai notebook](https://nbviewer.org/github/fastai/numerical-linear-algebra-v2/blob/master/nbs/02-Background-Removal-with-SVD.ipynb)
+
+;; ## Setup
+
+;; We use a few of the [Noj underlying libraries](https://scicloj.github.io/noj/noj_book.underlying_libraries),
+;; [clj-media](https://github.com/phronmophobic/clj-media),
+;; and [Apache Commons Math](https://commons.apache.org/proper/commons-math/).
 
 (ns svd
   (:require [tablecloth.api :as tc]
@@ -10,18 +16,42 @@
             [tech.v3.tensor :as tensor]
             [tech.v3.libs.buffered-image :as bufimg]
             [scicloj.kindly.v4.kind :as kind]
-            [fastmath.matrix :as mat])
+            [fastmath.matrix :as mat]
+            [tech.v3.datatype.functional :as dfn]
+            [tech.v3.datatype.statistics :as dstats])
   (:import (org.apache.commons.math3.linear
             SingularValueDecomposition)))
 
+;; ## Reading a video file
+
+;; We downloaded the following file from the
+;; original notebook.
+;; It seems to be a shorter version of the
+;; full original video (just the first 50 seconds).
+
 (def video-path
   "notebooks/movie/Video_003.mp4")
 
 (kind/video
  {:src video-path})
 
+;; Let us explore it with clj-media:
+
 (clj-media/probe video-path)
 
+;; ## Converting the video to tensor structures
+
+;; Using clj-media, we can reduce over frames:
+
+(clj-media/frames
+ (clj-media/file video-path)
+ :video
+ {:format (clj-media/video-format
+           {:pixel-format
+            :pixel-format/rgba})})
+
+;; For example, let us extract the first
+;; frame and convert it to an image:
 
 (def first-image
   (reduce (fn [_ frame] (clj-media.model/image
@@ -34,71 +64,166 @@
                      {:pixel-format
                       :pixel-format/rgba})})))
 
-
 first-image
 
-(def first-tensor
-  (bufimg/as-ubyte-tensor
-   first-image))
-
-first-tensor
+;; When converting to a tensor, we have the four
+;; color components of `rgba` format:
+
+(bufimg/as-ubyte-tensor first-image)
+
+;; In our case, the first component (a) is fixed:
+(-> (let [t (bufimg/as-ubyte-tensor first-image)]
+      ;; Build a [240 320] :uint8 tensor holding only channel 0 of each pixel.
+      ;; NOTE(review): [240 320] is hard-coded — presumably the frame's
+      ;; height x width; confirm against the probe output.
+      (tensor/compute-tensor [240 320]
+                             (fn [i j]
+                               (t i j 0))
+                             :uint8))
+    ;; Flatten to a buffer and list the distinct values found in it.
+    dtype/->buffer
+    distinct)
+
+;; The rgb components are the other three.
+
+;; We wish to process all frames, but resize
+;; the images to a lower resolution, and
+;; turn them to gray-scale.
+
+;; See [Luma](https://en.wikipedia.org/wiki/Luma_(video))
+;; for discussion of the gray-scale formula:
+;; 0.299 ∙ Red + 0.587 ∙ Green + 0.114 ∙ Blue
+
+(defn image->small-tensor
+  "Shrink `image` to 160x120 and return it as a gray-scale tensor.
+  The image is resized with `bufimg/resize`, read as a ubyte tensor, and
+  each pixel is collapsed to a single `:uint8` value using the luma
+  weights 0.299, 0.587 and 0.114 on channels 1, 2 and 3 (channel 0 is
+  skipped). Returns a tensor of shape [120 160] (rows x columns)."
+  [image]
+  (let [w 160
+        h 120
+        t (-> image
+              (bufimg/resize w h {})
+              bufimg/as-ubyte-tensor)]
+    (tensor/compute-tensor [h w]
+                           (fn [i j]
+                             ;; NOTE(review): assumes channels 1..3 are
+                             ;; R, G, B in that order — confirm against the
+                             ;; buffered image's actual channel layout.
+                             (+ (* 0.299 (t i j 1))
+                                (* 0.587 (t i j 2))
+                                ;; 0.114 (not 0.113): the weights must sum to 1.0
+                                (* 0.114 (t i j 3))))
+                           :uint8)))
 
+(-> first-image
+    image->small-tensor
+    bufimg/tensor->image)
 
+;; Now let us collect the small tensors:
 
-(def images
+(def small-tensors
   (time
    (into []
-         (map clj-media.model/image)
+         (map (comp image->small-tensor 
+                    clj-media.model/image))
          (clj-media/frames
           (clj-media/file video-path)
           :video
           {:format (clj-media/video-format
                     {:pixel-format
                      :pixel-format/rgba})}))))
 
-(count images)
-
-
-(def tensors
-  (mapv bufimg/as-ubyte-tensor images))
+(count small-tensors)
 
+;; ## Reshaping the data
 
-(count tensors)
+;; Now we will reshape the data as one matrix
+;; with one row per pixel and one column per frame.
 
+(def flat-tensors
+  "One flat buffer per frame: `dtype/->buffer` applied to each small tensor."
+  (mapv dtype/->buffer small-tensors))
 
-(def all-frames-as-one-rectangular-tensor
-  (let [row-size (->> tensors
-                      first
-                      dtype/shape
-                      (apply *))]
-    (tensor/compute-tensor [row-size
-                            (count tensors)]
-                           (fn [j i]
-                             (-> (tensors i)
-                                 (tensor/reshape [row-size])
-                                 (get j)))
-                           :uint8)))
-
+(def long-tensor
+  "A :uint8 tensor whose entry at [pixel frame] is element `pixel` of the
+  flat buffer for frame `frame`. Its shape is [pixel-count frame-count],
+  with the pixel count taken from the first flat buffer."
+  (let [pixel-count (count (first flat-tensors))
+        frame-count (count flat-tensors)]
+    (tensor/compute-tensor [pixel-count frame-count]
+                           (fn [pixel frame]
+                             (let [frame-buffer (flat-tensors frame)]
+                               (frame-buffer pixel)))
+                           :uint8)))
 
-(def all-frames-as-one-image
-  (time
-   (bufimg/tensor->image
-    all-frames-as-one-rectangular-tensor)))
+;; For visual convenience, we will display it transposed:
+(-> long-tensor
+    (tensor/transpose [1 0])
+    bufimg/tensor->image)
 
+;; ## Singular value decomposition
 
-all-frames-as-one-image
+;; Let us now compute the [SVD](https://en.wikipedia.org/wiki/Singular_value_decomposition).
 
+;; We can use Fastmath's matrix API to convert our
+;; structures to the [RealMatrix](https://commons.apache.org/proper/commons-math/javadocs/api-3.6.1/org/apache/commons/math3/linear/RealMatrix.html) type of Apache Commons Math.
 
-(def all-frames-as-one-matrix
-  (->> all-frames-as-one-rectangular-tensor
-       (take 10000)
+(def matrix
+  (->> long-tensor
        (map double-array)
        (mat/rows->RealMatrix)))
-;; 10000x350
-
 
 (def svd
-  (SingularValueDecomposition.
-   all-frames-as-one-matrix))
+  (SingularValueDecomposition. matrix))
 
 (.getSingularValues svd)
+
+(defn shape
+  "Return `[rows cols]` for matrix `m`, using `mat/nrow` and `mat/ncol`."
+  [m]
+  [(mat/nrow m) (mat/ncol m)])
+
+(shape (.getU svd))
+(shape (.getS svd))
+(shape (.getVT svd))
+
+;; To visualize different parts of the matrix decomposition,
+;; we will need to normalize tensors to the [0,1] range:
+(defn tensor-normalize
+  "Linearly rescale the values of `t` so that its minimum maps to 0 and
+  its maximum maps to 1.
+  When every value is equal the range is zero; in that case the shifted
+  (all-zero) values are returned rather than dividing by zero."
+  [t]
+  ;; Bind to `lo`/`hi` so `clojure.core/min` and `max` are not shadowed.
+  (let [{lo :min hi :max} (dstats/descriptive-statistics
+                           t
+                           #{:min :max})
+        span (- (double hi) (double lo))
+        shifted (dfn/- t lo)]
+    (if (zero? span)
+      shifted
+      (dfn// shifted span))))
+
+;; For example:
+(-> [[1 2 3]
+     [4 5 6]]
+    tensor/->tensor
+    tensor-normalize)
+
+;; Now let us visualize the main component of our matrix.
+(def component0
+  "Term 0 of the decomposition: column 0 of U, scaled by singular
+  value 0, then matrix-multiplied by row 0 of V^T."
+  (let [i 0
+        u-column (.getColumnMatrix (.getU svd) i)
+        sigma (nth (.getSingularValues svd) i)
+        vt-row (.getRowMatrix (.getVT svd) i)]
+    (mat/mulm (mat/muls u-column sigma)
+              vt-row)))
+
+(shape component0)
+
+;; This is the first order approximation of the
+;; pixel-by-frame matrix by the SVD method.
+
+;; Let us take its first column, which is the first
+;; frame, and show it as an image:
+
+(defn matrix->first-frame
+  "Render column 0 of matrix `m` as a 120x160 image.
+  The column is normalized with `tensor-normalize`, scaled by 255,
+  converted to an int array, reshaped to [120 160], and passed to
+  `bufimg/tensor->image`."
+  [m]
+  (let [column (.getColumn m 0)
+        unit-range (tensor-normalize (dtype/->array-buffer column))
+        pixels (dtype/->int-array (dfn/* unit-range 255))]
+    (bufimg/tensor->image
+     (tensor/reshape pixels [120 160]))))
+
+(matrix->first-frame component0)
+
+;; We see it is the background image of the video.
+
+
+;; Now let us compute the remainder after removing
+;; the first component.
+
+(def residual
+  "What remains of `matrix` after subtracting `component0`."
+  (mat/sub matrix component0))
+
+(matrix->first-frame residual)
+
+;; We see these are the people.