|
166 | 166 | ;; Malli - Schema validation |
167 | 167 | [malli.core :as m] |
168 | 168 | [malli.error :as me] |
| 169 | + [malli.util :as mu] |
169 | 170 |
|
170 | 171 | ;; RDatasets - Example datasets |
171 | 172 | [scicloj.metamorph.ml.rdatasets :as rdatasets])) |
|
536 | 537 | ;; |
537 | 538 | ;; **Why compute transforms ourselves?** |
538 | 539 |
|
539 | | -;; 1. Consistency - We want the isualizations to match the |
| 540 | +;; 1. Consistency - We want the visualizations to match the |
540 | 541 | ;; statistical computations of our Clojure libraries. |
541 | 542 | ;; 2. Efficiency - Especially with browser-based rendering targets, |
542 | 543 | ;; what we wish to pass to the target is summaries (say, 20 histogram bars) |
|
556 | 557 | ;; **You can skim this section** - it's reference material. The schemas will be |
557 | 558 | ;; used by validation helpers later, and referenced in examples as needed. |
558 | 559 |
|
| 560 | +;; ### ⚙️ Malli Registry Setup |
| 561 | +;; |
| 562 | +;; Create a registry that includes both default schemas and malli.util schemas. |
| 563 | +;; This enables declarative schema utilities like :merge, :union, :select-keys. |
| 564 | + |
| 565 | +(def registry |
| 566 | + "Malli registry with default schemas and util schemas (for :merge, etc.)" |
| 567 | + (merge (m/default-schemas) (mu/schemas))) |
| 568 | + |
559 | 569 | ;; ### ⚙️ Core Type Schemas |
560 | 570 |
|
561 | 571 | (def DataType |
|
662 | 672 |
|
663 | 673 | ;; ### ⚙️ Layer Schema |
664 | 674 |
|
665 | | -(def Layer |
666 | | - "Schema for a complete layer specification. |
667 | | - |
668 | | - A layer is a flat map with distinctive :=... keys containing all the |
669 | | - information needed to render a visualization layer: |
670 | | - - Data source |
671 | | - - Aesthetic mappings (x, y, color, size, etc.) |
672 | | - - Plot type |
673 | | - - Visual attributes |
674 | | - - Optional statistical transformation |
675 | | - - Optional faceting" |
| 675 | +(def BaseLayer |
| 676 | + "Base layer fields shared across all plot types." |
676 | 677 | [:map |
677 | 678 | ;; Data (optional in the schema, though most layers need it) |
678 | 679 | [:=data {:optional true} Dataset] |
679 | 680 |
|
680 | | - ;; Positional aesthetics |
681 | | - [:=x {:optional true} PositionalAesthetic] |
682 | | - [:=y {:optional true} PositionalAesthetic] |
683 | | - |
684 | 681 | ;; Other aesthetics |
685 | 682 | [:=color {:optional true} ColorAesthetic] |
686 | 683 | [:=size {:optional true} SizeAesthetic] |
|
692 | 689 | ;; Attributes (constant visual properties) |
693 | 690 | [:=alpha {:optional true} AlphaAttribute] |
694 | 691 |
|
695 | | - ;; Plot type and transformation |
696 | | - [:=plottype {:optional true} PlotType] |
| 692 | + ;; Transformation |
697 | 693 | [:=transformation {:optional true} Transformation] |
698 | 694 |
|
699 | 695 | ;; Histogram-specific |
|
704 | 700 | [:=scale-y {:optional true} ScaleSpec] |
705 | 701 | [:=scale-color {:optional true} ScaleSpec]]) |
706 | 702 |
|
| 703 | +(def Layer |
| 704 | + "Schema for a complete layer specification with plottype-specific requirements. |
| 705 | + |
| 706 | + Uses :multi to dispatch on :=plottype and enforce different requirements: |
| 707 | + - :scatter, :line, :area require both :=x and :=y |
| 708 | + - :bar, :histogram require :=x (y is optional) |
| 709 | + - nil (no plottype) allows incomplete layers for composition |
| 710 | + |
| 711 | + This replaces the nested conditionals in validate-layer with declarative schemas." |
| 712 | + (m/schema |
| 713 | + [:multi {:dispatch :=plottype} |
| 714 | + |
| 715 | + ;; Scatter requires both x and y |
| 716 | + [:scatter |
| 717 | + [:merge |
| 718 | + BaseLayer |
| 719 | + [:map |
| 720 | + [:=plottype [:enum :scatter]] |
| 721 | + [:=x PositionalAesthetic] |
| 722 | + [:=y PositionalAesthetic]]]] |
| 723 | + |
| 724 | + ;; Line requires both x and y |
| 725 | + [:line |
| 726 | + [:merge |
| 727 | + BaseLayer |
| 728 | + [:map |
| 729 | + [:=plottype [:enum :line]] |
| 730 | + [:=x PositionalAesthetic] |
| 731 | + [:=y PositionalAesthetic]]]] |
| 732 | + |
| 733 | + ;; Bar requires x, y optional |
| 734 | + [:bar |
| 735 | + [:merge |
| 736 | + BaseLayer |
| 737 | + [:map |
| 738 | + [:=plottype [:enum :bar]] |
| 739 | + [:=x PositionalAesthetic] |
| 740 | + [:=y {:optional true} PositionalAesthetic]]]] |
| 741 | + |
| 742 | + ;; Histogram requires x, y optional |
| 743 | + [:histogram |
| 744 | + [:merge |
| 745 | + BaseLayer |
| 746 | + [:map |
| 747 | + [:=plottype [:enum :histogram]] |
| 748 | + [:=x PositionalAesthetic] |
| 749 | + [:=y {:optional true} PositionalAesthetic]]]] |
| 750 | + |
| 751 | + ;; Area requires both x and y |
| 752 | + [:area |
| 753 | + [:merge |
| 754 | + BaseLayer |
| 755 | + [:map |
| 756 | + [:=plottype [:enum :area]] |
| 757 | + [:=x PositionalAesthetic] |
| 758 | + [:=y PositionalAesthetic]]]] |
| 759 | + |
| 760 | + ;; Incomplete layer (no plottype) - for composition |
| 761 | + [nil |
| 762 | + [:merge |
| 763 | + BaseLayer |
| 764 | + [:map |
| 765 | + [:=plottype {:optional true} [:maybe nil?]] |
| 766 | + [:=x {:optional true} PositionalAesthetic] |
| 767 | + [:=y {:optional true} PositionalAesthetic]]]]] |
| 768 | + {:registry registry})) |
| 769 | + |
707 | 770 | (def Layers |
708 | 771 | "Schema for one or more layers. |
709 | 772 | |
|
712 | 775 | [:or Layer [:vector Layer]]) |
713 | 776 |
|
714 | 777 | (def PlotSpec |
715 | | - "Schema for a complete plot specification. |
| 778 | + "Schema for a plot specification (complete or partial). |
716 | 779 | |
717 | | - A plot spec is a map containing: |
718 | | - - Layers: Vector of layer maps |
| 780 | + A plot spec is a map that can contain: |
| 781 | + - Layers: Vector of layer maps (optional - allows partial specs) |
719 | 782 | - Plot-level properties: target, width, height |
720 | | - - Plot-level scales (optional)" |
| 783 | + - Plot-level scales (optional) |
| 784 | + |
| 785 | + All fields are optional to support composition via =* and =+." |
721 | 786 | [:map |
722 | | - ;; Layers (required) |
723 | | - [:=layers [:vector Layer]] |
| 787 | + ;; Layers (optional - allows partial specs with just plot-level properties) |
| 788 | + [:=layers {:optional true} [:vector Layer]] |
724 | 789 |
|
725 | 790 | ;; Plot-level properties (all optional) |
726 | 791 | [:=target {:optional true} Backend] |
|
784 | 849 | "Validate a layer with context-aware checks. |
785 | 850 | |
786 | 851 | Performs: |
787 | | - 1. Schema validation (structure) |
788 | | - 2. Semantic validation (required fields for plottype) |
789 | | - 3. Data validation (columns exist) |
| 852 | + 1. Schema validation (structure + plottype-specific requirements via :multi) |
| 853 | + 2. Data column validation (columns exist) - runtime check |
790 | 854 | |
791 | 855 | Returns nil if valid, error map if invalid." |
792 | 856 | [layer] |
793 | | - ;; First check schema |
| 857 | + ;; Schema validation now handles both structure AND plottype-specific requirements |
794 | 858 | (or |
795 | 859 | (when-let [schema-errors (validate Layer layer)] |
796 | 860 | {:type :schema-error |
797 | 861 | :errors schema-errors |
798 | | - :message "Layer structure is invalid"}) |
799 | | - |
800 | | - ;; Check plottype-specific requirements |
801 | | - (let [plottype (:=plottype layer)] |
802 | | - (when plottype |
803 | | - (case plottype |
804 | | - ;; Scatter/line need x and y |
805 | | - (:scatter :line) |
806 | | - (when-not (and (:=x layer) (:=y layer)) |
807 | | - {:type :missing-required-aesthetic |
808 | | - :plottype plottype |
809 | | - :missing (cond |
810 | | - (and (nil? (:=x layer)) (nil? (:=y layer))) [:=x :=y] |
811 | | - (nil? (:=x layer)) [:=x] |
812 | | - :else [:=y]) |
813 | | - :message (str plottype " plots require both :=x and :=y")}) |
814 | | - |
815 | | - ;; Bar needs at least x |
816 | | - :bar |
817 | | - (when-not (:=x layer) |
818 | | - {:type :missing-required-aesthetic |
819 | | - :plottype plottype |
820 | | - :missing [:=x] |
821 | | - :message "Bar plots require :=x"}) |
822 | | - |
823 | | - ;; Histogram needs just x |
824 | | - :histogram |
825 | | - (when-not (:=x layer) |
826 | | - {:type :missing-required-aesthetic |
827 | | - :plottype plottype |
828 | | - :missing [:=x] |
829 | | - :message "Histogram requires :=x"}) |
830 | | - |
831 | | - ;; Area needs x and y |
832 | | - :area |
833 | | - (when-not (and (:=x layer) (:=y layer)) |
834 | | - {:type :missing-required-aesthetic |
835 | | - :plottype plottype |
836 | | - :missing (cond |
837 | | - (and (nil? (:=x layer)) (nil? (:=y layer))) [:=x :=y] |
838 | | - (nil? (:=x layer)) [:=x] |
839 | | - :else [:=y]) |
840 | | - :message "Area plots require both :=x and :=y"}) |
841 | | - |
842 | | - ;; Default - no specific requirements |
843 | | - nil))) |
844 | | - |
845 | | - ;; Check data-related validations if data is present |
| 862 | + :message "Layer validation failed"}) |
| 863 | + |
| 864 | + ;; Data column validation (runtime check - can't be done in schema) |
846 | 865 | (when-let [data (:=data layer)] |
847 | 866 | (let [column-keys (cond |
848 | 867 | ;; Tablecloth dataset |
|
930 | 949 | (defn- plot-spec? |
931 | 950 | "Check if x is a plot spec (map with :=layers or plot-level keys). |
932 | 951 | |
933 | | - Plot specs are maps that can have: |
934 | | - - :=layers key with vector of layer maps |
935 | | - - Plot-level :=... keys like :=target, :=width, :=height" |
| 952 | + Plot specs are maps that have at least one key starting with := |
| 953 | + and that also validate against the PlotSpec schema (both checks must pass)." |
936 | 954 | [x] |
937 | 955 | (and (map? x) |
938 | | - (or (contains? x :=layers) |
939 | | - ;; Has plot-level keys |
940 | | - (some #(-> % name first (= \=)) |
941 | | - (keys x))))) |
| 956 | + ;; Has at least one := key |
| 957 | + (some #(-> % name first (= \=)) |
| 958 | + (keys x)) |
| 959 | + ;; Validates against PlotSpec schema (additional safety check) |
| 960 | + (valid? PlotSpec x))) |
942 | 961 |
|
943 | 962 | ;; ### ⚙️ Renderer |
944 | 963 |
|
|
1083 | 1102 | (reduce =* (=* x y) more)))) |
1084 | 1103 |
|
1085 | 1104 | ;; Test helper: check if result is a valid layer vector |
1086 | | -(defn- valid-layers? [x] |
1087 | | - (and (vector? x) |
1088 | | - (seq x) |
1089 | | - (every? map? x) |
1090 | | - (every? #(some (fn [[k _]] |
1091 | | - (-> k name first (= \=))) |
1092 | | - %) |
1093 | | - x))) |
| 1105 | +(defn- valid-layers? |
| 1106 | + "Check if x is a valid vector of layers using Malli validation. |
| 1107 | + |
| 1108 | + Note: This checks for a vector of layers (an empty vector also passes), not a single layer." |
| 1109 | + [x] |
| 1110 | + (valid? [:vector Layer] x)) |
1094 | 1111 |
|
1095 | 1112 | (defn =+ |
1096 | 1113 | "Combine multiple plot specifications for overlay (sum). |
@@ -2383,7 +2400,6 @@ iris |
2383 | 2400 | (=* attrs-or-spec (scatter)) |
2384 | 2401 | (let [result (merge {:=plottype :scatter} |
2385 | 2402 | (update-keys attrs-or-spec =key))] |
2386 | | - (validate! Layer result) |
2387 | 2403 | {:=layers [result]}))) |
2388 | 2404 | ([spec attrs] |
2389 | 2405 | ;; Threading-friendly: (-> spec (scatter {:alpha 0.5})) |
@@ -2685,7 +2701,6 @@ iris |
2685 | 2701 | ([] |
2686 | 2702 | (let [result {:=transformation :linear |
2687 | 2703 | :=plottype :line}] |
2688 | | - (validate! Layer result) |
2689 | 2704 | {:=layers [result]})) |
2690 | 2705 | ([spec-or-data] |
2691 | 2706 | (let [spec (if (plot-spec? spec-or-data) |
@@ -2750,19 +2765,19 @@ iris |
2750 | 2765 |
|
2751 | 2766 | ;; Apply linear regression transform to points. |
2752 | 2767 | ;; |
2753 | | -;; Handles both single and grouped regression based on :group key in points. |
| 2768 | +;; Handles both single and grouped regression based on `:group` key in points. |
2754 | 2769 | ;; |
2755 | 2770 | ;; Args: |
2756 | | -;; - layer: Layer map containing transformation specification |
2757 | | -;; - points: Sequence of point maps with :x, :y, and optional :group keys |
| 2771 | +;; - `layer`: Layer map containing transformation specification |
| 2772 | +;; - `points`: Sequence of point maps with `:x`, `:y`, and optional `:group` keys |
2758 | 2773 | ;; |
2759 | 2774 | ;; Returns: |
2760 | | -;; - For ungrouped: {:type :regression :points points :fitted [p1 p2]} |
2761 | | -;; - For grouped: {:type :grouped-regression :points points :groups {group-val {:fitted [...] :points [...]}}} |
| 2775 | +;; - For ungrouped: `{:type :regression :points points :fitted [p1 p2]}` |
| 2776 | +;; - For grouped: `{:type :grouped-regression :points points :groups {group-val {:fitted [...] :points [...]}}}` |
2762 | 2777 | ;; |
2763 | 2778 | ;; Edge cases: |
2764 | 2779 | ;; - Returns original points if regression fails (< 2 points, degenerate data) |
2765 | | -;; - Handles nil fitted values gracefully (skipped during rendering) |
| 2780 | +;; - Handles `nil` fitted values gracefully (skipped during rendering) |
2766 | 2781 | (defmethod apply-transform :linear |
2767 | 2782 | [layer points] |
2768 | 2783 | (when-not (seq points) |
@@ -2966,7 +2981,6 @@ iris |
2966 | 2981 | :=plottype :bar |
2967 | 2982 | :=bins :sturges} |
2968 | 2983 | (update-keys opts-or-spec =key))] |
2969 | | - (validate! Layer result) |
2970 | 2984 | {:=layers [result]}))) |
2971 | 2985 | ([spec opts] |
2972 | 2986 | (=* spec (histogram opts)))) |
@@ -3016,19 +3030,19 @@ iris |
3016 | 3030 | ;; Apply histogram transform to points. |
3017 | 3031 | ;; |
3018 | 3032 | ;; Bins continuous x values and counts occurrences per bin. |
3019 | | -;; Handles both single and grouped histograms based on :group key in points. |
| 3033 | +;; Handles both single and grouped histograms based on `:group` key in points. |
3020 | 3034 | ;; |
3021 | 3035 | ;; Args: |
3022 | | -;; - layer: Layer map containing :=bins specification |
3023 | | -;; - points: Sequence of point maps with :x and optional :group keys |
| 3036 | +;; - `layer`: Layer map containing `:=bins` specification |
| 3037 | +;; - `points`: Sequence of point maps with `:x` and optional `:group` keys |
3024 | 3038 | ;; |
3025 | 3039 | ;; Returns: |
3026 | | -;; - For ungrouped: {:type :histogram :points points :bars [{:x-min :x-max :x-center :height}...]} |
3027 | | -;; - For grouped: {:type :grouped-histogram :points points :groups {group-val {:bars [...] :points [...]}}} |
| 3040 | +;; - For ungrouped: `{:type :histogram :points points :bars [{:x-min :x-max :x-center :height}...]}` |
| 3041 | +;; - For grouped: `{:type :grouped-histogram :points points :groups {group-val {:bars [...] :points [...]}}}` |
3028 | 3042 | ;; |
3029 | 3043 | ;; Edge cases: |
3030 | | -;; - Returns nil bars if compute-histogram fails (empty, non-numeric, or identical values) |
3031 | | -;; - Histogram with nil bars will not render (graceful degradation) |
| 3044 | +;; - Returns `nil` bars if `compute-histogram` fails (empty, non-numeric, or identical values) |
| 3045 | +;; - Histogram with `nil` bars will not render (graceful degradation) |
3032 | 3046 | (defmethod apply-transform :histogram |
3033 | 3047 | [layer points] |
3034 | 3048 | (when-not (seq points) |
|
0 commit comments