\pdfoutput=1
\documentclass{article}
\usepackage[utf8]{inputenc}
\usepackage[a4paper, total={6.8in, 9.5in}]{geometry}
\usepackage[hyphens]{url}
\usepackage{hyperref}
\usepackage{tikzsymbols}
\usetikzlibrary{backgrounds}
\usepackage{authblk}
\usepackage{tikz}
\usepackage{multirow}
\usepackage{amsmath}
\usepackage{amsfonts}
\usepackage{media9}
\definecolor{darkgreen}{rgb}{0.0, 0.2, 0.13}
\usepackage{listings}
\lstset{escapeinside={<@}{@>}}
\usepackage{graphicx}
\usepackage{pgfplots}
\pgfplotsset{compat=1.14}
\usepackage{subcaption}
\definecolor{ao}{rgb}{0.0, 0.5, 0.0}
\usetikzlibrary{positioning}
\usetikzlibrary{calc}
\usepackage[title]{appendix}
\usepackage{mathtools}
\DeclarePairedDelimiter\ceil{\lceil}{\rceil}
\DeclarePairedDelimiter\floor{\lfloor}{\rfloor}
\tikzset{mybox/.style={draw, minimum width=1cm, fill=gray!10}}
\tikzset{myboxLeading/.style={draw, minimum width=0.7cm, fill=yellow!10}}
\tikzset{myboxA/.style={draw, minimum width=0.7cm, fill=blue!10}}
\tikzset{myboxFst/.style={draw, minimum width=0.5cm, fill=yellow!10}}
\tikzset{myboxSec/.style={draw, minimum width=0.5cm, fill=green!10}}
\tikzset{myboxLst/.style={draw, minimum width=0.5cm, fill=pink!10}}
\tikzset{myboxLstFinal/.style={draw, minimum width=0.5cm, fill=blue!10}}
\tikzset{boxBasis/.style={draw, minimum width=1cm, fill=red!10}}
\usetikzlibrary{shapes}
\usetikzlibrary{decorations.shapes}
\tikzset{decorate sep/.style 2 args=
{decorate,decoration={shape backgrounds,shape=circle,shape size=#1,shape sep=#2}}}
\usepackage{xcolor}
% \highlight[<colour>]{<stuff>}
\newcommand{\highlight}[2][yellow]{\mathchoice%
{\colorbox{#1}{$\displaystyle#2$}}%
{\colorbox{#1}{$\textstyle#2$}}%
{\colorbox{#1}{$\scriptstyle#2$}}%
{\colorbox{#1}{$\scriptscriptstyle#2$}}}%
\makeatletter
\DeclareRobustCommand*{\Myhref}[1][]{% definition copied from hyperref.sty
\begingroup
\setkeys{Hyp}{#1}% Changed href to Hyp
\@ifnextchar\bgroup\Hy@href{\hyper@normalise\href@}%
}
\makeatother
\definecolor{codegreen}{rgb}{0,0.6,0}
\definecolor{codegray}{rgb}{0.5,0.5,0.5}
\definecolor{codepurple}{rgb}{0.58,0,0.82}
\definecolor{backcolour}{rgb}{0.95,0.95,0.92}
%Code listing style named "mystyle"
\lstdefinestyle{mystyle}{
backgroundcolor=\color{backcolour}, commentstyle=\color{codegreen},
keywordstyle=\color{magenta},
numberstyle=\tiny\color{codegray},
stringstyle=\color{codepurple},
basicstyle=\footnotesize,
breakatwhitespace=false,
breaklines=true,
captionpos=b,
keepspaces=true,
numbers=left,
numbersep=5pt,
showspaces=false,
showstringspaces=false,
showtabs=false,
tabsize=2,
linewidth=17cm
}
%"mystyle" code listing set
\lstset{style=mystyle}
\begin{document}
\title{Real numbers, data science and chaos: \\ How to fit any dataset with a single parameter}
\author{Laurent Bou\'e \\ \Myhref[hidelinks]{mailto:[email protected]}{\protect\includegraphics[width=1cm,height=0.6cm]{logos/gmail.png}} \,\,\,\, \Myhref[hidelinks]{https://www.linkedin.com/in/laurent-bou\%C3\%A9-b7923853/}{\protect\includegraphics[width=0.8cm,height=0.8cm]{logos/linkedin.png}} \,\,\,\, \Myhref[hidelinks]{https://github.com/Ranlot}{\protect\includegraphics[width=1cm,height=1cm]{logos/github.png}} \,\,\,\, \Myhref[hidelinks]{https://twitter.com/ranlot75}{\protect\includegraphics[width=0.8cm,height=0.8cm]{logos/twitter.png}} }
\affil{SAP Labs}
\date{}
\maketitle
\begin{abstract}
We show how any dataset of any modality (time-series, images, sound...) can be approximated by a well-behaved (continuous, differentiable...) scalar function with a single real-valued parameter. Building upon elementary concepts from chaos theory, we adopt a pedagogical approach demonstrating how to adjust this parameter in order to achieve an arbitrarily precise fit to all samples of the data. Targeting an audience of data scientists with a taste for the curious and unusual, the results presented here expand on previous similar observations~\cite{piantadosi} regarding the expressive power and generalization of machine learning models.
\end{abstract}
\vskip 0.2in
\noindent {\bf Keywords:} Chaotic systems\,\,{\small\textbullet}\,\,Machine Learning\,\,{\small\textbullet}\,\,Generalization
\vskip 0.2in
Real-world data comes in a huge variety of shapes and sizes, with modalities ranging from traditional structured database schemas to unstructured media sources such as video feeds and audio recordings. Nevertheless, any dataset can ultimately be thought of as a list of numerical values~$\mathcal{X} = [x_0, \cdots , x_n]$ describing the data content regardless of the underlying modality. The purpose of this paper is to show that all the samples of any arbitrary dataset~$\mathcal{X}$ can be reproduced by a simple differentiable equation:
\begin{equation}
f_\alpha(x) = \sin^2 \left( 2^{x\tau} \arcsin{\sqrt{\alpha}} \right)
\label{eq::mainResult}
\end{equation}
where~$\alpha \in \mathbb{R}$ is a real-valued parameter to be learned from the data and~$x \in [0, \cdots , n]$ takes integer values. ($\tau \in \mathbb{N}$ is a constant which effectively controls the desired level of accuracy.) Before delving into the logic of how and why~$f_\alpha$ is able to achieve such a lofty goal, let us start with a few practical demonstrations. Keeping with the tradition of ``fitting an elephant''~\cite{elephant}, we start by showing how different animal shapes may be generated by choosing appropriate values of~$\alpha$ as displayed in~Figure~\ref{fig::animals}. \\
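\noindent As a first taste of code, here is a minimal numerical sketch of~eq.(\ref{eq::mainResult}). It assumes the \texttt{mpmath} arbitrary-precision library (standard floating-point numbers cannot cope with the precision that large values of~$x\tau$ demand; see section~\ref{sec::decimalPrecision}), and the function name \texttt{f\_alpha} is ours, introduced purely for illustration. \\
\begin{lstlisting}[language=Python, caption=]
# Minimal sketch of eq. (1); assumes the mpmath arbitrary-precision library.
from mpmath import mp, mpf, sin, asin, sqrt

mp.dps = 300  # decimal digits of working precision; raise for larger x * tau

def f_alpha(x, alpha, tau=8):
    # Pass alpha as a string (e.g. '0.5496...') so that no precision is lost.
    a = mpf(alpha)
    return float(sin(2 ** (x * tau) * asin(sqrt(a))) ** 2)
\end{lstlisting}
\vspace{0.3cm}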
\begin{figure}[hb!]
\centering
\includegraphics[width=.245\linewidth]{resources/generatedAnimals/elephant.png}
\includegraphics[width=.245\linewidth]{resources/generatedAnimals/bird.png}
\includegraphics[width=.245\linewidth]{resources/generatedAnimals/turtle.png}
\includegraphics[width=.245\linewidth]{resources/generatedAnimals/fish.png}
\caption{Animal shapes obtained with the values of~$\alpha$ displayed above each image. One should consider the data as a scatter plot of pairs of values~$(x,y)$ where each~$x \in \mathbb{N}$ is associated with a corresponding~$y$ value given by~$y \equiv f_\alpha(x)$. One goal of the paper will be to show how to find the precise value of~$\alpha \in \mathbb{R}$ required to fit any target dataset.}
\label{fig::animals}
\end{figure}
\begin{figure}
\centering
\begin{subfigure}{.5\textwidth}
\centering
\includegraphics[width=\linewidth]{resources/audio/generated/generated_waveform.png}
\end{subfigure}%
\begin{subfigure}{.5\textwidth}
\centering
\includegraphics[width=\linewidth]{resources/audio/generated/generated_spectrogram.png}
\end{subfigure}
\caption{{\bf Left)} Time series obtained by applying~$f_\alpha$ at equally-spaced arguments in~$\mathbb{N}$. Scaling the horizontal axis by an appropriate sampling rate ($\approx 11000$~Hz) allows the signal to be interpreted as a sound wave. {\bf Right)}~Playing the corresponding \includemedia[
transparent,
passcontext,
addresource=resources/audio/generated/HelloWorld_generated.mp3,
flashvars={source=resources/audio/generated/HelloWorld_generated.mp3},
]{\color{darkgreen}\framebox[0.1\linewidth][c]{\bf audio file}}{APlayer.swf} reveals a male voice saying~``Hello world'' characterized by this spectrogram.}
\label{fig:helloWorld}
\end{figure}
\noindent Following this demonstration that~$f_\alpha$ can generate any kind of doodle-like drawing, let us continue with a literal ``Hello world'' example in order to further illustrate the capabilities of the approach. Namely, we show in~Figure~\ref{fig:helloWorld} how a well-chosen value of~$\alpha$ may be used to produce a complex high-dimensional acoustic signal encoding the actual expression~``Hello world''! \\
\noindent Moving on to the connection with data science, let us consider the data modality that has emerged as the epicenter of the current explosion of interest in deep learning: images. Along with the advent of specialized hardware and smarter neural network architectures, it is widely acknowledged that the availability of very large labeled training datasets has been one of the most important factors responsible for computer vision's perceived ``coming-of-age'', as manifested by a myriad of academic breakthroughs and integration into successful commercial applications. In this context, the CIFAR-10 dataset continues to stand as one influential yardstick measuring the performance of new learning algorithms. Because of the small resolution of the images, this dataset also serves as a popular research playground. Accordingly, we demonstrate in~Figure~\ref{fig::cifar10} that it is always possible to find values of~$\alpha$ such that~$f_\alpha$ builds up artificial images that mirror the categories of~CIFAR-10.
\paragraph{Model complexity and generalization?} A fundamental question in machine learning research has to do with the evaluation of ``model capacity'' and its connection to the presumed existence of generalization (even in strongly overparametrized models). Traditional approaches tend to fall into categories such as \href{https://en.wikipedia.org/wiki/Vapnik-Chervonenkis\_dimension}{VC dimension} and \href{https://en.wikipedia.org/wiki/Rademacher\_complexity}{Rademacher complexity}... Nevertheless, in the absence of a generic theoretical framework valid for modern deep learning architectures, it is not uncommon for practitioners to simply count the number of parameters and treat it as a proxy for expressive power; an approach loosely inspired by classical information-theoretic estimators such as AIC and BIC. \\
\noindent The examples above have demonstrated that an elegant model~$f_\alpha$ with a simple and differentiable formulation (composition of a few trigonometric and exponential functions) is able to produce any kind of semantically-relevant scatter plot, audio or visual data (text may also be constructed using an identical approach) at the cost of a single real-valued parameter. Without yet revealing all the tricks, one aspect should already be obvious by now: all the information is directly encoded, without any compression or ``learning'', into~$\alpha \in \mathbb{R}$. As mathematical objects, real numbers are non-terminating and therefore contain an infinite amount of information (in particular, they should not be confused with whatever finite-precision data types programming languages may implement)~\cite{chaitin}. As such, no generalization can be expected from~$f_\alpha$ and indeed we conclude the paper by showing this explicitly in the context of time-series in~Figure~\ref{fig:generalization}. \\
\noindent In addition to casting doubts on the validity of parameter-counting methods and highlighting the importance of complexity bounds based on Occam's razor such as~\href{https://en.wikipedia.org/wiki/Minimum\_description\_length}{minimum description length} (that trade off goodness-of-fit with expressive power), we hope that~$f_\alpha$ may also serve as {\bf entertainment for curious data scientists}~\cite{laurent}.
\begin{figure}
\centering
\includegraphics[width=\linewidth]{resources/cifar10/CIFAR10-generatedImgs.png}
\caption[]{Small resolution images are generated by applying~$f_\alpha$ for~$3072$ integer-spaced arguments in~$\mathbb{N}$. Folding the resulting list of numerical values appropriately into~$3$d arrays of shapes~$32\times 32\times 3$ allows one to produce color images that look like they were drawn from the~CIFAR-10 classes: \{{\it airplane, automobile, bird, cat, deer, dog, frog, horse, ship, truck}\}.}
\label{fig::cifar10}
\end{figure}
\paragraph{Setting up a few essential building blocks.} After revealing that~$\alpha$ directly encodes the entire dataset~$\mathcal{X}$ into a single parameter, the rest of the paper will be dedicated to showing how to construct~$\alpha \in \mathbb{R}$ and its companion decoder function~$f_\alpha$. In order to do this, we need to lay out a couple of conceptual tools upon which the whole trick is based.
\paragraph{\framebox{\it a) Fixed-point binary representation}} Without loss of generality, we focus on real numbers~$\alpha \in [0,1]$ in the unit interval. Since the integral part is identically null, the fractional part of any such~$\alpha$ can be expressed as an infinite vector of coefficients~$a_i \in \{0,1\}$ where each coefficient is paired with a weight~$1/2^i$ as illustrated by:
\begin{center} \begin{tikzpicture}
\node[mybox] (a1) at (9, 5.45) {$1/2^1$};
\node[mybox] (a2) at (10, 5.45) {$1/2^2$};
\node[mybox] (a3) at (11, 5.45) {$1/2^3$};
\node[mybox] (a4) at (12, 5.45) {$1/2^4$};
\node[mybox] (a5) at (13, 5.45) {$1/2^5$};
\node[mybox] (a6) at (14, 5.45) {$1/2^6$};
\node[mybox] (a7) at (15, 5.45) {$1/2^7$};
\node[mybox] (a8) at (16, 5.45) {$1/2^8$};
\node[mybox] (a9) at (17, 5.45) {$1/2^9$};
\node[mybox] (a10) at (18, 5.45) {$1/2^{10}$};
\node[mybox] (a11) at (19, 5.45) {$1/2^{11}$};
\node[fill=gray!10] (dots1) at (19.9, 5.45) {$\cdots$};
\node[fill=gray!10] (dots2) at (20.4, 5.45) {$\cdots$};
\node[fill=gray!10] (dots3) at (20.9, 5.45) {$\cdots$};
\node[myboxLeading] (ex0) at (7.7, 4.6) {$\alpha = 0 \, . $};
\node[boxBasis] (exs1) at (9, 4.6) {$a_1$};
\node[boxBasis] (exs2) at (10, 4.6) {$a_2$};
\node[boxBasis] (exs3) at (11, 4.6) {$a_3$};
\node[boxBasis] (exs4) at (12, 4.6) {$a_4$};
\node[boxBasis] (exs5) at (13, 4.6) {$a_5$};
\node[boxBasis] (exs6) at (14, 4.6) {$a_6$};
\node[boxBasis] (exs7) at (15, 4.6) {$a_7$};
\node[boxBasis] (exs8) at (16, 4.6) {$a_8$};
\node[boxBasis] (exs9) at (17, 4.6) {$a_9$};
\node[boxBasis] (exs10) at (18, 4.6) {$a_{10}$};
\node[boxBasis] (exs11) at (19, 4.6) {$a_{11}$};
\node[fill=red!10] (adots1) at (19.9, 4.6) {$\cdots$};
\node[fill=red!10] (adots2) at (20.4, 4.6) {$\cdots$};
\node[fill=red!10] (adots3) at (20.9, 4.6) {$\cdots$};
\end{tikzpicture} \end{center}
Converting this binary representation of~$\alpha$ to its equivalent decimal counterpart is accomplished by evaluating the following infinite-sum expression:
\begin{equation*}
\alpha = \sum_{n=1}^{+\infty} \dfrac{a_n}{2^n} \,\, ; \quad \,\, \alpha \in [0,1]
\end{equation*}
Unfortunately, existing data warehouse solutions can only store a finite (however large) amount of information and cannot handle the infinite memory requirements imposed by mathematical real numbers in~$\mathbb{R}$. Instead, one has to decide on a truncation threshold to approximate real numbers, resulting in a finite~$\tau$-bit expansion: \\
\noindent \makebox[\textwidth][c]{\begin{tikzpicture}
\node[myboxLeading] () at (8, 3) {$0 \, . $};
\node[boxBasis] (a1) at (9, 3) {$0$};
\node[boxBasis] () at (10, 3) {$0$};
\node[boxBasis] () at (11, 3) {$0$};
\node[boxBasis] () at (12, 3) {$0$};
\node[boxBasis] () at (13, 3) {$0$};
\node[boxBasis] () at (14, 3) {$0$};
\node[boxBasis] () at (15, 3) {$0$};
\node[boxBasis] (a8) at (16, 3) {$0$};
\node[text width=2.15cm] (xx) at (17.8, 3) {$ = 0$};
\node[myboxLeading] () at (8, 2.4) {$0 \, . $};
\node[boxBasis] () at (9, 2.4) {$0$};
\node[boxBasis] () at (10, 2.4) {$0$};
\node[boxBasis] () at (11, 2.4) {$0$};
\node[boxBasis] () at (12, 2.4) {$0$};
\node[boxBasis] () at (13, 2.4) {$0$};
\node[boxBasis] () at (14, 2.4) {$0$};
\node[boxBasis] () at (15, 2.4) {$0$};
\node[boxBasis] () at (16, 2.4) {$1$};
\node[text width=2.15cm] () at (17.8, 2.4) {$ = 0.00390625$};
\node[] (dots) at (12.5, 1.8) {$\vdots$};
\node[myboxLeading] () at (8, 1.1) {$0 \, . $};
\node[boxBasis] () at (9, 1.1) {$1$};
\node[boxBasis] () at (10, 1.1) {$0$};
\node[boxBasis] () at (11, 1.1) {$0$};
\node[boxBasis] () at (12, 1.1) {$0$};
\node[boxBasis] () at (13, 1.1) {$0$};
\node[boxBasis] () at (14, 1.1) {$0$};
\node[boxBasis] () at (15, 1.1) {$0$};
\node[boxBasis] () at (16, 1.1) {$0$};
\node[text width=2.15cm] () at (17.8, 1.1) {$ = 0.5$};
\node[] (dots) at (12.5, 0.5) {$\vdots$};
\node[myboxLeading] () at (8, -0.2) {$0 \, . $};
\node[boxBasis] () at (9, -0.2) {$1$};
\node[boxBasis] () at (10, -0.2) {$1$};
\node[boxBasis] () at (11, -0.2) {$1$};
\node[boxBasis] () at (12, -0.2) {$1$};
\node[boxBasis] () at (13, -0.2) {$1$};
\node[boxBasis] () at (14, -0.2) {$1$};
\node[boxBasis] () at (15, -0.2) {$1$};
\node[boxBasis] () at (16, -0.2) {$0$};
\node[text width=2.15cm] () at (17.8, -0.2) {$ = 0.9921875$};
\node[myboxLeading] () at (8, -0.8) {$0 \, . $};
\node[boxBasis] () at (9, -0.8) {$1$};
\node[boxBasis] () at (10, -0.8) {$1$};
\node[boxBasis] () at (11, -0.8) {$1$};
\node[boxBasis] () at (12, -0.8) {$1$};
\node[boxBasis] () at (13, -0.8) {$1$};
\node[boxBasis] () at (14, -0.8) {$1$};
\node[boxBasis] () at (15, -0.8) {$1$};
\node[boxBasis] (y) at (16, -0.8) {$1$};
\node[text width=2.15cm] (yy) at (17.8, -0.8) {$ = 0.9960938$};
\draw [decorate,decoration={brace,amplitude=10pt}] (a1.north west) -- (a8.north east) node [black,midway,yshift=0.3cm, above] { Truncation to finite~$\tau$-bit precision ($\tau = 8$)};
\draw [align=left, decorate,decoration={brace,amplitude=10pt}] (xx.north east) -- (yy.south east) node [black,midway,xshift=0.6cm, right] { Discretization of the unit interval \\ into $2^{\tau=8} = 256$ uniformly spaced \\ representable numbers. \\ \\ Empirical data points are rounded \\ off to the nearest available number \\ in this fixed-point representation.};
\end{tikzpicture}} \\
\noindent The equivalent decimal value is obtained by evaluating the now-finite sum:
\begin{equation*}
\alpha_\text{approx} = \sum_{n=1}^\tau \dfrac{a_n}{2^n}
\end{equation*}
This approximation is completely faithful only in the degenerate case where all components~$a_{n \geq \tau + 1} \equiv 0$. As soon as one of these components is non-zero, $\alpha_{\text{approx}}$ underestimates the true value~$\alpha$. In the worst-case scenario, all the neglected components are~$a_{n \geq \tau + 1} \equiv 1$, showing that the error is bounded by:
\begin{equation*}
\left| \alpha - \alpha_\text{approx} \right| \leq \sum_{n=\tau+1}^{+\infty} \dfrac{1}{2^n} = \dfrac{1}{2^\tau}
\end{equation*}
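\noindent For concreteness, here is a minimal sketch of this~$\tau$-bit truncation in code. {\it (These helpers are illustrative stand-ins consistent with how \texttt{decimalToBinary} and \texttt{binaryToDecimal} are used later on; the paper's own versions are provided in appendix~\ref{code::appendix}.)} \\
\begin{lstlisting}[language=Python, caption=]
# Illustrative sketch only; Python's decimal module supplies the
# arbitrary-precision arithmetic that plain floats cannot.
from decimal import Decimal, getcontext
getcontext().prec = 150   # decimal digits of working precision

def decimalToBinary(x, tau=8):
    # Truncate x in [0, 1] to its first tau binary digits.
    bits = ''
    for _ in range(tau):
        x *= 2
        bits += '1' if x >= 1 else '0'
        x %= 1
    return bits

def binaryToDecimal(bits):
    # Exact decimal value of a fractional binary string.
    return sum(Decimal(int(b)) / Decimal(2) ** (i + 1)
               for i, b in enumerate(bits))

print(decimalToBinary(0.9960938))    # '11111111'
print(binaryToDecimal('11111110'))   # 0.9921875, as in the diagram above
\end{lstlisting}
\vspace{0.3cm}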
\noindent This type of uniform discretization of the reals is referred to as a ``fixed-point'' representation to be contrasted with ``floating-point'' numbers that can represent both large and small reals in a reasonable amount of storage using a system similar to scientific notation. Let us mention, in passing, that with the surge of compute-intensive workloads based on neural networks, research towards reducing hardware overhead has also sparked a renewed wave of interest in alternative number representations such as the ``logarithmic number system''~\cite{lns}...
\newpage
\begin{figure}
\begin{center}
\begin{tikzpicture} %[scale = 1.0]
\begin{axis}[xmin=-0.03, xmax=1.03, ymin=-0.03, ymax=1.03, legend pos=north west, legend style={empty legend, fill=black!10, at={(0.5,1.1)},anchor=north}, axis background/.style={fill=gray!10}, xtick={0, 0.25, 0.5, 0.75, 1}, xticklabels={0, 1/4, 1/2, 3/4, 1},
ytick={0, 0.5, 1}, yticklabels={0, 1/2, 1}, thick, minor x tick num=0, minor y tick num=0, grid=both, xlabel=$\alpha_k$]
\addplot[samples=500,domain=0:0.499, color=black, line width=1.8pt]{2 * x};
\addplot[samples=500,domain=0.501:1, color=black, line width=1.8pt]{2 * x - 1};
\addlegendentry{$\alpha_{k+1} \equiv \mathcal{D} (\alpha_k) = 2 \alpha_k \,\,\text{mod}\,\,1$};
\end{axis}
\end{tikzpicture}
\end{center}
\caption{Graphical illustration of the dyadic transformation~$\mathcal{D}$. Notice how the modulo operation ensures that~$\alpha_{k+1} \in [0,1] \,\,\,\, \forall k \in \mathbb{N}$ by creating a non-differentiable jump at~$1/2$ turning~$\mathcal{D}$ into a piecewise linear function.}
\label{fig:dyadic}
\end{figure}
\paragraph{\framebox{\it b) Chaos theory}} The second (and last) prerequisite consists of a particularly simple incarnation of a one-dimensional discrete dynamical system known as the ``dyadic transformation''. Given a variable~$\alpha_k \in [0,1]$ at time-step~$k$, its evolution at time-step~$k+1$ is defined by:
\begin{equation}
\alpha_{k+1} \equiv \mathcal{D} (\alpha_k) = 2 \alpha_k \,\, \text{mod} \,\, 1
\label{eq:dyadic}
\end{equation}
\noindent Stepping away from the decimal representation and moving over to binary form helps reveal the underlying behavior of~$\mathcal{D}$ (illustrated graphically in Figure~\ref{fig:dyadic}). Indeed, in this representation, multiplication by~2 can be interpreted as a simple shift of the whole bit sequence defining~$\alpha_k$ by a single bit in the leftwise direction. In addition, the modulo operation turns every bit that crosses over to the left side of the radix point into a~$0$, thereby ensuring that~$\alpha_{k+1}$ remains in the unit interval. Because of this property, the dyadic transformation is sometimes referred to as the ``bit-shift'' map. As an example, let us consider~$\tau=8$ significant bits and denote by~$\highlight[red!10]{\cdots \cdots \cdots}$ the infinite sequence of random bits that follow. Iterated applications of~$\mathcal{D}$ can be understood as an accumulation of leftwise bit-shifts:
\begin{center} \begin{tikzpicture}
\draw [dashed] (8.4, 3.5) -- (8.4, -2.7);
\node[myboxA] (ex0) at (3.6, 3.3) {$\alpha_k$};
\node[myboxLeading] (ex0) at (7.95, 3.3) {$0.$};
\node[boxBasis] (a1) at (9, 3.3) {$a_1$};
\node[boxBasis] (a2) at (10, 3.3) {$a_2$};
\node[boxBasis] (a3) at (11, 3.3) {$a_3$};
\node[boxBasis] (a4) at (12, 3.3) {$a_4$};
\node[boxBasis] (a5) at (13, 3.3) {$a_5$};
\node[boxBasis] (a6) at (14, 3.3) {$a_6$};
\node[boxBasis] (a7) at (15, 3.3) {$a_7$};
\node[boxBasis] (a8) at (16, 3.3) {$a_8$};
\node[fill=red!10] () at (16.9, 3.3) {$\cdots$};
\node[fill=red!10] () at (17.4, 3.3) {$\cdots$};
\node[fill=red!10] () at (17.9, 3.3) {$\cdots$};
\node[myboxA] (ex0) at (4.5, 2.4) {$\alpha_{k+1} = \mathcal{D}^1(\alpha_k)$};
\node[myboxLeading] (ex0) at (7.15, 2.4) {$a_1\, \text{mod}\, 1 \equiv 0.$};
\node[boxBasis] (b2) at (9, 2.4) {$a_2$};
\node[boxBasis] (b3) at (10, 2.4) {$a_3$};
\node[boxBasis] (b4) at (11, 2.4) {$a_4$};
\node[boxBasis] (b5) at (12, 2.4) {$a_5$};
\node[boxBasis] (b6) at (13, 2.4) {$a_6$};
\node[boxBasis] (b7) at (14, 2.4) {$a_7$};
\node[boxBasis] (b8) at (15, 2.4) {$a_8$};
\node[fill=red!10] () at (15.9, 2.4) {$\cdots$};
\node[fill=red!10] () at (16.4, 2.4) {$\cdots$};
\node[fill=red!10] () at (16.9, 2.4) {$\cdots$};
\draw[->] (a2) to (b2) node[ ] {};
\draw[->] (a3) to (b3) node[ ] {};
\draw[->] (a4) to (b4) node[ ] {};
\draw[->] (a5) to (b5) node[ ] {};
\draw[->] (a6) to (b6) node[ ] {};
\draw[->] (a7) to (b7) node[ ] {};
\draw[->] (a8) to (b8) node[ ] {};
\node[myboxA] (ex0) at (4.5, 1.5) {$\alpha_{k+2} = \mathcal{D}^2(\alpha_k)$};
\node[myboxLeading] (ex0) at (7.15, 1.5) {$a_2\, \text{mod}\, 1 \equiv 0.$};
\node[boxBasis] (c3) at (9, 1.5) {$a_3$};
\node[boxBasis] (c4) at (10, 1.5) {$a_4$};
\node[boxBasis] (c5) at (11, 1.5) {$a_5$};
\node[boxBasis] (c6) at (12, 1.5) {$a_6$};
\node[boxBasis] (c7) at (13, 1.5) {$a_7$};
\node[boxBasis] (c8) at (14, 1.5) {$a_8$};
\node[fill=red!10] () at (14.9, 1.5) {$\cdots$};
\node[fill=red!10] () at (15.4, 1.5) {$\cdots$};
\node[fill=red!10] () at (15.9, 1.5) {$\cdots$};
\draw[->] (b3) to (c3) node[ ] {};
\draw[->] (b4) to (c4) node[ ] {};
\draw[->] (b5) to (c5) node[ ] {};
\draw[->] (b6) to (c6) node[ ] {};
\draw[->] (b7) to (c7) node[ ] {};
\draw[->] (b8) to (c8) node[ ] {};
\node[fill=gray!10] () at (11, 0.8) {Multiplication by~2 means that iterated applications of~$\mathcal{D}$ progressively shift the fractional part};
\node[fill=gray!10] () at (11, 0.2) {of the binary representation of~$\alpha_k$; the integral part is always~0 because of the mod operation.};
\node[myboxA] (ex0) at (4.5, -0.6) {$\alpha_{k+6} = \mathcal{D}^6(\alpha_k)$};
\node[myboxLeading] (ex0) at (7.15, -0.6) {$a_6\, \text{mod}\, 1 \equiv 0.$};
\node[boxBasis] (d7) at (9, -0.6) {$a_7$};
\node[boxBasis] (d8) at (10, -0.6) {$a_8$};
\node[fill=red!10] () at (10.9, -0.6) {$\cdots$};
\node[fill=red!10] () at (11.4, -0.6) {$\cdots$};
\node[fill=red!10] () at (11.9, -0.6) {$\cdots$};
\node[myboxA] (ex0) at (4.5, -1.5) {$\alpha_{k+7} = \mathcal{D}^7(\alpha_k)$};
\node[myboxLeading] (ex0) at (7.15, -1.5) {$a_7\, \text{mod}\, 1 \equiv 0.$};
\node[boxBasis] (d7) at (9, -1.5) {$a_8$};
\node[fill=red!10] () at (9.9, -1.5) {$\cdots$};
\node[fill=red!10] () at (10.4, -1.5) {$\cdots$};
\node[fill=red!10] () at (10.9, -1.5) {$\cdots$};
\node[myboxA] (ex0) at (4.5, -2.4) {$\alpha_{k+8} = \mathcal{D}^8(\alpha_k)$};
\node[myboxLeading] (ex0) at (7.15, -2.4) {$a_8\, \text{mod}\, 1 \equiv 0.$};
\draw[->] (d8) to (d7) node[ ] {};
\node[fill=red!10] () at (8.85, -2.4) {$\cdots$};
\node[fill=red!10] () at (9.35, -2.4) {$\cdots$};
\node[fill=red!10] () at (9.85, -2.4) {$\cdots$};
\end{tikzpicture} \end{center}
\noindent In addition to clarifying the bit-shift property of the dyadic transformation, this visualization nicely brings to light the fundamental mechanism responsible for the onset of chaotic dynamics: each iteration of~$\mathcal{D}$ leads to the loss of~1~bit of information. After~$\tau$ iterations all the significant bits of~$\alpha_k$ have been lost and we are left with an infinite sequence of random bits. In other words, the time evolution of~$\alpha_k$ depends very strongly on its least significant bits: a hallmark of chaos known as sensitivity to initial conditions.
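\noindent This loss of information is easy to witness with ordinary double-precision floats (a deliberately naive sketch; the actual algorithm presented below relies on arbitrary-precision arithmetic instead): \\
\begin{lstlisting}[language=Python, caption=]
# A float only carries ~53 significant bits: the dyadic map shifts them
# all out, one per iteration, leaving exactly 0 behind.
dyadicMap = lambda x: (2 * x) % 1

alpha = 0.6180339887498949   # a double, i.e. a 53-bit dyadic rational
for k in range(60):
    alpha = dyadicMap(alpha)
print(alpha)   # 0.0 -> every significant bit has been lost
\end{lstlisting}
\vspace{0.3cm}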
\newpage
\paragraph{Encoder/decoder strategies.} Let us consider a dataset~$\mathcal{X} = [ x_0, \cdots , x_n ]$ composed of a sequence of samples whose values have been normalized to lie in the unit interval. Any data source, regardless of the underlying dimensionality or modality, can be collapsed into such a list of numerical values and min-max normalized so that whatever pre-processing procedure one may need to apply to the raw data does not impose any restrictions on our approach (as demonstrated before by the illustrative examples for scatter plots, sound, images...). \\
\noindent Already, combining the fixed-point binary representation of real numbers with the dyadic transformation allows us to construct a straightforward encoding/decoding strategy that is able to reproduce all the samples of~$\mathcal{X}$ up to an arbitrary degree of precision using a single parameter in~$\mathbb{R}$ (section~\ref{lab:naive}). Because of its lack of sophistication, this strategy won't be able to explain the elegant expression~$f_\alpha$ displayed at the very beginning of the paper. Nevertheless, it will serve as a conceptual scaffolding upon which our second strategy will be heavily based. Uncovering the origin of~$f_\alpha$ will require the introduction of a few more theoretical tools from chaos theory: logistic maps and the concept of topological conjugacy. Blending these new ingredients on top of the blueprint laid out in the first construction will finally reveal how the differentiable model~$f_\alpha$ comes about (section~\ref{secondStrategy}).
\section{Building the scaffolding}
\label{lab:naive}
The idea is almost embarrassingly simple... Start by encoding all the values of~$\mathcal{X}$ into a long binary string and convert it to its decimal representation. Use this number~$\alpha_0 \in \mathbb{R}$ as the initial condition for the dyadic transformation. Because of its bit-shift property, iterated applications of~$\mathcal{D}$ can be used to decode one-by-one all the components of~$\mathcal{X}$.
\subsection{Construction of the initial condition}
\label{naive::init}
We start by converting all the values~$[x_0, x_1, x_2, \cdots , x_n]$ of~$\mathcal{X}$ into their binary representation truncated to the first~$\tau$ significant bits. \\
\begin{center} \begin{tikzpicture}
\node[] (y0) at (4, 0) {$x_0 \approx x^0_\text{bin} = $ };
\node[] (y1) at (4, -1) {$x_1 \approx x^1_\text{bin} = $ };
\node[] (y2) at (4, -2) {$x_2 \approx x^2_\text{bin} = $ };
\node[] (yn) at (4, -4) {$x_n \approx x^n_\text{bin} = $ };
\node[myboxFst] (a1) at (5.3, 0) { \phantom{0} };
\node[myboxFst] (a2) at (5.8, 0) {\phantom{0} };
\node[myboxFst] (a3) at (6.3, 0) {\phantom{0} };
\node[myboxFst] (a4) at (6.8, 0) {\phantom{0} };
\node[myboxFst] (a5) at (7.3, 0) {\phantom{0} };
\node[myboxFst] (a6) at (7.8, 0) {\phantom{0} };
\node[myboxFst] (a7) at (8.3, 0) {\phantom{0} };
\node[myboxFst] (a8) at (8.8, 0) { \phantom{0}};
\node[myboxSec] (b1) at (5.3, -1) { \phantom{0} };
\node[myboxSec] (b2) at (5.8, -1) { \phantom{0}};
\node[myboxSec] (b3) at (6.3, -1) { \phantom{0} };
\node[myboxSec] (b4) at (6.8, -1) {\phantom{0} };
\node[myboxSec] (b5) at (7.3, -1) {\phantom{0} };
\node[myboxSec] (b6) at (7.8, -1) { \phantom{0}};
\node[myboxSec] (b7) at (8.3, -1) { \phantom{0}};
\node[myboxSec] (b8) at (8.8, -1) {\phantom{0} };
\node[myboxLst] (c1) at (5.3, -2) { \phantom{0} };
\node[myboxLst] (c2) at (5.8, -2) {\phantom{0} };
\node[myboxLst] (c3) at (6.3, -2) {\phantom{0} };
\node[myboxLst] (c4) at (6.8, -2) { \phantom{0}};
\node[myboxLst] (c5) at (7.3, -2) {\phantom{0} };
\node[myboxLst] (c6) at (7.8, -2) { \phantom{0}};
\node[myboxLst] (c7) at (8.3, -2) { \phantom{0}};
\node[myboxLst] (c8) at (8.8, -2) {\phantom{0} };
\node[] () at (3.8, -2.8) {$\vdots$ };
\node[] () at (3.8, -3.2) {$\vdots$ };
\node[myboxLstFinal] (d1) at (5.3, -4) { \phantom{0} };
\node[myboxLstFinal] (d2) at (5.8, -4) {\phantom{0} };
\node[myboxLstFinal] (d3) at (6.3, -4) {\phantom{0} };
\node[myboxLstFinal] (d4) at (6.8, -4) { \phantom{0}};
\node[myboxLstFinal] (d5) at (7.3, -4) {\phantom{0} };
\node[myboxLstFinal] (d6) at (7.8, -4) { \phantom{0}};
\node[myboxLstFinal] (d7) at (8.3, -4) { \phantom{0}};
\node[myboxLstFinal] (d8) at (8.8, -4) {\phantom{0} };
\draw [decorate,decoration={brace,amplitude=10pt}]
(a8.north east) -- (d8.south east) node [align=left, black,midway,xshift=4cm]
{Convert all samples to fixed-point binary \\ representation with~$\tau$ bits of precision};
\end{tikzpicture} \end{center}
\noindent Next, we concatenate all the samples together so that the entire dataset is ``flattened'' into a long binary string composed of~$\sim n \tau$ bits. Notice how we associate the binary coefficients with weights of decreasing significance according to their order of appearance in~$\mathcal{X}$. \\ \\
\noindent \makebox[\textwidth][c]{\begin{tikzpicture}
\node[] (alpha) at (0.2, 0) {$\alpha_{0} = $ };
\node[myboxFst] (a1) at (1, 0) { \phantom{0} };
\node[myboxFst] (a2) at (1.5, 0) { \phantom{0} };
\node[myboxFst] (a3) at (2, 0) {\phantom{0} };
\node[myboxFst] (a4) at (2.5, 0) {\phantom{0} };
\node[myboxFst] (a5) at (3, 0) { \phantom{0} };
\node[myboxFst] (a6) at (3.5, 0) { \phantom{0} };
\node[myboxFst] (a7) at (4, 0) {\phantom{0} };
\node[myboxFst] (a8) at (4.5, 0) {\phantom{0} };
\node[myboxSec] (b1) at (5, 0) { \phantom{0} };
\node[myboxSec] (b2) at (5.5, 0) { \phantom{0}};
\node[myboxSec] (b3) at (6, 0) {\phantom{0} };
\node[myboxSec] (b4) at (6.5, 0) {\phantom{0} };
\node[myboxSec] (b5) at (7, 0) {\phantom{0} };
\node[myboxSec] (b6) at (7.5, 0) {\phantom{0} };
\node[myboxSec] (b7) at (8, 0) {\phantom{0} };
\node[myboxSec] (b8) at (8.5, 0) {\phantom{0} };
\node[myboxLst] (l1) at (9, 0) {\phantom{0} };
\node[myboxLst] (l2) at (9.5, 0) {\phantom{0} };
\node[myboxLst] (l3) at (10, 0) {\phantom{0} };
\node[myboxLst] (l4) at (10.5, 0) {\phantom{0} };
\node[myboxLst] (l5) at (11, 0) {\phantom{0} };
\node[myboxLst] (l6) at (11.5, 0) {\phantom{0} };
\node[myboxLst] (l7) at (12, 0) {\phantom{0} };
\node[myboxLst] (l8) at (12.5, 0) {\phantom{0} };
\node[minimum width=0.5cm] (bDots) at (13.15, 0) { $\cdots$ };
\node[minimum width=0.5cm] (bDots2) at (13.65, 0) { $\cdots$ };
\node[minimum width=0.5cm] (bDots3) at (14.15, 0) { $\cdots$ };
\node[myboxLstFinal] (y1) at (14.8, 0) {\phantom{0} };
\node[myboxLstFinal] (y2) at (15.3, 0) {\phantom{0} };
\node[myboxLstFinal] (y3) at (15.8, 0) {\phantom{0} };
\node[myboxLstFinal] (y4) at (16.3, 0) {\phantom{0} };
\node[myboxLstFinal] (y5) at (16.8, 0) {\phantom{0} };
\node[myboxLstFinal] (y6) at (17.3, 0) {\phantom{0} };
\node[myboxLstFinal] (y7) at (17.8, 0) {\phantom{0} };
\node[myboxLstFinal] (y8) at (18.3, 0) {\phantom{0} };
\node[myboxFst, fill=gray!10] (basis1) at (1, -1) {$1/2$};
\node[myboxFst, fill=gray!10] (basis2) at (4.5, -1) {$1/2^\tau$};
\node[myboxSec, fill=gray!10] (basis3) at (8.5, -1) {$1/2^{2\tau}$ };
\node[myboxSec, fill=gray!10] (basis4) at (12.5, -1) {$1/2^{3\tau}$ };
\node[myboxSec, fill=gray!10] (basis5) at (14.8, -1) {$1/2^{n\tau}$ };
\node[myboxSec, fill=gray!10] (basis6) at (18.3, -1) {$1/2^{(n+1)\tau}$ };
\draw [decorate,decoration={brace,amplitude=10pt},xshift=60pt]
(y1.north west) -- (y8.north east) node [black,midway,yshift=0.3cm,above]
{$x^n_\text{bin}$};
\draw[->, dashed] (a1.south) to (basis1.north) node[ ] { };
\draw[->, dashed] (a8.south) to (basis2.north) node[ ] { };
\draw[->, dashed] (b8.south) to (basis3.north) node[ ] { };
\draw[->, dashed] (l8.south) to (basis4.north) node[ ] { };
\draw[->, dashed] (y1.south) to (basis5.north) node[ ] { };
\draw[->, dashed] (y8.south) to (basis6.north) node[ ] { };
\draw [decorate,decoration={brace,amplitude=10pt},xshift=60pt]
(a1.north west) -- (a8.north east) node [black,midway,yshift=0.3cm,above]
{$x^0_\text{bin}$};
\draw [decorate,decoration={brace,amplitude=10pt},xshift=60pt]
(b1.north west) -- (b8.north east) node [black,midway,yshift=0.3cm,above]
{$x^1_\text{bin}$};
\draw [decorate,decoration={brace,amplitude=10pt},xshift=60pt]
(l1.north west) -- (l8.north east) node [black,midway,yshift=0.3cm,above]
{$x^2_\text{bin}$};
\draw [decorate,decoration={mirror,brace,amplitude=10pt},xshift=60pt] (basis1.south) -- (basis2.south) node [black,midway,yshift=-0.3cm,below] {$\alpha_0 \approx x^0_\text{bin}$};
\end{tikzpicture}} \\
\noindent Thanks to this encoding scheme, the bits corresponding to~$x^0_\text{bin}$ occupy the first~$\tau$ dominant weights ensuring that~$\alpha_0$ is already a good approximation to the value of the first sample:
\begin{equation*}
\highlight[black!10]{\alpha_0 \approx x_0 \quad \text{with error bound} \quad \left| \alpha_0 - x_0 \right| < 1/2^\tau}
\end{equation*}
The error bound is explained in the introductory paragraph on fixed-point binary representations.
\subsection{Decimal precision requirements}
\label{sec::decimalPrecision}
Converting~$\alpha_0 \in [0,1]$ to its equivalent decimal representation is achieved by evaluating the expression:
\begin{equation*}
\highlight[black!10]{\alpha_0 = \sum_{i=1}^{(n+1)\tau} \dfrac{a_i}{2^i} } = \left( \highlight[yellow!10]{\sum_{i=1}^\tau} + \highlight[green!10]{\sum_{i=\tau+1}^{2 \tau}} + \highlight[pink!10]{\sum_{i=2\tau+1}^{3 \tau}} + \cdots + \highlight[blue!10]{\sum_{i=n\tau + 1}^{(n+1)\tau}} \right) \dfrac{a_i}{2^i}
\end{equation*}
Conceptually, this summation can be partitioned into blocks which are color-coded according to the level of significance they represent in the original binary representation of~$\alpha_0$. Each block encodes the value~$x_j$ of a particular sample corrected by a multiplicative factor determined by the order in which sample~$j$ appears in the dataset~$\mathcal{X} = [x_0, x_1, x_2, \cdots , x_n]$. \\
\noindent Since all further arithmetic operations will be performed exclusively in decimal representation, it is important to answer the following question: how many digits of decimal precision does one need to keep in order to represent all the information contained in the original construction of~$\alpha_0$ as a long binary string? Let us denote by~$p_\text{bin}$ the level of binary precision (number of bits) targeted. This means that we aim to describe numbers with a numerical precision of at least~$1 / 2^{\,p_\text{bin}}$. Therefore, the required level of decimal precision~$p_\text{dec}$ is determined by:
\begin{equation*}
\dfrac{1}{{10}^{\,p_\text{dec}}} = \dfrac{1}{2^{\,p_\text{bin}}} \quad
\Longrightarrow \quad p_\text{dec} = \left( \dfrac{\log 2}{\log 10} \approx 0.3 \right) p_\text{bin}
\end{equation*}
\noindent Unsurprisingly, the decimal representation requires fewer digits than would otherwise be necessary in binary to achieve the same level of numerical precision. Intuitively, this can be attributed to the fact that the decimal representation of a real number is expanded over a larger set of base digits~$\{ 0, \cdots , 9 \}$ instead of just~$\{ 0,1 \}$ in binary form. \\
\noindent With typical numbers such as~$n=1000$ data points and an accuracy of~$\tau=8$ bits of precision for each sample, we need to faithfully encode~$p_\text{bin} \approx 8000$ bits of information which corresponds to~$p_\text{dec} \approx 2400$ significant decimal digits. Obviously, this level of precision goes way beyond the traditional IEEE floating-point standard and other common data types offered by host programming languages. Therefore, actual implementation of the algorithm requires the use of external libraries to perform arbitrary-precision arithmetic operations.
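\noindent As a quick back-of-the-envelope check (plain Python, standard library only): \\
\begin{lstlisting}[language=Python, caption=]
import math

n, tau = 1000, 8                        # number of samples, bits per sample
p_bin = (n + 1) * tau                   # ~8000 bits for the whole dataset
p_dec = math.ceil(p_bin * math.log10(2))
print(p_dec)                            # ~2400 decimal digits of precision
\end{lstlisting}
\vspace{0.3cm}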
\subsection{Decoding using the dyadic transformation}
\noindent Going back to the definition of~$\mathcal{D}$ in~eq.(\ref{eq:dyadic}), it is easy to see that multiple iterated applications of the dyadic transformation~$k\tau$ times on the initial condition~$\alpha_0$ leads to a new value~$\alpha_k$ which is given by:
\begin{equation}
\alpha_k \equiv \underbrace{\left( \mathcal{D} \circ \cdots \circ \mathcal{D} \right)}_{k\tau \text{ times}} \, (\alpha_0) = \mathcal{D}^{k\tau} (\alpha_0) \quad \Longrightarrow \quad \highlight[black!10]{ \alpha_k = 2^{k\tau} \, \alpha_0 \, \text{mod} \,\, 1 \quad ; \quad k \in \{ 0, \cdots , n \} }
\label{eq:exactSol}
\end{equation}
As will be demonstrated below, this expression for~$\alpha_k$ is indeed able to reproduce all the datapoints of~$\mathcal{X}$. \\
\noindent With~$k=0$, we have seen that~$\alpha_0$ is already a good approximation to the first sample of our dataset. In order to see how the remaining datapoints can also be extracted using~$\mathcal{D}$, it is helpful to mentally go back to the binary representation in which the dyadic transformation can be seen as a simple leftwise bit-shift. Note that this logic is only presented {\bf as a visual guide} and that the~$\alpha$'s are calculated directly in the decimal representation. Accordingly, let us start by looking at~$k=1$ so that the first iterate~$\alpha_1$ corresponds to: \\ \\
\noindent \makebox[\textwidth][c]{\begin{tikzpicture}
\node[] (alpha) at (-0.6, 0) {$\alpha_1 = \mathcal{D}^\tau \left( \alpha_0 \right) = $ };
\node[myboxSec] (a1) at (1, 0) { \phantom{0} };
\node[myboxSec] (a2) at (1.5, 0) { \phantom{0} };
\node[myboxSec] (a3) at (2, 0) {\phantom{0} };
\node[myboxSec] (a4) at (2.5, 0) {\phantom{0} };
\node[myboxSec] (a5) at (3, 0) { \phantom{0} };
\node[myboxSec] (a6) at (3.5, 0) { \phantom{0} };
\node[myboxSec] (a7) at (4, 0) {\phantom{0} };
\node[myboxSec] (a8) at (4.5, 0) {\phantom{0} };
\node[myboxLst] (b1) at (5, 0) { \phantom{0} };
\node[myboxLst] (b2) at (5.5, 0) { \phantom{0}};
\node[myboxLst] (b3) at (6, 0) {\phantom{0} };
\node[myboxLst] (b4) at (6.5, 0) {\phantom{0} };
\node[myboxLst] (b5) at (7, 0) {\phantom{0} };
\node[myboxLst] (b6) at (7.5, 0) {\phantom{0} };
\node[myboxLst] (b7) at (8, 0) {\phantom{0} };
\node[myboxLst] (b8) at (8.5, 0) {\phantom{0} };
\node[minimum width=0.5cm] () at (9.15, 0) { $\cdots$ };
\node[minimum width=0.5cm] () at (9.65, 0) { $\cdots$ };
\node[minimum width=0.5cm] () at (10.15, 0) { $\cdots$ };
\node[myboxLstFinal] (y1) at (10.8, 0) {\phantom{0} };
\node[myboxLstFinal] (y2) at (11.3, 0) {\phantom{0} };
\node[myboxLstFinal] (y3) at (11.8, 0) {\phantom{0} };
\node[myboxLstFinal] (y4) at (12.3, 0) {\phantom{0} };
\node[myboxLstFinal] (y5) at (12.8, 0) {\phantom{0} };
\node[myboxLstFinal] (y6) at (13.3, 0) {\phantom{0} };
\node[myboxLstFinal] (y7) at (13.8, 0) {\phantom{0} };
\node[myboxLstFinal] (y8) at (14.3, 0) {\phantom{0} };
\node[myboxFst, fill=gray!10] (basis1) at (1, -1) {$1/2$};
\node[myboxFst, fill=gray!10] (basis2) at (4.5, -1) {$1/2^\tau$};
\node[myboxSec, fill=gray!10] (basis3) at (8.5, -1) {$1/2^{2\tau}$ };
\node[myboxSec, fill=gray!10] (basis5) at (10.8, -1) {$1/2^{(n-1)\tau}$ };
\node[myboxSec, fill=gray!10] (basis6) at (14.3, -1) {$1/2^{n\tau}$ };
\draw[->, dashed] (a1.south) to (basis1.north) node[ ] { };
\draw[->, dashed] (a8.south) to (basis2.north) node[ ] { };
\draw[->, dashed] (b8.south) to (basis3.north) node[ ] { };
\draw[->, dashed] (y1.south) to (basis5.north) node[ ] { };
\draw[->, dashed] (y8.south) to (basis6.north) node[ ] { };
\draw [decorate,decoration={brace,amplitude=10pt},xshift=60pt]
(a1.north west) -- (a8.north east) node [black,midway,yshift=0.3cm,above]
{$x^1_\text{bin}$};
\draw [decorate,decoration={brace,amplitude=10pt},xshift=60pt]
(b1.north west) -- (b8.north east) node [black,midway,yshift=0.3cm,above]
{$x^2_\text{bin}$};
\draw [decorate,decoration={brace,amplitude=10pt},xshift=60pt]
(y1.north west) -- (y8.north east) node [black,midway,yshift=0.3cm,above]
{$x^n_\text{bin}$};
\draw [decorate,decoration={mirror,brace,amplitude=10pt},xshift=60pt] (basis1.south) -- (basis2.south) node [black,midway,yshift=-0.3cm,below] {$\alpha_1 \approx x^1_\text{bin}$};
\end{tikzpicture}}
\noindent showing that the second sample is recovered by~$\tau$ successive applications of~$\mathcal{D}$ on our carefully crafted initial condition:
\begin{equation*}
\highlight[black!10]{\alpha_1 = 2^\tau \alpha_0 \, \text{mod} \,\, 1 \,\, \Longrightarrow \,\, \alpha_1 \approx x_1 \,\,\, \text{with error bound} \,\,\, \left| \alpha_1 - x_1 \right| < 1/2^\tau}
\end{equation*}
\newpage
\noindent Moving on to~$k=2$, we have: \\ \\
\noindent \begin{tikzpicture}
\node[] (alpha) at (-0.7, 0) {$\alpha_{2} = \mathcal{D}^{2\tau} \left( \alpha_0 \right) = $ };
\node[myboxLst] (a1) at (1, 0) { \phantom{0} };
\node[myboxLst] (a2) at (1.5, 0) { \phantom{0} };
\node[myboxLst] (a3) at (2, 0) {\phantom{0} };
\node[myboxLst] (a4) at (2.5, 0) {\phantom{0} };
\node[myboxLst] (a5) at (3, 0) { \phantom{0} };
\node[myboxLst] (a6) at (3.5, 0) { \phantom{0} };
\node[myboxLst] (a7) at (4, 0) {\phantom{0} };
\node[myboxLst] (a8) at (4.5, 0) {\phantom{0} };
\node[minimum width=0.5cm] () at (5.15, 0) { $\cdots$ };
\node[minimum width=0.5cm] () at (5.65, 0) { $\cdots$ };
\node[minimum width=0.5cm] () at (6.15, 0) { $\cdots$ };
\node[myboxLstFinal] (y1) at (6.8, 0) {\phantom{0} };
\node[myboxLstFinal] (y2) at (7.3, 0) {\phantom{0} };
\node[myboxLstFinal] (y3) at (7.8, 0) {\phantom{0} };
\node[myboxLstFinal] (y4) at (8.3, 0) {\phantom{0} };
\node[myboxLstFinal] (y5) at (8.8, 0) {\phantom{0} };
\node[myboxLstFinal] (y6) at (9.3, 0) {\phantom{0} };
\node[myboxLstFinal] (y7) at (9.8, 0) {\phantom{0} };
\node[myboxLstFinal] (y8) at (10.3, 0) {\phantom{0} };
\node[myboxFst, fill=gray!10] (basis1) at (1, -1) {$1/2$};
\node[myboxFst, fill=gray!10] (basis2) at (4.5, -1) {$1/2^\tau$};
\node[myboxSec, fill=gray!10] (basis5) at (6.8, -1) {$1/2^{(n-2)\tau}$ };
\node[myboxSec, fill=gray!10] (basis6) at (10.3, -1) {$1/2^{(n-1)\tau}$ };
\draw [decorate,decoration={brace,amplitude=10pt},xshift=60pt] (y1.north west) -- (y8.north east) node [black,midway,yshift=0.3cm,above] {$x^n_\text{bin}$};
\draw [decorate,decoration={brace,amplitude=10pt},xshift=60pt] (a1.north west) -- (a8.north east) node [black,midway,yshift=0.3cm,above] {$x^2_\text{bin}$};
\draw [decorate,decoration={mirror,brace,amplitude=10pt},xshift=60pt] (basis1.south) -- (basis2.south) node [black,midway,yshift=-0.3cm,below] {$\alpha_{2} \approx x^2_\text{bin}$};
\draw[->, dashed] (a1.south) to (basis1.north) node[ ] { };
\draw[->, dashed] (a8.south) to (basis2.north) node[ ] { };
\draw[->, dashed] (y1.south) to (basis5.north) node[ ] { };
\draw[->, dashed] (y8.south) to (basis6.north) node[ ] { };
\end{tikzpicture} \\
\noindent confirming that the third sample of~$\mathcal{X}$ is indeed reproduced:
\begin{equation*}
\highlight[black!10]{\alpha_2 = 2^{2\tau} \alpha_0 \, \text{mod} \,\, 1 \,\, \Longrightarrow \,\, \alpha_2 \approx x_2 \,\,\, \text{with error bound} \,\,\, \left| \alpha_2 - x_2 \right| < 1/2^\tau}
\end{equation*}
Generalizing, it is clear that iterating the dyadic transformation~$n\tau$ times shifts away all the leading bits of~$\alpha_0$, leaving only its (initially) most subdominant contribution (now dominant) which was constructed as the sequence of~$\tau$ bits encoding the last sample: \\
\noindent \begin{tikzpicture}
\node[] (alpha) at (-0.8, 0) {$\alpha_n = \mathcal{D}^{n\tau} \left( \alpha_0 \right) = $ };
\node[myboxLstFinal] (a1) at (1, 0) { \phantom{0} };
\node[myboxLstFinal] (a2) at (1.5, 0) { \phantom{0} };
\node[myboxLstFinal] (a3) at (2, 0) {\phantom{0} };
\node[myboxLstFinal] (a4) at (2.5, 0) {\phantom{0} };
\node[myboxLstFinal] (a5) at (3, 0) { \phantom{0} };
\node[myboxLstFinal] (a6) at (3.5, 0) { \phantom{0} };
\node[myboxLstFinal] (a7) at (4, 0) {\phantom{0} };
\node[myboxLstFinal] (a8) at (4.5, 0) {\phantom{0} };
\node[myboxFst, fill=gray!10] (basis1) at (1, -1) {$1/2$};
\node[myboxFst, fill=gray!10] (basis2) at (4.5, -1) {$1/2^\tau$};
\draw [decorate,decoration={brace,amplitude=10pt},xshift=60pt] (a1.north west) -- (a8.north east) node [black,midway,yshift=0.3cm,above] {$x^n_\text{bin}$};
\draw[->, dashed] (a1.south) to (basis1.north) node[ ] { };
\draw[->, dashed] (a8.south) to (basis2.north) node[ ] { };
\end{tikzpicture} \\
\noindent thereby completing our demonstration that the entire dataset~$\mathcal{X}$ can be decoded by~eq.(\ref{eq:exactSol}):
\begin{equation*}
\highlight[black!10]{\alpha_n = 2^{n\tau} \alpha_0 \, \text{mod} \,\, 1 \,\, \Longrightarrow \,\, \alpha_n \approx x_n \,\,\, \text{with error bound} \,\,\, \left| \alpha_n - x_n \right| < 1/2^\tau}
\end{equation*}
\subsection{Summary in code}
\label{codeSummary::1}
\noindent Let us finish this section by showing how the algorithm described above can be implemented very easily in just a few lines of code. {\it (Helper functions to convert between binary and decimal representations, using a default value of~$\tau=8$, as well as the construction of a dataset~$\mathcal{X}$ composed of~50 random numbers in the unit interval are provided in appendix~\ref{code::appendix}.)} \\
\noindent The first step consists in looping over the dataset to approximate the decimal values of all of its samples with their~$\tau$-bit binary representations. This transformation of~$\mathcal{X} = [x_0, x_1, x_2, \cdots, x_n] \approx [x_\text{bin}^0, x_\text{bin}^1,x_\text{bin}^2,\cdots,x_\text{bin}^n ]$ is achieved by using the helper function~\texttt{decimalToBinary} (which internally invokes the dyadic transformation implemented as~\texttt{dyadicMap} below). Concatenating these~$\tau$-bit binary strings together in order of appearance in~$\mathcal{X}$ (denoted in code as~\texttt{xs}) leads to the initial condition~$\alpha_0$ (denoted in code as~\texttt{binaryInitial}). \\
\begin{lstlisting}[language=Python, caption=]
# xs: List[float] is the dataset, instantiated here as a list of random values in [0, 1] as defined in the Appendix.
dyadicMap = lambda x : (2 * x) % 1
binaryInitial = ''.join(map(decimalToBinary, xs))
print('binaryInitial = %s\n' % binaryInitial)
binaryInitial = 1000110010110111100110101000101101101100101001010111000011100100111101100110
0010110010101000011110010001111011000001001000010110000001011101010111000111
1101111011111010110011000111011011000111000111101010001100100100111100011000
0101011010100100001111000110011101001001000100000100100111101001110010011101
1111000110101110010111000110111110110010000011111010101010101011001101010010
00010101000001011101
\end{lstlisting}
\vspace{0.3cm}
\noindent Since~$\mathcal{X}\equiv$~\texttt{xs} is composed of~50 samples represented in binary with~$\tau=8$ bits of precision, we can verify that the initial condition~\texttt{binaryInitial} requires~$50 \times 8 = 400$ bits of precision.
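\noindent This can be verified directly with a one-line sanity check: \\
\begin{lstlisting}[language=Python, caption=]
assert len(binaryInitial) == len(xs) * tau   # 50 samples x 8 bits = 400 bits
\end{lstlisting}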
\newpage
\noindent Converting this initial condition from its binary form into its corresponding decimal representation is achieved by invoking the~\texttt{binaryToDecimal} helper function. \\
\begin{lstlisting}[language=Python, caption=]
decimalInitial = binaryToDecimal(binaryInitial)
print('decimalInitial = %s' % decimalInitial)
decimalInitial = 0.5496765699760055703169202362353308700123406974106022231112441515212066847
3990606828049163011967627260871299016527460727972
\end{lstlisting}
\vspace{0.3cm}
\noindent As can be verified from the complete value of~$\alpha_0 \equiv$~\texttt{decimalInitial}, one needs to keep~$\approx 120$ decimal digits of precision. Next, we implement the decoding function~$\alpha_k \equiv$~\texttt{dyadicDecoder}. \\
\begin{lstlisting}[language=Python, caption=]
dyadicDecoder = lambda k: (2 ** (k * tau) * decimalInitial) % 1
\end{lstlisting}
\vspace{0.3cm}
\noindent It is important to recognize that the multiplication between~$\alpha_0$ and the~$k$-dependent exponential pre-factor should be understood in the context of ``overloading''. In particular, this means that the implementation is dispatched to an external library that handles arithmetic operations on arbitrary-precision objects such as~\texttt{decimalInitial} (instead of being handled by the native implementations on common data types offered by the host programming language). \\
\noindent Now that we have constructed the initial condition and the decoding function, all the values of the original dataset~$\mathcal{X}$ can be generated (within a level of precision determined by~$\tau$) one-by-one by applying~\texttt{dyadicDecoder} on a range of integer-valued arguments~$k \in [0, \cdots , n]$. Even though arbitrary-precision numbers were necessary to carry out accurate arithmetic operations on~\texttt{decimalInitial}, the components of the resulting list, denoted as~\texttt{decodedValues}, can be downcast to usual floating-point numbers using the built-in~\texttt{float} constructor. \\
\begin{lstlisting}[language=Python, caption=]
n = len(xs)
decodedValues = [float(dyadicDecoder(_)) for _ in range(n)]
\end{lstlisting}
\vspace{0.2cm}
\noindent As a sanity check, let us verify that the differences between the values~$[\alpha_0, \alpha_1, \alpha_2, \cdots, \alpha_n]$ produced by our encoding/decoding strategy and the true values from~$\mathcal{X}=[x_0, x_1, x_2, \cdots , x_n]$ satisfy the theoretical error bound~$\left| \alpha_j - x_j \right| < 1/2^\tau$, captured by~\texttt{normalizedErrors} in the code fragment below. \\
\begin{lstlisting}[language=Python, caption=]
maxError = 1 / 2 ** tau
normalizedErrors = [abs(decodedValue - dataPoint) / maxError
for decodedValue, dataPoint in zip(decodedValues, xs)]
\end{lstlisting}
\begin{figure}[hb!]
\centering
\begin{subfigure}{.5\textwidth}
\centering
\includegraphics[width=\linewidth]{resources/codeExamples/decodedValues_dyadic.png}
\end{subfigure}%
\begin{subfigure}{.5\textwidth}
\centering
\includegraphics[width=\linewidth]{resources/codeExamples/normalizedError_dyadic.png}
\end{subfigure}
\caption{{\bf Left)} Comparison between the decoded values (black star markers) against the original dataset~$\mathcal{X}$ (thick orange line). {\bf Right)} Normalized error showing that the gap between decoded values and the original values never strays more than~$1/2^\tau$ away (represented as the red horizontal dashed line) confirming the error bound derived in the main text.}
\label{fig:naivePics}
\end{figure}
\newpage
\section{Applying makeup to the scaffolding}
\label{secondStrategy}
We refer to the strategy described in the previous section as {\it ``building the scaffolding''}. The reason for this choice of terminology is that the decoding function leaves too much of the bare mechanics exposed. Most objectionably, it relies in plain view on the dyadic transformation and, by implication, on its bit-shift property. Moreover, the somewhat inelegant modulo operation, with its lack of continuity, may stand out as a ``hair in the soup'' in an age where \href{https://en.wikipedia.org/wiki/Differentiable\_programming}{\bf differentiable programming} is all the rage. \\
\noindent The purpose of this section is to justify the more aesthetically pleasing formulation~$f_\alpha(x) = \sin^2 (2^{x\tau} \arcsin{\sqrt{\alpha}})$, whose accomplishments have already been documented in the introduction, as a replacement for the crude decoding function,~eq.(\ref{eq:exactSol}), that we currently have. To do this, we need to introduce two well-known concepts in the context of dynamical systems: the logistic map and the notion of topological conjugacy. It is worth emphasizing that these ingredients will be used simply as a kind of ``makeup'' on top of an otherwise mostly identical logic to the one presented in the previous section.
\subsection{Logistic map}
Originally introduced as a discrete-time model for population demographics, the logistic map~$\mathcal{L}$ is a polynomial mapping between a real-valued~$z_k \in [0,1]$ and its next iterate~$z_{k+1}$ defined by:
\begin{equation}
z_{k+1} \equiv \mathcal{L} (z_k) = r \, z_k (1 - z_k)
\label{eq:logisiticMap}
\end{equation}
Note that we will restrict ourselves to the special case where~$r=4$. Although not particularly useful in biology, it turns out that the logistic map exhibits an unexpected degree of complexity~\cite{robertMay}. In fact, it is often used nowadays as the \href{https://en.wikipedia.org/wiki/Logistic\_map}{archetypal example} of how chaotic behavior may arise from simple dynamical equations. \\
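\noindent As a minimal numerical illustration of this chaotic behavior (plain Python floats suffice here), two initial conditions separated by only~$10^{-12}$ end up macroscopically far apart after a few dozen iterations: \\
\begin{lstlisting}[language=Python, caption=]
logisticMap = lambda z: 4 * z * (1 - z)

z1, z2 = 0.3, 0.3 + 1e-12    # two almost identical initial conditions
for _ in range(40):
    z1, z2 = logisticMap(z1), logisticMap(z2)
print(abs(z1 - z2))   # order-one separation despite the 1e-12 initial gap
\end{lstlisting}
\vspace{0.3cm}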
\noindent A quick visual comparison of~$\mathcal{L}$ (as depicted in~Figure~\ref{fig:logistic}) with the dyadic transformation~$\mathcal{D}$ (illustrated previously in~Figure~\ref{fig:dyadic}) suggests that the two maps do not have much in common... For example, it is clear that~$\mathcal{L}$ is continuous and differentiable over the unit interval; smoothness guarantees that stand in stark contrast to the sawtooth shape of~$\mathcal{D}$. \\
\noindent At this point you may ask: well, what does this logistic map business have to do with anything we discussed so far? \\
\begin{figure}[hb!]
\begin{center}
\begin{tikzpicture}
\begin{axis}[xmin=-0.03, xmax=1.03, ymin=-0.03, ymax=1.03, legend pos=north west, legend style={empty legend, fill=red!10, at={(0.5,1.1)},anchor=north}, axis background/.style={fill=gray!10}, xtick={0, 0.25, 0.5, 0.75, 1}, xticklabels={0, 1/4, 1/2, 3/4, 1},
ytick={0, 0.25, 0.5, 0.75, 1}, yticklabels={0, 1/4, 1/2, 3/4, 1}, thick, minor x tick num=0, minor y tick num=0, grid=both, xlabel=$z_k$]
\addplot[samples=1000,domain=0:1, color=red, line width=1.8pt]{4 * x * (1-x)};
\addlegendentry{$z_{k+1} \equiv \mathcal{L} (z_k) = 4 z_k (1-z_k)$};
\end{axis}
\end{tikzpicture}
\end{center}
\caption{Graphical illustration of the logistic map~$\mathcal{L}$ as defined in~eq.(\ref{eq:logisiticMap}) in the special case where~$r=4$. To be compared with the non-differentiable dyadic transformation~$\mathcal{D}$ shown in~Figure~\ref{fig:dyadic}.}
\label{fig:logistic}
\end{figure}
\subsection{Topological conjugacy}
\label{sec:sleight}
\begin{figure}
\centering
\begin{tikzpicture}[scale=0.96]
\begin{axis}[xmin=-0.03, xmax=1.03, ymin=-0.03, ymax=1.03, legend pos=north west, legend style={empty legend, fill=blue!10, at={(0.5,1.1)},anchor=north}, axis background/.style={fill=gray!10}, xtick={0, 0.25, 0.5, 0.75, 1}, xticklabels={0,1/4,1/2,3/4,1},
ytick={0, 0.25, 0.5, 0.75, 1}, yticklabels={0, 1/4, 1/2, 3/4, 1}, thick, minor x tick num=0, minor y tick num=0, grid=both, xlabel=$\alpha$]
\addplot[samples=500,domain=0:1, color=blue,line width=1.8pt]{ (sin(2*pi*deg(x)))^2 };
\addlegendentry{$z = \phi(\alpha) = \sin^2 \left( 2\pi \alpha \right)$};
\end{axis}
\end{tikzpicture}
\begin{tikzpicture}[scale=0.96]
\begin{axis}[xmin=-0.03, xmax=1.03, ymin=-0.01, ymax=0.26, legend pos=north west, legend style={empty legend, fill=orange!10, at={(0.5,1.1)},anchor=north}, axis background/.style={fill=gray!10}, xtick={0, 0.25, 0.5, 0.75, 1}, xticklabels={0, 1/4, 1/2, 3/4, 1},
ytick={0,0.08333333,0.1666667,0.25}, yticklabels={0,1/12,1/6,1/4}, thick, minor x tick num=0, minor y tick num=0, grid=both, xlabel=$z$]
\addplot[samples=500,domain=0:1, color=orange,line width=1.8pt]{rad(asin(sqrt(x))) / (2*pi)};
\addlegendentry{$\phi^{-1} (z) = \left( \arcsin{\sqrt{z}} \, \right) / \, 2\pi $};
\end{axis}
\end{tikzpicture}
\caption{Illustrations of the homeomorphism~$\phi$ and of its inverse~$\phi^{-1}$. Recall that a continuous function with a continuous inverse, as is the case here, is called a homeomorphism.}
\label{fig:Phi}
\end{figure}
Let us assume that for a given~$k \in \mathbb{N}$ the value~$z_k$ of an iterate produced by the logistic map can be linked to some other variable~$\alpha_k$ through the following change of variable:
\begin{equation}
z_k \equiv \phi(\alpha_k) = \sin^2(2\pi \alpha_k)
\label{eq::definePhi}
\end{equation}
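\noindent In code, this change of variable and its inverse (as plotted in~Figure~\ref{fig:Phi}) can be sketched as follows; the names~\texttt{phi} and~\texttt{phiInverse} are ours, and note that~\texttt{phiInverse} returns values in~$[0, 1/4]$, in agreement with the right panel of the figure. \\
\begin{lstlisting}[language=Python, caption=]
from math import sin, asin, sqrt, pi

def phi(alpha):
    # Change of variable z = sin^2(2 * pi * alpha)
    return sin(2.0 * pi * alpha) ** 2

def phiInverse(z):
    # Principal branch only: the returned value lies in [0, 1/4]
    return asin(sqrt(z)) / (2.0 * pi)

# Round trip: phi is a left inverse of phiInverse over the whole unit interval
assert abs(phi(phiInverse(0.42)) - 0.42) < 1e-12
\end{lstlisting}
\vspace{0.2cm}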
\noindent Obviously, we will soon try to identify~$\alpha_k$ as an iterate of the dyadic transformation~$\mathcal{D}$ defined in~eq.(\ref{eq:dyadic}). For now, let us continue by following the evolution of~$z_k$ as prescribed by the logistic map. Using the newly introduced~$\phi$, the value of the next iterate~$z_{k+1} \equiv \mathcal{L}(z_k)$ can be expressed as:
\begin{equation*}
z_{k+1} \equiv 4 \phi(\alpha_k) \big( 1 - \phi(\alpha_k) \big) = 4 \sin^2(2\pi \alpha_k) \cos^2(2\pi \alpha_k) \quad \Longrightarrow \quad z_{k+1} = \sin^2 \left( 2\pi \, 2\alpha_k \right)
\end{equation*}
In order to make a connection, let us recognize that:
\begin{equation*}
\sin^2 \big( 2\pi \mathcal{D}(\alpha_k) \big) =
\begin{cases}
\sin^2 \left( 2\pi 2\alpha_k \right) & (\alpha_k < 1/2) \\
\sin^2 \big( 2\pi (2\alpha_k - 1) \big) = \sin^2 \left( 2\pi 2\alpha_k - 2\pi \right) = \sin^2 \left( 2\pi 2\alpha_k \right) & (\alpha_k > 1/2) \\
\end{cases}
\end{equation*}
due to the periodicity of~$\phi$ as one can see in~Figure~\ref{fig:Phi}. Plugging this result back into the expression for~$z_{k+1}$ and introducing~$\alpha_{k+1} \equiv \mathcal{D}(\alpha_k)$ leads to the following identity:
\begin{equation}
z_{k+1} = \phi(\alpha_{k+1}) \quad \Leftrightarrow \quad \mathcal{L}(z_k) = \phi \big( \mathcal{D}(\alpha_k) \big)
\label{eq:topologicalConjugacy}
\end{equation}
\noindent This result is nothing short of astonishing: it proves that although~$\mathcal{L}$ and~$\mathcal{D}$ superficially look like very different recurrence relations, it is more appropriate to think of them as topological twins that share identical dynamics. With~$\phi$ and its inverse~$\phi^{-1}$ acting as bridges between the two topological spaces, this marvelous behavior, known as topological conjugacy, can be summarized pictorially via the commutative diagram:
\begin{center} \begin{tikzpicture}
\node[] (zks) at (0, 0) {};
\node[] (zk0) at (2, 0) {};
\node[] (zk) at (4, 0) {$z_k$ };
\node[] (zk1) at (7, 0) {$z_{k+1}$ };
\node[] (zk2) at (10, 0) {$z_{k+2}$ };
\node[] (zk3) at (12, 0) {};
\node[] (zkf) at (14, 0) {};
\node[] (tks) at (0, 2) {};
\node[] (tk0) at (2, 2) {};
\node[] (tk) at (4, 2) {$\alpha_k$ };
\node[] (tk1) at (7, 2) {$\alpha_{k+1}$ };
\node[] (tk2) at (10, 2) {$\alpha_{k+2}$ };
\node[] (tk3) at (12, 2) {};
\node[] (tkf) at (14, 2) {};
\draw[->] (zk.east) to (zk1.west) node[xshift=-1.0cm, yshift=0.0cm, above] {$\mathcal{L}$ };
\draw[->] (zk1.east) to (zk2.west) node[xshift=-1.0cm, yshift=0.0cm,above] {$\mathcal{L}$ };
\draw[dashed] (zk2.east) to (zk3.west) node[] {};
\draw[dashed] (zk0.east) to (zk.west) node[] {};
\draw[->] (tk.east) to (tk1.west) node[xshift=-1.0cm, yshift=0.0cm,below] {$\mathcal{D}$ };
\draw[->] (tk1.east) to (tk2.west) node[xshift=-1.0cm, yshift=0.0cm,below] {$\mathcal{D}$ };
\draw[dashed] (tk2.east) to (tk3.west) node[] {};
\draw[dashed] (tk0.east) to (tk.west) node[] {};
\draw[dashed] (tks.east) to (tk0.west) node[] {};
\draw[dashed] (zks.east) to (zk0.west) node[] {};
\draw[dashed] (zk3.east) to (zkf.west) node[] {};
\draw[dashed] (tk3.east) to (tkf.west) node[] {};
\draw[->, thick, orange] (zk0.west) to [bend left] (tk0.west) node[xshift=-0.3cm, yshift=-1.0cm,left] {$\phi^{-1}$ } ;
\draw[->, thick, blue] (tk0.east) to [bend left] (zk0.east) node[xshift=0.3cm, yshift=1.0cm,right] {$\phi$ } ;
\draw[->, thick, blue] (tk3.east) to [bend left] (zk3.east) node[xshift=0.3cm, yshift=1.0cm,right] {$\phi$ } ;
\draw[->, thick, orange] (zk3.west) to [bend left] (tk3.west) node[xshift=-0.3cm, yshift=-1.0cm,left] {$\phi^{-1}$ } ;
\end{tikzpicture} \end{center}
\noindent In other words, one may decide to study the dynamics of a variable according to the logistic map and never leave this representation but, all the while, keep in mind that these dynamics are equivalently represented in some kind of ``parallel universe'' where variables evolve according to the dyadic transformation, with the composition by~$\phi$ acting as a ``portal'' between the two. \\
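\noindent To make the commutative diagram concrete, here is a minimal numerical check (a sketch of our own, with hypothetical helper names, restated here for self-containedness): starting from the conjugate initial conditions~$\alpha_0$ and~$z_0 = \phi(\alpha_0)$, the two orbits remain linked by~$\phi$, up to floating-point round-off, over the first few iterations. \\
\begin{lstlisting}[language=Python, caption=]
from math import sin, pi

def dyadicMap(alpha):
    # The dyadic transformation: alpha_{k+1} = 2 * alpha_k mod 1
    return (2.0 * alpha) % 1.0

def logisticMap(z):
    # The logistic map with r = 4
    return 4.0 * z * (1.0 - z)

def phi(alpha):
    # Change of variable z = sin^2(2 * pi * alpha)
    return sin(2.0 * pi * alpha) ** 2

alpha = 0.123          # arbitrary starting point in [0, 1)
z = phi(alpha)         # conjugate initial condition z_0 = phi(alpha_0)
for _ in range(10):
    alpha, z = dyadicMap(alpha), logisticMap(z)
    # The diagram commutes: phi(alpha_k) tracks z_k up to round-off
    assert abs(phi(alpha) - z) < 1e-9
\end{lstlisting}
\vspace{0.2cm}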
\noindent Now that we have this remarkable phenomenon in our hands, how can we exploit it to apply makeup to the basic encoding/decoding strategy presented in the previous section?
\subsection{Construction of the initial condition}
Instead of populating the initial condition with direct binary approximations to the components of the original dataset, let us add a pre-processing step: apply~$\phi^{-1}$ to all of the samples of~$\mathcal{X}=[x_0,x_1,x_2,\cdots,x_n]$ before moving to a fixed-point binary representation:
\begin{center} \begin{tikzpicture}
\node[] (x0) at (0, 0) {$x_0$ };
\node[] (x1) at (0, -1) {$x_1$ };
\node[] (xdots) at (0, -2) {$\vdots$ };
\node[] (xn) at (0, -3) {$x_n$ };
\node[] (y0) at (2, 0) {$\phi^{-1} (x_0) \approx $ };
\node[] (y1) at (2, -1) {$\phi^{-1} (x_1) \approx $ };
\node[] (yDots) at (2, -2) {$\vdots$ };
\node[] (yn) at (2, -3) {$\phi^{-1} (x_n) \approx $ };
\node[myboxFst] (a1) at (3.3, 0) { };
\node[myboxFst] (a2) at (3.8, 0) { };
\node[myboxFst] (a3) at (4.3, 0) { };
\node[myboxFst] (a4) at (4.8, 0) { };
\node[myboxFst] (a5) at (5.3, 0) { };
\node[myboxFst] (a6) at (5.8, 0) { };
\node[myboxFst] (a7) at (6.3, 0) { };
\node[myboxFst] (a8) at (6.8, 0) { };
\node[myboxSec] (b1) at (3.3, -1) { };
\node[myboxSec] (b2) at (3.8, -1) { };
\node[myboxSec] (b3) at (4.3, -1) { };
\node[myboxSec] (b4) at (4.8, -1) { };
\node[myboxSec] (b5) at (5.3, -1) { };
\node[myboxSec] (b6) at (5.8, -1) { };
\node[myboxSec] (b7) at (6.3, -1) { };
\node[myboxSec] (b8) at (6.8, -1) { };
\node[myboxLst] (c1) at (3.3, -3) { };
\node[myboxLst] (c2) at (3.8, -3) { };
\node[myboxLst] (c3) at (4.3, -3) { };
\node[myboxLst] (c4) at (4.8, -3) { };
\node[myboxLst] (c5) at (5.3, -3) { };
\node[myboxLst] (c6) at (5.8, -3) { };
\node[myboxLst] (c7) at (6.3, -3) { };
\node[myboxLst] (c8) at (6.8, -3) { };