%!TEX root = main.tex
\section{Relative binning}
\subsection{Motivation}
In GW data analysis, we work with a complex inner product space $(\com^n, \brk[a]{\cdot,\cdot}_{\text{GW}})$ representing time-series signals, where for $h,g \in \com^n$, $\brk[a]{\cdot,\cdot}_{\text{GW}}:\com^n \times \com^n \to \com$ is a sesquilinear inner product defined by:
\begin{equation}\label{eq:gw-inner}
\brk[a]{h,g}_{\text{GW}} \defn 4 \sum_{i=1}^n \frac{\tilde{h}(f_i)^* \tilde{g}(f_i)}{S(f_i)} \Delta f,
\end{equation}
where $\Delta f = 1/T$ with $T$ the duration of the signal, $\tilde{h} \defn \text{DFT}(h)$, and $S(f)$ is a function which weighs each contribution to the sum (called the \textit{power spectral density}).
Recall that a sesquilinear inner product satisfies the following three axioms:
\begin{itemize}
\item $\brk[a]{f,g} = \brk[a]{g,f}^*,$ \hfill{(conjugate symmetry)}
\item $\brk[a]{f, g + \alpha h} = \brk[a]{f,g} + \alpha \brk[a]{f,h}$, \hfill{(linearity in second argument)}
\item $\brk[a]{h,h} > 0$ for $h \neq 0_{\com^n}$. \hfill{(positive definiteness)}
% \item $\brk[a]{h,h} \gt 0$ and $\brk[a]{h,h} = 0 \iff h = 0_{\com^n}$. \hfill{(positive definiteness)}
\end{itemize}
\begin{remark}[]\label{}
We use the same linearity convention as in quantum mechanics for consistency.
\end{remark}
\begin{remark}[]\label{}
If one works directly with frequency-domain waveforms, the definition is the same except that the DFT is not applied.
\end{remark}
%
The likelihood is defined in terms of this inner product by
\begin{equation}\label{}
\like(\theta) \defn e^{-\frac{1}{2} \re \brk[a]1{h(\cdot ; \theta) - d(\cdot),h(\cdot ; \theta) - d(\cdot)}_{\text{GW}}},
\end{equation}
where $d \in \com^n$ is the detector strain, and $h$ is a waveform model parameterized by $\theta$ (e.g., IMRPhenomD).
Using the linearity of the inner product, we may show that\footnote{We use the notation $h(\cdot\,; \theta)$ to indicate a $\com^n$ vector parameterized by $\theta$.
In the following, we suppress the first slot, which indexes the vector, when convenient.}
\begin{align*}\label{}
-\ln \like (\theta) &= \frac{1}{2} \re \brk[a]1{h(\theta) - d,h(\theta) - d}_{\text{GW}} = \frac{1}{2}\overbrace{\brk[a]{h(\theta), h(\theta)}_{\text{GW}}}^{(a)} - \re \overbrace{\brk[a]{h(\theta), d}_{\text{GW}}}^{(b)} + \frac{1}{2}\overbrace{\brk[a]{d, d}_{\text{GW}}}^{(c)}.
\end{align*}
Hence, for every $\like(\theta)$ evaluation, we must evaluate $(a)$ and $(b)$, which require $\order(n)$ evaluations of the waveform.
Note that $(c)$ can be precomputed once and stored.
Relative binning aims to reduce the number of evaluations to $\order(m)$, where $m \ll n$, providing a significant computational advantage.
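To make the baseline cost concrete, the following is a minimal NumPy sketch of the naive likelihood evaluation (the names are hypothetical, and \texttt{h\_model} stands in for a waveform family such as IMRPhenomD):
\begin{verbatim}
import numpy as np

def gw_inner(h, g, S, df):
    """Discrete GW inner product: 4 * sum_i h(f_i)^* g(f_i) / S(f_i) * df."""
    return 4.0 * np.sum(np.conj(h) * g / S) * df

def neg_log_like(theta, d, S, df, h_model):
    """Naive -ln L(theta): the waveform is evaluated on all n points
    of the fine frequency grid -- this is the bottleneck."""
    h = h_model(theta)          # O(n) waveform evaluations per call
    res = h - d
    return 0.5 * np.real(gw_inner(res, res, S, df))
\end{verbatim}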
% \subsection{Derivatives and Fisher matrix}
\subsection{The heterodyning strategy}
Heterodyning is a technique in signal processing which combines two high-frequency signals to yield another signal of lower frequency.
If we can modify the integrand of \cref{eq:gw-inner} such that it turns from a highly oscillating function into a slowly oscillating one, then we may in principle use a coarser grid in the sum (yielding computational benefits) while accumulating a minimal amount of error.
Consider the following.
Suppose that $\theta_0$ is a fiducial set of parameters, and that we are interested in a signal with support $[\fmin, \fmax]$. Writing $h(f;\theta) = A(f;\theta) e^{i \psi(f;\theta)}$ and abbreviating $A \defn A(f;\theta)$, $A_0 \defn A(f;\theta_0)$, we define the \textit{heterodyned signal} as
\begin{align*}
r(f;\theta) &\defn \frac{h(f;\theta)}{h(f; \theta_0)} \\
&= \frac{A}{A_0} e^{i \overbrace{\brk[r]1{\psi(f;\theta) - \psi(f;\theta_0)}}^{\defn \Psi(f;\theta)}},
\end{align*}
where $\Psi(f;\theta)$ is referred to as the \textit{differential phase}.
This expression is convenient since it decouples variations of the envelope $A/A_0$ from the broad oscillations controlled by the phasor.
Indeed, this function lives up to its name if both the envelope and the differential phase are ``slowly varying'' functions of $f$.
\begin{example}[TaylorF2]\label{}
In the simple case of TaylorF2, the envelope $A/A_0$ is not a function of frequency. On the other hand, it may be shown that the differential phase evolves as a sum of a few power laws in $f$. Hence, we expect the oscillatory behavior of $r$ to be much milder than that of the original strain $h$.
\end{example}
\begin{remark}[]\label{}
Since $r$ is parameterized by $\theta$, this slow-variation condition must hold over a reasonably large subset of the parameter space $\Theta$ in order to be useful.
However, analytically verifying this condition is inconvenient, as it depends on the particular waveform family used to generate $h$.
This assumption is therefore usually justified in retrospect by numerical experiments.
\end{remark}
In \cref{sec:bin-algorithm}, we will describe a scheme to construct a substantially sparser grid (compared to the original one) to automatically resolve $r$.
Note that a real slowly varying function can be efficiently approximated with a linear spline.
This idea generalizes to the complex case in a straightforward manner:
a linear spline of a complex function is defined as the linear spline of the real and imaginary parts of the function individually.
We use the following convention.
% More complex version suggested in the paper
% \begin{definition}[]\label{}
% Let $r:[\fmin,\fmax] \to \reals$, and consider the cover $[\fmin, \fmax] = \cup_{i=1}^{m-1}[f_-^{(i)}, f_+^{(i)}) \cup [f_-^{(m)}, f^{(m)}_+]$, where $f_+^{(i)}=f_-^{(i+1)}$ for every $i \in \brk[c]{1, \ldots, m-1}$, with $f_-^{(1)}=\fmin$ and $ f_+^{(m)}=\fmax$. Then the \textit{linear spline approximation} of $r$ is given by
% \begin{equation}\label{}
% r(f; \theta) \approx r_0^{(b)}(\theta) + r_1^{(b)}(\theta)(f-\bar{f}^{(b)}), \; \; f \in \text{bin} \; b
% \end{equation}
% where
% \begin{align*}
% r_0^{(b)}(\theta) &= \frac{r(f_+^{(b)};\theta) + r(f_-^{(b)}; \theta)}{2}, &
% r_1^{(b)}(\theta) &= \frac{r(f_+^{(b)};\theta) - r(f_-^{(b)}; \theta)}{f_+^{(b)} - f_-^{(b)}}, &
% \bar{f}^{(b)} &\defn \frac{f^{(b)}_+ + f^{(b)}_-}{2}.
% \end{align*}
% \end{definition}
% Simpler version proposed by me
\begin{definition}[]\label{}
Let $r:[\fmin,\fmax] \to \com$, and consider the cover $[\fmin, \fmax] = \cup_{i=1}^{m-1}[f_-^{(i)}, f_+^{(i)}) \cup [f_-^{(m)}, f^{(m)}_+]$, where $f_+^{(i)}=f_-^{(i+1)}$ for every $i \in \brk[c]{1, \ldots, m-1}$, with $f_-^{(1)}=\fmin$ and $ f_+^{(m)}=\fmax$. Then the \textit{linear spline approximation} of $r$ is given by
\begin{equation}\label{}
r(f; \theta) \approx r_0^{(b)}(\theta) + r_1^{(b)}(\theta)(f-f_-^{(b)}), \; \; f \in \text{bin} \; b
\end{equation}
where
\begin{align*}
r_0^{(b)}(\theta) &\defn r(f_-^{(b)};\theta), &
r_1^{(b)}(\theta) &\defn \frac{r(f_+^{(b)};\theta) - r(f_-^{(b)}; \theta)}{f_+^{(b)} - f_-^{(b)}}.
% \bar{f}^{(b)} &\defn \frac{f^{(b)}_+ + f^{(b)}_-}{2}.
\end{align*}
\end{definition}
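For illustration, here is a short sketch of how the spline reconstructs $r$ on the fine grid from $m+1$ edge evaluations (the names \texttt{f\_edges}, for the bin-edge frequencies, and \texttt{r\_edges}, for the values of $r$ there, are hypothetical):
\begin{verbatim}
import numpy as np

def spline_ratio(f_fine, f_edges, r_edges):
    """First-order spline of the complex ratio r, built from its values
    at the m+1 bin edges and evaluated on the fine frequency grid."""
    # Bin index b with f_edges[b] <= f < f_edges[b+1]; the last bin is
    # closed on the right, hence the clip.
    b = np.clip(np.searchsorted(f_edges, f_fine, side="right") - 1,
                0, len(f_edges) - 2)
    r0 = r_edges[b]                                            # r_0^(b)
    r1 = (r_edges[b + 1] - r_edges[b]) / (f_edges[b + 1] - f_edges[b])
    return r0 + r1 * (f_fine - f_edges[b])      # r_0 + r_1 (f - f_-^(b))
\end{verbatim}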
The spline approximation yields the following result.
%
\begin{proposition}[]\label{}
Let $r(f;\theta)$ be the heterodyne function with fiducial parameters $\theta_0$.
Then, under a first-order spline, the following approximations hold:
\begin{align*}
\brk[a]{h(\cdot, \theta), d(\cdot)} &\approx \sum_b \brk[s]1{A_0^{(b)} r_0^{(b)}(\theta)^* + A_1^{(b)} r_1^{(b)}(\theta)^*}, \\
% \brk[a]{d(\cdot), h(\cdot, \theta)} &\approx \sum_b \brk[s]1{A_0^{(b)} r_0^{(b)}(\theta)^* + A_1^{(b)} r_1^{(b)}(\theta)^*}, \\
\brk[a]{h(\cdot,\theta), h(\cdot,\theta)} &\approx \sum_b \brk[s]2{B_0^{(b)} \abs{r_0^{(b)}(\theta)}^2 + 2 B_1^{(b)} \re \brk[r]1{r_0^{(b)}(\theta)^* r_1^{(b)}(\theta)}},
\end{align*}
where for every bin $b$, the coefficients $A_0^{(b)}, A_1^{(b)} \in \com$ and $B_0^{(b)}, B_1^{(b)} \in \reals$ are defined by
\begin{align*}
A_0^{(b)} &\defn 4 \sum_{i: f_i \in \text{bin} \; b} \frac{ h(f_i;\theta_0)^* d(f_i) }{S(f_i)} \Delta f, & A_1^{(b)} &\defn 4 \sum_{i:f_i \in \text{bin} \; b} \frac{ h(f_i;\theta_0)^*d(f_i) (f_i - f_-^{(b)})}{S(f_i)} \Delta f,
\end{align*}
%
\begin{align*}
B_0^{(b)} &\defn 4 \sum_{i:f_i \in \text{bin} \; b} \frac{\abs{h(f_i;\theta_0)}^2}{S(f_i)}\Delta f, & B_1^{(b)} &\defn 4 \sum_{i:f_i \in \text{bin} \: b} \frac{\abs{h(f_i;\theta_0)}^2(f_i-f_-^{(b)})}{S(f_i)}\Delta f.
\end{align*}
\end{proposition}
\begin{proof}
(i) We have
\begin{align*}
\brk[a]{h(\cdot, \theta), d(\cdot)} &= \brk[a]{h(\cdot,\theta_0) r(\cdot,\theta), d(\cdot)}\\
&\defn 4 \sum_{b} \sum_{i:f_i \in \text{bin} \; b} \frac{h(f_i;\theta_0)^* r(f_i;\theta)^* d(f_i) }{S(f_i)}\Delta f \\
&\approx 4 \sum_{b} \sum_{i:f_i \in \text{bin} \; b} \frac{h(f_i;\theta_0)^* d(f_i) }{S(f_i)} \brk[s]!{r_0^{(b)}(\theta)^* + r_1^{(b)}(\theta)^*(f_i - f_-^{(b)})} \Delta f\\
&= \sum_b \brk[s]!{A_0^{(b)} r_0^{(b)}(\theta)^* + A_1^{(b)} r_1^{(b)}(\theta)^*}.
\end{align*}
(ii) On the other hand, we have
\begin{align*}
\brk[a]{h(\cdot,\theta), h(\cdot,\theta)} &= \brk[a]{h(\cdot,\theta_0) r(\cdot,\theta), h(\cdot,\theta_0) r(\cdot,\theta)}\\
% &= 4 \sum_{i=1}^n \frac{\abs{h(f_i;\theta)}^2}{S(f_i)} \Delta f \\
% &= 4 \sum_{i=1}^n \frac{\abs{h(f_i;\theta_0)r(f_i;\theta) }^2}{S(f_i)}\Delta f \\
&\approx 4 \sum_b \sum_{i:f_i \in \text{bin} \; b} \frac{\abs{h(f_i;\theta_0)}^2 \abs1{r_0^{(b)}(\theta) + r_1^{(b)}(\theta) (f_i-f_-^{(b)})}^2}{S(f_i)}\Delta f \\
&= 4 \sum_b \sum_{i:f_i \in \text{bin} \; b}\frac{\abs{h(f_i;\theta_0)}^2 \brk[r]2{\abs{r_0^{(b)}(\theta)}^2 + \abs{r_1^{(b)}(\theta)(f_i-f_-^{(b)})}^2 + 2(f_i-f_-^{(b)}) \re \brk[r]1{r_0^{(b)}(\theta)^*r_1^{(b)}(\theta)}}}{S(f_i)}\Delta f \\
&\approx \sum_b \brk[s]2{B_0^{(b)} \abs{r_0^{(b)}(\theta)}^2 + 2B_1^{(b)} \re \brk[r]1{r_0^{(b)}(\theta)^* r_1^{(b)}(\theta)}},
\end{align*}
where in the second-to-last line we've used the fact that for $z_1, z_2 \in \com$, $\abs{z_1+z_2}^2 = \abs{z_1}^2 + \abs{z_2}^2 + 2 \re \brk[r]{z_1^* z_2}$, and in the last line we've ignored terms of $\order(\abs{f_i-f_-^{(b)}}^2)$.
\end{proof}
The computational savings appear if we can argue that a subgrid with $m \ll n$ bins may be chosen which does not generate significant error.
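In code, the scheme splits into a one-time $\order(n)$ pass that accumulates the per-bin coefficients, followed by $\order(m)$ work per likelihood call. Below is a sketch under hypothetical inputs (\texttt{f}: fine grid, \texttt{h0}: fiducial waveform, \texttt{d}: data, \texttt{S}: PSD, \texttt{edges}: indices of the bin edges into the fine grid):
\begin{verbatim}
import numpy as np

def summary_data(f, h0, d, S, df, edges):
    """One-time O(n) pass: per-bin coefficients A0, A1 (complex) and
    B0, B1 (real); bin b covers fine-grid indices [edges[b], edges[b+1])."""
    A0, A1, B0, B1 = [], [], [], []
    for lo, hi in zip(edges[:-1], edges[1:]):
        w = 4.0 * df / S[lo:hi]
        x = f[lo:hi] - f[lo]                   # f_i - f_-^(b)
        hd = np.conj(h0[lo:hi]) * d[lo:hi]     # h(f_i; theta_0)^* d(f_i)
        hh = np.abs(h0[lo:hi]) ** 2            # |h(f_i; theta_0)|^2
        A0.append(np.sum(w * hd))
        A1.append(np.sum(w * hd * x))
        B0.append(np.sum(w * hh))
        B1.append(np.sum(w * hh * x))
    return np.array(A0), np.array(A1), np.array(B0), np.array(B1)

def binned_neg_log_like(r_edges, f_edges, A0, A1, B0, B1):
    """O(m) evaluation of -ln L, up to the precomputed constant (c)."""
    r0 = r_edges[:-1]                                    # r_0^(b)
    r1 = np.diff(r_edges) / np.diff(f_edges)             # r_1^(b)
    hd = np.sum(A0 * np.conj(r0) + A1 * np.conj(r1))     # <h, d>
    hh = np.sum(B0 * np.abs(r0) ** 2
                + 2.0 * B1 * np.real(np.conj(r0) * r1))  # <h, h>
    return 0.5 * hh - np.real(hd)
\end{verbatim}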
\subsection{Heterodyning the gradient and Fisher}
We may also investigate whether heterodyning produces accurate gradient and Fisher matrix evaluations.
\subsubsection{Review}
The first derivative of the potential is given by
\begin{align*}
- \ln \like_{,i}(\theta) &= \re \brk[a]{h_{,i}(\cdot, \theta), h(\cdot, \theta)} - \re \brk[a]{h_{,i}(\cdot, \theta),d(\cdot)} \pushright{\text{(heterodyne)}} \\
&= \re \brk[a]{h_{,i}(\cdot, \theta), h(\cdot, \theta)-d(\cdot)}, \pushright{\text{(standard)}}
\end{align*}
and the second derivative of the potential is approximately
\begin{align*}
- \ln \like_{,ij}(\theta) &= \re \brk[a]{h_{,ij}(\cdot, \theta), h(\cdot,\theta) - d(\cdot)} + \re \brk[a]{h_{,i}(\cdot,\theta), h_{,j}(\cdot, \theta)} \\
&\approx \re \brk[a]{h_{,i}(\cdot,\theta), h_{,j}(\cdot, \theta)} \\
&\defn \Gamma_{ij}(\theta),
\end{align*}
where we call the matrix field $\Gamma_{ij}(\theta)$ the \textit{Fisher information matrix}.
\begin{remark}[]\label{}
In parts of the literature, $\Gamma_{ij}(\theta^*)$ is called the Fisher information, where $\theta^*$ is the MAP point. Here, we prefer to define it as a matrix field.
\end{remark}
% \begin{equation}\label{eq:fisher-mat}
% \defn \re \brk[a]{h_{,i}(\cdot,\theta), h_{,j}(\cdot, \theta)}
% \end{equation}
From the previous derivation, we see that the Fisher matrix acts as a surrogate for the Hessian of the potential.
In fact, we may use it as a replacement in Newton-based descent procedures due to the following.
\begin{proposition}[]\label{}
The Fisher information matrix $\Gamma$ is positive semi-definite.
\end{proposition}
\begin{proof}
Let $h(f;\theta) = h_1(f;\theta) + i h_2(f;\theta)$. Then by definition, we have
\begin{align*}
\Gamma_{ij}(\theta) &\defn \re \brk[a]{h_{,i}(\cdot,\theta), h_{,j}(\cdot, \theta)} \\
&= 4 \re \sum_k \frac{h_{,i}(f_k;\theta)^* h_{,j}(f_k;\theta)}{S(f_k)} \Delta f \\
&= 4 \re \sum_k \frac{(h_{1,i} - ih_{2,i})(h_{1,j} + ih_{2,j})}{S(f_k)} \Delta f \\
&= 4 \re \sum_k \frac{h_{1,i} h_{1,j} + h_{2,i} h_{2,j} + i(h_{1,i} h_{2,j} - h_{2,i} h_{1,j})}{S(f_k)} \Delta f \\
&= 4 \sum_k \frac{h_{1,i} h_{1,j} + h_{2,i} h_{2,j}}{S(f_k)} \Delta f,
\end{align*}
where the derivatives inside the sums are evaluated at $(f_k;\theta)$, and the frequency sum is indexed by $k$ to avoid clashing with the derivative index $i$.
Since $S>0$ and both matrices in the numerator are PSD (each is a sum of outer products), so too is the sum.
\end{proof}
\subsubsection{Heterodyne for derivatives}
Recall that $h(f; \theta) = A(f; \theta) e^{i \psi(f;\theta)}$, and observe that
\begin{align*}
r_{,j}(f;\theta) &\defn \frac{h_{,j}(f;\theta)}{h(f;\theta_0)} \\
&= \frac{A_{,j}(f;\theta) e^{i \psi(f;\theta)} + i \psi_{,j}(f;\theta) A(f;\theta) e^{i \psi(f;\theta)}}{A(f;\theta_0) e^{i \psi(f;\theta_0)}} \\
&= \frac{A_{,j}(f;\theta)}{A(f;\theta_0)} e^{i \brk[r]{\psi(f;\theta) - \psi(f;\theta_0)}} + \frac{\psi_{,j}(f;\theta)A(f;\theta)}{A(f;\theta_0)}e^{i \brk[r]{\psi(f;\theta) - \psi(f;\theta_0) + \frac{\pi}{2}}}. \\
% &= \alpha_j(f;\theta) + i \beta_j(f;\theta).
\end{align*}
At this point, one must take care to ensure that the envelopes of both phasors are slowly varying functions of $f$. For the moment, we take this for granted, intending to demonstrate the validity of this assumption via numerical experimentation.
We now have the following result, analogous to the zeroth-order case.
\begin{proposition}[]\label{}
The terms we need to calculate for the derivative and Fisher may be approximated as follows:
\begin{align*}
\brk[a]{h_{,j}(\theta),d} &\approx \sum_b \brk[s]!{A_0^{(b)} r_{0,j}^{(b)} (\theta)^* + A_1^{(b)} r_{1,j}^{(b)}(\theta)^*}, \\
\brk[a]{h_{,j}(\theta), h(\theta)} &\approx \sum_b \brk[c]2{B_0^{(b)} r_{0,j}^{(b)}(\theta)^* r_0^{(b)}(\theta) + B_1^{(b)} \brk[s]!{r_{0,j}^{(b)}(\theta)^* r_1^{(b)}(\theta) + r_{1,j}^{(b)}(\theta)^* r_0^{(b)}(\theta)}}, \\
\brk[a]{h_{,j}(\theta), h_{,k}(\theta)} &\approx \sum_b \brk[c]2{B_0^{(b)} r_{0,j}^{(b)}(\theta)^* r_{0,k}^{(b)}(\theta) + B_1^{(b)} \brk[s]!{r_{0,j}^{(b)}(\theta)^* r_{1,k}^{(b)}(\theta) + r_{1,j}^{(b)}(\theta)^* r_{0,k}^{(b)}(\theta)}}.
\end{align*}
\end{proposition}
\begin{proof}
(i) Follows immediately from the previous result:
\begin{align*}
\brk[a]{h_{,j}(\theta), d} &= \brk[a]{h(\theta), d}_{,j}
\approx \sum_b \brk[s]1{A_0^{(b)} r_{0,j}^{(b)}(\theta)^* + A_1^{(b)} r_{1,j}^{(b)}(\theta)^*}.
\end{align*}
(ii) Similarly,
\begin{align*}
\brk[a]{h_{,j}(\theta), h(\theta)} &= \brk[a]{h(\cdot,\theta_0) r_{,j}(\cdot,\theta), h(\cdot,\theta_0) r(\cdot,\theta)}\\
% &= 4 \sum_{i=1}^n \frac{h_{,j}(f_i;\theta)^* h(f;\theta)}{S(f_i)} \Delta f \\
% &= 4 \sum_{i=1}^n \frac{r_{,j}(f_i;\theta)^* h(f_i;\theta_0)^* r(f_i;\theta) h(f_i;\theta_0)}{S(f_i)} \Delta f \\
% &= 4 \sum_{i=1}^n \frac{\abs{h(f_i;\theta_0)}^2}{S(f_i)} r_{,j}(f_i;\theta)^* r(f_i;\theta) \Delta f\\
&\approx 4 \sum_b \sum_{i:f_i \in \text{bin } b}\frac{\abs{h(f_i;\theta_0)}^2}{S(f_i)}\Delta f \brk[s]!{r_{0,j}^{(b)}(\theta) + r_{1,j}^{(b)}(\theta)(f_i-f_-^{(b)})}^* \brk[s]!{r_{0}^{(b)}(\theta) + r_{1}^{(b)}(\theta)(f_i-f_-^{(b)})} \\
&\approx \sum_b \brk[c]2{B_0^{(b)} r_{0,j}^{(b)}(\theta)^* r_0^{(b)}(\theta) + B_1^{(b)} \brk[s]!{r_{0,j}^{(b)}(\theta)^* r_1^{(b)}(\theta) + r_{1,j}^{(b)}(\theta)^* r_0^{(b)}(\theta)}},
\end{align*}
where in the last line we've ignored terms of $\order(\abs{f_i - f_-^{(b)}}^2)$.
(iii) follows by a proof similar to (ii). Alternatively, one may read off the answer by replacing $r_0^{(b)}, r_1^{(b)} \leftarrow r_{0,k}^{(b)}, r_{1,k}^{(b)}$ in the rightmost square brackets of the second-to-last line.
\end{proof}
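The same summary data therefore serves the gradient and the Fisher matrix: only the values of $r$ and its parameter derivatives at the bin edges are required. Below is a sketch of the Fisher assembly (the callable \texttt{r\_edges\_fn}, returning $r$ at the bin edges for given parameters, is hypothetical, and the derivatives are taken by central differences purely for simplicity):
\begin{verbatim}
import numpy as np

def fisher_binned(r_edges_fn, theta, f_edges, B0, B1, eps=1e-6):
    """Heterodyned Fisher matrix Gamma_jk = Re <h_,j, h_,k> from the
    per-bin coefficients B0, B1 and derivatives of r at the bin edges."""
    p = len(theta)
    dr0, dr1 = [], []
    for j in range(p):
        e = np.zeros(p)
        e[j] = eps
        dr = (r_edges_fn(theta + e) - r_edges_fn(theta - e)) / (2.0 * eps)
        dr0.append(dr[:-1])                            # r_{0,j}^(b)
        dr1.append(np.diff(dr) / np.diff(f_edges))     # r_{1,j}^(b)
    G = np.empty((p, p))
    for j in range(p):
        for k in range(p):
            G[j, k] = np.real(np.sum(
                B0 * np.conj(dr0[j]) * dr0[k]
                + B1 * (np.conj(dr0[j]) * dr1[k]
                        + np.conj(dr1[j]) * dr0[k])))
    return G
\end{verbatim}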
\section{Bin selection algorithm}\label{sec:bin-algorithm}
\begin{definition}[]\label{}
Let $h(f;\theta)$ be a complex valued signal. Then the \textit{post-Newtonian ansatz} asserts that $\psi(f)\defn\text{arg}(h(f))$ takes the form
\begin{equation}\label{}
\psi(f;\theta) = \sum_i \alpha_i(\theta) f^{\gamma_i},
\end{equation}
where $\gamma \defn (-5/3, -2/3, 1, 5/3, 7/3)$.
\end{definition}
We now have that
\begin{align*}
\Psi(f;\theta) &\defn \psi(f;\theta) - \psi(f; \theta_0) \\
&= \sum_i \brk[r]1{\alpha_i (\theta) - \alpha_i(\theta_0)} f^{\gamma_i} \\
&= \sum_i \delta \alpha_i (\theta) f^{\gamma_i}.
\end{align*}
The heterodyne approximation is valid if $\Psi$ varies slowly within a bin, which translates into a constraint on the $\delta \alpha_i$ terms. This motivates the following proposition.
\begin{proposition}[]\label{}
Fix a tolerance $\chi > 0$, and suppose that the parameters $\theta$ are restricted to
\begin{equation}\label{eq:bound-params}
\Theta \defn \brk[c]1{\theta : \forall i \; \abs{\delta \alpha_i(\theta)} \le 2 \pi \chi f_{*,i}^{-\gamma_i}},
\end{equation}
where
\begin{equation}\label{}
f_{*,i} \defn
\begin{cases}
f_{\text{max}} & \text{if } \gamma_i > 0 \\
f_{\text{min}} & \text{else}.
\end{cases}
\end{equation}
Then the differential phase change over the interval $[f_-, f_+]$ obeys the following bound:
%change in bin $b \in \brk[c]{1, 2, \ldots, m}$ is bounded by
\begin{equation}\label{}
\abs{\Psi(f_+;\theta) - \Psi(f_-;\theta)} \le 2 \pi \chi \sum_i \abs2{\brk[r]2{\frac{f_+}{f_{*,i}}}^{\gamma_i} - \brk[r]2{\frac{f_-}{f_{*,i}}}^{\gamma_i}}.
\end{equation}
\end{proposition}
% \tag*{\llap{(triangle inequality)}}
\begin{proof}
\begin{align*}
\abs{\Psi(f_+;\theta) - \Psi(f_-;\theta)} &= \abs2{\sum_i \delta \alpha_i(\theta) \brk[s]1{f_+^{\gamma_i} - f_-^{\gamma_i}}} \\
&\le \sum_i \abs{\delta \alpha_i(\theta)} \abs{f_+^{\gamma_i} - f_-^{\gamma_i}} \pushright{(\text{triangle inequality})} \\
&\le 2 \pi \chi \sum_i \abs2{\brk[r]2{\frac{f_+}{f_{*,i}}}^{\gamma_i} - \brk[r]2{\frac{f_-}{f_{*,i}}}^{\gamma_i}}.
\end{align*}
\end{proof}
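This bound suggests a simple edge-placement rule: each term $\text{sgn}(\gamma_i)(f/f_{*,i})^{\gamma_i}$ is increasing in $f$, so the right-hand side equals $\phi(f_+) - \phi(f_-)$ for the monotone function $\phi(f) \defn 2\pi\chi \sum_i \text{sgn}(\gamma_i)(f/f_{*,i})^{\gamma_i}$, and we may place bin edges at (approximately) equal increments of $\phi$. Below is a sketch of this construction, with \texttt{chi} and the per-bin budget \texttt{eps} as tunable tolerances:
\begin{verbatim}
import numpy as np

# Post-Newtonian exponents from the ansatz above.
GAMMA = np.array([-5/3, -2/3, 1.0, 5/3, 7/3])

def select_bins(f, chi=1.0, eps=0.5):
    """Place bin edges on the fine grid f so that the differential-phase
    bound grows by roughly eps per bin."""
    f_star = np.where(GAMMA > 0, f.max(), f.min())
    # Monotone proxy: each term sgn(gamma) * (f / f_star)^gamma
    # increases with f, so phi is increasing.
    phi = 2.0 * np.pi * chi * np.sum(
        np.sign(GAMMA) * (f[:, None] / f_star) ** GAMMA, axis=1)
    n_bins = int(np.ceil((phi[-1] - phi[0]) / eps))
    targets = phi[0] + eps * np.arange(1, n_bins)
    interior = np.searchsorted(phi, targets)
    return np.unique(np.concatenate(([0], interior, [len(f) - 1])))
\end{verbatim}
Spending the error budget uniformly in $\phi$ makes the bins narrow where the bound (and hence the differential phase) can change quickly, and wide where it cannot; the returned indices can then be passed as \texttt{edges} to the summary-data pass sketched earlier.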