Skip to content

Commit 23c0cb4

Browse files
authored
Merge pull request #9 from HydraRadio/pool-fix
Fix spurious correlations between FG model amplitudes at different times
2 parents cbf1c42 + 91b0b68 commit 23c0cb4

File tree

1 file changed

+25
-2
lines changed

1 file changed

+25
-2
lines changed

hydra_pspec/pspec.py

+25-2
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from scipy.stats import invgamma
66
from scipy.optimize import minimize, Bounds
77

8-
from multiprocess import Pool
8+
from multiprocess import Pool, current_process
99
from . import utils
1010
import os, time
1111

@@ -86,12 +86,16 @@ def sprior(signals, bins, factor):
8686

8787

8888
def gcr_fgmodes_1d(
89-
vis, w, matrices, fgmodes, f0=None, map_estimate=False, verbose=False
89+
idx, vis, w, matrices, fgmodes, f0=None, map_estimate=False, verbose=False,
90+
multiprocess_seed=912983
9091
):
9192
"""
9293
Perform the GCR step on a single time sample.
9394
9495
Parameters:
96+
idx (int):
97+
Time index. Used to generate a unique random seed for each process
98+
if using `multiprocess.Pool` and multiple processes.
9599
vis (array_like):
96100
Array of complex visibilities for a single baseline, of shape
97101
`(Ntimes, Nfreqs)`.
@@ -109,8 +113,26 @@ def gcr_fgmodes_1d(
109113
Provide the maximum a posteriori sample.
110114
verbose (bool):
111115
If True, output basic timing stats about each iteration.
116+
multiprocess_seed (int):
117+
Reference random seed used for all processes and time indices.
118+
Used to generate a unique random seed for each spawned process and
119+
each time index. Defaults to 912983.
112120
113121
"""
122+
# If multiple process are spawned via `multiprocess.Pool`, each process
123+
# inherits the random seed of the parent process. We need to set a unique
124+
# seed per process to avoid spurious correlations between GCRs at different
125+
# time indices. We can do so using the process ID (PID, unique per
126+
# process) and time index (unique for each time). For fewer than 1000
127+
# processes, we can guarantee a unique random seed by summing the
128+
# multiprocess_seed (a reference seed which is fixed for all processes and
129+
# times), the PID*1000, and the time index.
130+
# WARNING: if more than 1000 processes is every used this sum will not
131+
# guarantee a unique seed for each process!
132+
pid = current_process().pid
133+
seed = multiprocess_seed + pid*1000 + idx
134+
np.random.seed(seed)
135+
114136
Nfreqs, Nmodes = fgmodes.shape
115137
d = vis.reshape((1, max(Nfreqs, len(vis.T))))
116138

@@ -201,6 +223,7 @@ def gcr_fgmodes(
201223
with Pool(nproc) as pool:
202224
samples, residuals, info = zip(*pool.map(
203225
lambda idx: gcr_fgmodes_1d(
226+
idx=idx,
204227
vis=vis[idx],
205228
w=w,
206229
matrices=matrices,

0 commit comments

Comments
 (0)