Skip to content

Commit fd2a558

Browse files
committed
Merge branch 'feature/random-data-sample-problem' into develop
2 parents 442d19e + 1ea249e commit fd2a558

File tree

4 files changed

+58
-7
lines changed

4 files changed

+58
-7
lines changed

README.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -175,8 +175,10 @@ List<String> list = Lists.asList(boxedArray);
175175
- Enumerate prime numbers, EPI#5.9: [c++](/cpp-algorithm/src/array/enumerate_prime_number.h) | Enumerate prime numbers in the range.
176176
- Order elements in an array by even and odd: [c++](/cpp-algorithm/src/array/order_element.h)(`EvenOdd`) | Order even and odd numbers in the array.
177177
- Order elements in an array by specified order, EPI#5.8: [c++](/cpp-algorithm/src/array/order_element.h)(`Rearrange`) | Rearrange arrays to have a specific order.
178-
- Random data sampling - offline, EPI#5.12: [c++](/cpp-algorithm/src/array/random_data_sampling.h)(`OfflineRandomSampling`) | Randomly select $\textit{k}$ elements from the array.
179-
- Random data sampling - compute permutation, EPI#5.14: [c++](/cpp-algorithm/src/array/random_data_sampling.h)(`ComputeRandomPermutation`) | Compute permutation of the array generated by random sampling.
178+
- Random data sampling: Select $k$ elements from the array uniformly at random.
179+
- offline, EPI#5.12: [c++](/cpp-algorithm/src/array/random_data_sampling.h)(`OfflineRandomSampling`) | Design an algorithm to return a random subset of $k$ elements from an array.
180+
- online, EPI#5.13: [c++](/cpp-algorithm/src/array/random_data_sampling.h)(`OnlineRandomSampling`) | Design an algorithm that reads data and creates a random subset of size $k$.
181+
- compute permutation, EPI#5.14: [c++](/cpp-algorithm/src/array/random_data_sampling.h)(`ComputeRandomPermutation`) | Compute permutation of the array generated by random sampling.
180182
- Replace elements
181183
- replace and remove: [c++](/cpp-algorithm/src/array/replace_element.h)(`ReplaceAndRemoveString1`, `ReplaceAndRemoveString2`) | Replace element and remove element in the array.
182184
- telex encoding: [c++](/cpp-algorithm/src/array/replace_element.h)(`TelexEncoding`) | Telex encoding for punctuation marks.

cpp-algorithm/src/array/random_data_sampling.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,37 @@ auto RandomDataSampling::OfflineRandomSampling(const int k, std::vector<int>& ar
1414
return std::vector<int>{arr.begin(), arr.begin() + k};
1515
}
1616

17+
/**
 * \brief Keep a uniform random sample of size k while reading [begin, end) once.
 * The first k elements seed the sample; each later element replaces a random
 * slot with probability k / (number of elements read so far).
 * \param begin iterator to the first element to read
 * \param end iterator one past the last element to read
 * \param k sample size
 * \return the sample; empty when k <= 0, and every element read when the range
 *         holds fewer than k elements
 */
auto RandomDataSampling::OnlineRandomSampling(std::vector<int>::const_iterator begin,
                                              const std::vector<int>::const_iterator end,
                                              const int k)
    -> std::vector<int>
{
    std::vector<int> running_sample;
    if (k <= 0)
    {
        // An empty sample needs no random draws, and a negative k would give the
        // distribution below an invalid (lower > upper) range.
        return running_sample;
    }

    // Save the first k elements, stopping early instead of reading past `end`
    // when the input is shorter than k.
    for (int i = 0; i < k && begin != end; ++i)
    {
        running_sample.emplace_back(*begin);
        ++begin;
    }

    std::default_random_engine engine((std::random_device())());
    int num_seen_so_far = k;
    while (begin != end)
    {
        const int x = *begin;
        ++begin;
        ++num_seen_so_far;

        // Draw an index uniformly from the closed range [0, num_seen_so_far - 1].
        // It lands in [0, k) with probability k / num_seen_so_far, in which case
        // x takes over that slot of the sample.
        const int idx_to_replace =
            std::uniform_int_distribution<int>{0, num_seen_so_far - 1}(engine);
        if (idx_to_replace < k)
        {
            running_sample[idx_to_replace] = x;
        }
    }

    return running_sample;
}
47+
1748
auto RandomDataSampling::ComputeRandomPermutation(const int n) -> std::vector<int>
1849
{
1950
auto permutation = std::vector<int>(n);

cpp-algorithm/src/array/random_data_sampling.h

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,23 +6,25 @@
66
namespace RandomDataSampling
77
{
88
/**
9-
* \brief Randomly select k elements from the array.
9+
* \brief Select k elements from the array uniformly at random.
10+
* Let A be an array with n distinct elements. Design an algorithm to return a random subset of k elements from A, where every subset has an equal chance of being chosen.
1011
* \param k sample size
1112
* \param arr input array
1213
* \return result array
1314
*/
1415
auto OfflineRandomSampling(int k, std::vector<int>& arr) -> std::vector<int>;
1516

16-
// TODO: Implement OnlineRandomSampling
1717
/**
18-
* \brief Randomly select k elements from the array.
18+
* \brief Select k elements uniformly at random from the range [begin, end).
19+
* Design an algorithm that reads data and creates a random subset of size k, where each item has an equal chance of being included.
1920
* \param begin begin iterator
2021
* \param end end iterator
2122
* \param k sample size
2223
* \return result array
2324
*/
24-
auto OnlineRandomSampling(const std::vector<int>::const_iterator& begin,
25-
const std::vector<int>::const_iterator& end, int k)
25+
auto OnlineRandomSampling(std::vector<int>::const_iterator begin,
26+
std::vector<int>::const_iterator end,
27+
int k)
2628
-> std::vector<int>;
2729

2830
/**

cpp-algorithm/src/array/test/random_data_sampling_test.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,22 @@ GTEST_TEST(RandomDataSampling, OfflineRandomSampling)
1919
EXPECT_EQ(k, static_cast<int>(result.size()));
2020
}
2121

22+
// Checks that online sampling of k items from a 4-element input yields k values,
// each taken from the input and none repeated.
GTEST_TEST(RandomDataSampling, OnlineRandomSampling)
{
    constexpr int k = 3;
    const auto arr = std::vector<int>{3, 7, 5, 11};
    const auto result = RandomDataSampling::OnlineRandomSampling(arr.cbegin(), arr.cend(), k);

    // The sample is random, so print it to make a failing run reproducible by eye.
    std::stringstream stream;
    for (const int item : result)
    {
        stream << item << " ";
    }
    std::cout << stream.str() << std::endl;

    EXPECT_EQ(k, static_cast<int>(result.size()));

    // The input values are distinct, so a valid sample contains each chosen value
    // exactly once and every chosen value must appear in the input.
    for (const int item : result)
    {
        int times_in_input = 0;
        for (const int source : arr)
        {
            if (source == item)
            {
                ++times_in_input;
            }
        }
        EXPECT_EQ(1, times_in_input);

        int times_in_sample = 0;
        for (const int other : result)
        {
            if (other == item)
            {
                ++times_in_sample;
            }
        }
        EXPECT_EQ(1, times_in_sample);
    }
}
37+
2238
GTEST_TEST(RandomDataSampling, ComputeRandomPermutation)
2339
{
2440
constexpr int k = 3;

0 commit comments

Comments
 (0)