|
|
|
调用签名
|
|
|
template< std::input_iterator I, std::sentinel_for<I> S,
std::weakly_incrementable O, class Gen >
requires (std::forward_iterator<I> or
std::random_access_iterator<O>) &&
std::indirectly_copyable<I, O> &&
std::uniform_random_bit_generator<std::remove_reference_t<Gen>>
O sample( I first, S last, O out, std::iter_difference_t<I> n, Gen&& gen );
|
(1) |
(C++20 起) |
template< ranges::input_range R, std::weakly_incrementable O, class Gen >
requires (ranges::forward_range<R> || std::random_access_iterator<O>) &&
std::indirectly_copyable<ranges::iterator_t<R>, O> &&
std::uniform_random_bit_generator<std::remove_reference_t<Gen>>
O sample( R&& r, O
out, ranges::range_difference_t<R> n, Gen&& gen
);
|
(2) |
(C++20 起) |
|
|
|
1) 从序列 [first, last)
选择 M = min(n, last - first) 个元素(不放回抽样)使得每个可能的样本拥有相等的出现概率,并将那些选择的元素写到始于
out
的范围中。
仅若 I
实现 std::forward_iterator 算法才稳定(保持被选择元素的相对顺序)。
若 out
在 [first, last)
中则行为未定义。
2) 同 (1) ,但以 r
为源范围,如同以 ranges::begin(r) 为 first
并以 ranges::end(r) 为 last
。
此页面上描述的仿函数实体是 niebloid,即:
- 调用其中任何一个时,不能指定显式模板实参列表。
- 其中任何一个均对实参依赖查找不可见。
- 当通过常规无限定查找在函数调用运算符左侧找到其中任何一个时,它会禁止实参依赖查找。
实际上,它们能以函数对象,或者某些特殊编译器扩展实现。
参数
first, last
|
-
|
从中采样的范围(总体)
|
r
|
-
|
从中采样的范围(总体)
|
out
|
-
|
用以写入样本的输出迭代器
|
n
|
-
|
要抽取的样本数
|
gen
|
-
|
用作随机性源的随机数生成器
|
返回值
等于 out + M
的迭代器,即结果采样范围的末尾。
复杂度
线性: 𝓞(last - first)
。
注解
此函数可能实现选择抽样或蓄水池抽样。
可能的实现
struct sample_fn {
template<std::input_iterator I, std::sentinel_for<I> S,
std::weakly_incrementable O, class Gen>
requires (std::forward_iterator<I> or
std::random_access_iterator<O>) &&
std::indirectly_copyable<I, O> &&
std::uniform_random_bit_generator<std::remove_reference_t<Gen>>
O operator()( I first, S last, O out, std::iter_difference_t<I> n, Gen&& gen ) const {
using diff_t = std::iter_difference_t<I>;
using distrib_t = std::uniform_int_distribution<diff_t>;
using param_t = typename distrib_t::param_type;
distrib_t D{};
if constexpr (std::forward_iterator<I>) {
// 此分支保持样本元素“稳定性”
auto rest {ranges::distance(first, last)};
for (n = ranges::min(n, rest); n != 0; ++first) {
if (D(gen, param_t(0, --rest)) < n) {
*out++ = *first;
--n;
}
}
return out;
} else { // D 为 random_access_iterator
diff_t sample_size{};
// 复制 [first, first + M) 元素到“随机访问”输出
for (; first != last && sample_size != n; ++first) {
out[sample_size++] = *first;
}
// 以随机选择的值重写某些复制的元素
for (auto pop_size {sample_size}; first != last; ++first, ++pop_size) {
const auto i {D(gen, param_t{0, pop_size})};
if (i < n) out[i] = *first;
}
return out + sample_size;
}
}
template<ranges::input_range R, std::weakly_incrementable O, class Gen>
requires (ranges::forward_range<R> or std::random_access_iterator<O>) &&
std::indirectly_copyable<ranges::iterator_t<R>, O> &&
std::uniform_random_bit_generator<std::remove_reference_t<Gen>>
O operator()( R&& r, O out, ranges::range_difference_t<R> n, Gen&& gen ) const {
return (*this)(ranges::begin(r), ranges::end(r), std::move(out), n,
std::forward<Gen>(gen));
}
};
inline constexpr sample_fn sample{};
|
示例
#include <algorithm>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <random>
#include <vector>
// Writes `rem`, the element count of `v` in square brackets, and then the
// space-separated elements of `v` inside braces, followed by a newline.
void print(auto const& rem, auto const& v) {
    std::cout << rem << " = [" << std::size(v) << "] { ";
    std::for_each(std::begin(v), std::end(v), [](auto const& item) {
        std::cout << item << ' ';
    });
    std::cout << "}\n";
}
// Draws samples of every size from 0 up to two more than the population size
// and prints each one; requests larger than the population yield the whole
// population.
int main()
{
    const auto in = {1, 2, 3, 4, 5, 6};
    print("in", in);

    // Generator seeded from a non-deterministic source, so output varies per run.
    auto gen = std::mt19937{std::random_device{}()};
    const int max = in.size() + 2;
    std::vector<int> out;

    int n{};
    while (n != max) {
        out.clear();
        std::ranges::sample(in, std::back_inserter(out), n, gen);
        std::cout << "n = " << n;
        print(", out", out);
        ++n;
    }
}
可能的输出:
in = [6] { 1 2 3 4 5 6 }
n = 0, out = [0] { }
n = 1, out = [1] { 5 }
n = 2, out = [2] { 4 5 }
n = 3, out = [3] { 2 3 5 }
n = 4, out = [4] { 2 4 5 6 }
n = 5, out = [5] { 1 2 3 5 6 }
n = 6, out = [6] { 1 2 3 4 5 6 }
n = 7, out = [6] { 1 2 3 4 5 6 }
参阅
|
ranges::shuffle (C++20) | 随机重排范围中的元素 (niebloid) |
|
sample (C++17) | 从一个序列中随机选择 n 个元素 (函数模板) |