This patch optimizes how `for_each` iterates over trees by using recursion and storing pointers to the next nodes on the stack. This avoids pointer chasing through the `__parent_` pointer, reducing cache misses. It also makes use of the compiler being able tail-call optimize the recursive function, removing back-tracking the iterators have to do. ``` Benchmark old new Difference % Difference -------------------------------------------------- -------------- -------------- ------------ -------------- rng::for_each(map<int>)/32 35.19 26.67 -8.52 -24.21% rng::for_each(map<int>)/50 64.13 40.68 -23.45 -36.57% rng::for_each(map<int>)/8 5.06 6.49 1.43 28.21% rng::for_each(map<int>)/8192 22893.89 9266.68 -13627.21 -59.52% rng::for_each(map<int>::iterator)/32 35.51 26.88 -8.63 -24.31% rng::for_each(map<int>::iterator)/50 64.39 41.24 -23.15 -35.95% rng::for_each(map<int>::iterator)/8 5.12 5.93 0.81 15.80% rng::for_each(map<int>::iterator)/8192 21283.14 9736.83 -11546.31 -54.25% rng::for_each(multimap<int>)/32 35.22 26.61 -8.61 -24.45% rng::for_each(multimap<int>)/50 64.10 40.07 -24.03 -37.49% rng::for_each(multimap<int>)/8 5.08 6.69 1.61 31.70% rng::for_each(multimap<int>)/8192 23130.44 9026.16 -14104.28 -60.98% rng::for_each(multimap<int>::iterator)/32 35.40 25.08 -10.32 -29.15% rng::for_each(multimap<int>::iterator)/50 64.19 38.15 -26.04 -40.56% rng::for_each(multimap<int>::iterator)/8 5.04 5.25 0.22 4.31% rng::for_each(multimap<int>::iterator)/8192 22875.97 9392.08 -13483.89 -58.94% rng::for_each(multiset<int>)/32 35.82 27.11 -8.72 -24.33% rng::for_each(multiset<int>)/50 62.92 41.59 -21.32 -33.89% rng::for_each(multiset<int>)/8 4.79 6.79 2.00 41.70% rng::for_each(multiset<int>)/8192 22642.68 9280.95 -13361.73 -59.01% rng::for_each(multiset<int>::iterator)/32 35.76 26.71 -9.04 -25.28% rng::for_each(multiset<int>::iterator)/50 63.44 39.00 -24.44 -38.53% rng::for_each(multiset<int>::iterator)/8 4.90 5.21 0.30 6.18% rng::for_each(multiset<int>::iterator)/8192 19930.45 9867.60 -10062.85 -50.49% rng::for_each(set<int>)/32 35.90 27.30 -8.60 -23.96% rng::for_each(set<int>)/50 63.15 40.75 -22.40 -35.47% rng::for_each(set<int>)/8 4.77 6.83 2.06 43.23% rng::for_each(set<int>)/8192 20262.77 9381.57 -10881.20 -53.70% rng::for_each(set<int>::iterator)/32 36.02 26.42 -9.60 -26.64% rng::for_each(set<int>::iterator)/50 63.29 37.97 -25.32 -40.01% rng::for_each(set<int>::iterator)/8 4.72 5.22 0.50 10.50% rng::for_each(set<int>::iterator)/8192 20041.91 9831.91 -10210.00 -50.94% ```
98 lines
3.5 KiB
C++
98 lines
3.5 KiB
C++
//===----------------------------------------------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#ifndef _LIBCPP___ALGORITHM_RANGES_FOR_EACH_H
|
|
#define _LIBCPP___ALGORITHM_RANGES_FOR_EACH_H
|
|
|
|
#include <__algorithm/for_each.h>
|
|
#include <__algorithm/for_each_n.h>
|
|
#include <__algorithm/in_fun_result.h>
|
|
#include <__algorithm/specialized_algorithms.h>
|
|
#include <__concepts/assignable.h>
|
|
#include <__config>
|
|
#include <__functional/identity.h>
|
|
#include <__iterator/concepts.h>
|
|
#include <__iterator/projected.h>
|
|
#include <__ranges/access.h>
|
|
#include <__ranges/concepts.h>
|
|
#include <__ranges/dangling.h>
|
|
#include <__type_traits/remove_cvref.h>
|
|
#include <__utility/move.h>
|
|
|
|
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
|
|
# pragma GCC system_header
|
|
#endif
|
|
|
|
_LIBCPP_PUSH_MACROS
|
|
#include <__undef_macros>
|
|
|
|
#if _LIBCPP_STD_VER >= 20
|
|
|
|
_LIBCPP_BEGIN_NAMESPACE_STD
|
|
|
|
namespace ranges {
|
|
|
|
template <class _Iter, class _Func>
|
|
using for_each_result = in_fun_result<_Iter, _Func>;
|
|
|
|
struct __for_each {
|
|
private:
|
|
template <class _Iter, class _Sent, class _Proj, class _Func>
|
|
_LIBCPP_HIDE_FROM_ABI constexpr static for_each_result<_Iter, _Func>
|
|
__for_each_impl(_Iter __first, _Sent __last, _Func& __func, _Proj& __proj) {
|
|
// In the case where we have different iterator and sentinel types, the segmented iterator optimization
|
|
// in std::for_each will not kick in. Therefore, we prefer std::for_each_n in that case (whenever we can
|
|
// obtain the `n`).
|
|
if constexpr (!std::assignable_from<_Iter&, _Sent> && std::sized_sentinel_for<_Sent, _Iter>) {
|
|
auto __n = __last - __first;
|
|
auto __end = std::__for_each_n(std::move(__first), __n, __func, __proj);
|
|
return {std::move(__end), std::move(__func)};
|
|
} else {
|
|
auto __end = std::__for_each(std::move(__first), std::move(__last), __func, __proj);
|
|
return {std::move(__end), std::move(__func)};
|
|
}
|
|
}
|
|
|
|
public:
|
|
template <input_iterator _Iter,
|
|
sentinel_for<_Iter> _Sent,
|
|
class _Proj = identity,
|
|
indirectly_unary_invocable<projected<_Iter, _Proj>> _Func>
|
|
_LIBCPP_HIDE_FROM_ABI constexpr for_each_result<_Iter, _Func>
|
|
operator()(_Iter __first, _Sent __last, _Func __func, _Proj __proj = {}) const {
|
|
return __for_each_impl(std::move(__first), std::move(__last), __func, __proj);
|
|
}
|
|
|
|
template <input_range _Range,
|
|
class _Proj = identity,
|
|
indirectly_unary_invocable<projected<iterator_t<_Range>, _Proj>> _Func>
|
|
_LIBCPP_HIDE_FROM_ABI constexpr for_each_result<borrowed_iterator_t<_Range>, _Func>
|
|
operator()(_Range&& __range, _Func __func, _Proj __proj = {}) const {
|
|
using _SpecialAlg = __specialized_algorithm<_Algorithm::__for_each, __single_range<remove_cvref_t<_Range>>>;
|
|
if constexpr (_SpecialAlg::__has_algorithm) {
|
|
auto [__iter, __func2] = _SpecialAlg()(__range, std::move(__func), std::move(__proj));
|
|
return {std::move(__iter), std::move(__func)};
|
|
} else {
|
|
return __for_each_impl(ranges::begin(__range), ranges::end(__range), __func, __proj);
|
|
}
|
|
}
|
|
};
|
|
|
|
inline namespace __cpo {
|
|
inline constexpr auto for_each = __for_each{};
|
|
} // namespace __cpo
|
|
} // namespace ranges
|
|
|
|
_LIBCPP_END_NAMESPACE_STD
|
|
|
|
#endif // _LIBCPP_STD_VER >= 20
|
|
|
|
_LIBCPP_POP_MACROS
|
|
|
|
#endif // _LIBCPP___ALGORITHM_RANGES_FOR_EACH_H
|