Files
Peng Liu d0cee6939a [libc++] Optimize std::{,ranges}::{fill,fill_n} for segmented iterators (#132665)
This patch optimizes `std::fill`, `std::fill_n`, `std::ranges::fill`,
and `std::ranges::fill_n` for segmented iterators, achieving substantial
performance improvements. Specifically, for `deque<int>` iterators, the
performance improvements are above 10x for all these algorithms. The
optimization also enables filling segmented memory of `deque<int>` to
approach the performance of filling contiguous memory of `vector<int>`.


Benchmark results comparing the before and after implementations are
provided below. For additional context, we’ve included `vector<int>`
results, which remain unchanged, as this patch specifically targets
segmented iterators and leaves non-segmented iterator behavior
untouched.



Fixes two subtasks outlined in #102817.

#### `fill_n`

```
-----------------------------------------------------------------------------
Benchmark                                Before            After      Speedup
-----------------------------------------------------------------------------
std::fill_n(deque<int>)/32              11.4 ns          2.28 ns        5.0x
std::fill_n(deque<int>)/50              19.7 ns          3.40 ns        5.8x
std::fill_n(deque<int>)/1024             391 ns          37.3 ns       10.5x
std::fill_n(deque<int>)/8192            3174 ns           301 ns       10.5x
std::fill_n(deque<int>)/65536          26504 ns          2951 ns        9.0x
std::fill_n(deque<int>)/1048576       407960 ns         80658 ns        5.1x
rng::fill_n(deque<int>)/32              14.3 ns          2.15 ns        6.6x
rng::fill_n(deque<int>)/50              20.2 ns          3.22 ns        6.3x
rng::fill_n(deque<int>)/1024             381 ns          37.8 ns       10.1x
rng::fill_n(deque<int>)/8192            3101 ns           294 ns       10.5x
rng::fill_n(deque<int>)/65536          25098 ns          2926 ns        8.6x
rng::fill_n(deque<int>)/1048576       394342 ns         78874 ns        5.0x
std::fill_n(vector<int>)/32             1.76 ns          1.72 ns        1.0x
std::fill_n(vector<int>)/50             3.00 ns          2.73 ns        1.1x
std::fill_n(vector<int>)/1024           38.4 ns          37.9 ns        1.0x
std::fill_n(vector<int>)/8192            258 ns           252 ns        1.0x
std::fill_n(vector<int>)/65536          2993 ns          2889 ns        1.0x
std::fill_n(vector<int>)/1048576       80328 ns         80468 ns        1.0x
rng::fill_n(vector<int>)/32             1.99 ns          1.35 ns        1.5x
rng::fill_n(vector<int>)/50             2.66 ns          2.12 ns        1.3x
rng::fill_n(vector<int>)/1024           37.7 ns          35.8 ns        1.1x
rng::fill_n(vector<int>)/8192            253 ns           250 ns        1.0x
rng::fill_n(vector<int>)/65536          2922 ns          2930 ns        1.0x
rng::fill_n(vector<int>)/1048576       79739 ns         79742 ns        1.0x
```

#### `fill`

```
--------------------------------------------------------------------------
Benchmark                              Before            After     Speedup
--------------------------------------------------------------------------
std::fill(deque<int>)/32              13.7 ns          2.45 ns        5.6x
std::fill(deque<int>)/50              21.7 ns          4.57 ns        4.7x
std::fill(deque<int>)/1024             367 ns          38.5 ns        9.5x
std::fill(deque<int>)/8192            2896 ns           247 ns       11.7x
std::fill(deque<int>)/65536          23723 ns          2907 ns        8.2x
std::fill(deque<int>)/1048576       379043 ns         79885 ns        4.7x
rng::fill(deque<int>)/32              13.6 ns          2.70 ns        5.0x
rng::fill(deque<int>)/50              23.4 ns          3.94 ns        5.9x
rng::fill(deque<int>)/1024             377 ns          37.9 ns        9.9x
rng::fill(deque<int>)/8192            2914 ns           286 ns       10.2x
rng::fill(deque<int>)/65536          23612 ns          2939 ns        8.0x
rng::fill(deque<int>)/1048576       379841 ns         80079 ns        4.7x
std::fill(vector<int>)/32             1.99 ns          1.79 ns        1.1x
std::fill(vector<int>)/50             3.05 ns          3.06 ns        1.0x
std::fill(vector<int>)/1024           37.6 ns          38.0 ns        1.0x
std::fill(vector<int>)/8192            255 ns           257 ns        1.0x
std::fill(vector<int>)/65536          2966 ns          2981 ns        1.0x
std::fill(vector<int>)/1048576       78300 ns         80348 ns        1.0x
rng::fill(vector<int>)/32             1.77 ns          1.75 ns        1.0x
rng::fill(vector<int>)/50             4.85 ns          2.31 ns        2.1x
rng::fill(vector<int>)/1024           39.6 ns          36.1 ns        1.1x
rng::fill(vector<int>)/8192            238 ns           251 ns        0.9x
rng::fill(vector<int>)/65536          2941 ns          2918 ns        1.0x
rng::fill(vector<int>)/1048576       80497 ns         80442 ns        1.0x
```

---------

Co-authored-by: Louis Dionne <ldionne.2@gmail.com>
Co-authored-by: A. Jiang <de34@live.cn>
2025-10-17 07:41:24 +08:00

62 lines
1.8 KiB
C++

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef _LIBCPP___ALGORITHM_RANGES_FILL_H
#define _LIBCPP___ALGORITHM_RANGES_FILL_H
#include <__algorithm/fill.h>
#include <__algorithm/fill_n.h>
#include <__config>
#include <__iterator/concepts.h>
#include <__ranges/access.h>
#include <__ranges/concepts.h>
#include <__ranges/dangling.h>
#include <__utility/move.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
#endif
_LIBCPP_PUSH_MACROS
#include <__undef_macros>
#if _LIBCPP_STD_VER >= 20
_LIBCPP_BEGIN_NAMESPACE_STD
namespace ranges {
struct __fill {
template <class _Type, output_iterator<const _Type&> _Iter, sentinel_for<_Iter> _Sent>
_LIBCPP_HIDE_FROM_ABI constexpr _Iter operator()(_Iter __first, _Sent __last, const _Type& __value) const {
if constexpr (sized_sentinel_for<_Sent, _Iter>) {
auto __n = __last - __first;
return std::__fill_n(std::move(__first), __n, __value);
} else {
return std::__fill(std::move(__first), std::move(__last), __value);
}
}
template <class _Type, output_range<const _Type&> _Range>
_LIBCPP_HIDE_FROM_ABI constexpr borrowed_iterator_t<_Range> operator()(_Range&& __range, const _Type& __value) const {
return (*this)(ranges::begin(__range), ranges::end(__range), __value);
}
};
inline namespace __cpo {
inline constexpr auto fill = __fill{};
} // namespace __cpo
} // namespace ranges
_LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 20
_LIBCPP_POP_MACROS
#endif // _LIBCPP___ALGORITHM_RANGES_FILL_H