PointerUnion stores a fixed-width `ceil(log2(N))`-bit tag in the low bits of the pointer. This works only when every member type provides at least that many low bits — if the least-aligned type doesn't, compilation fails, even though the higher-aligned types may have plenty of spare bits going to waste. Introduce a variable-length escape-encoded tag that exploits the extra low bits of higher-aligned types, analogous to UTF-8: types are grouped into tiers by NumLowBitsAvailable; each non-final tier reserves one code as an escape prefix, and the next tier extends the tag into the newly available bits. This allows PointerUnion to hold more type variants than a fixed-width tag permits. The fixed-width path is used when the minimum alignment already provides enough bits (the common case); the variable-width path activates only when it doesn't, and requires types to be listed in non-decreasing NumLowBitsAvailable order. I need this for https://github.com/llvm/llvm-project/pull/186923 which requires a 6-member PointerUnion in MLIR TypeRange/ValueRange. On 32-bit systems, some members only provide 2 low bits, insufficient for a 3-bit fixed-width tag.
239 lines
9.0 KiB
C++
239 lines
9.0 KiB
C++
//===- PointerUnionBM.cpp - Benchmark for PointerUnion ---------*- C++ -*-===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// Benchmarks for PointerUnion operations with randomized data.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "benchmark/benchmark.h"
|
|
#include "llvm/ADT/PointerUnion.h"
|
|
#include "llvm/Support/Casting.h"
|
|
#include <cstddef>
|
|
#include <random>
|
|
#include <vector>
|
|
|
|
using namespace llvm;
|
|
|
|
namespace llvm {
|
|
namespace {
|
|
|
|
// Aligned slot types with controlled NumLowBitsAvailable.
|
|
template <int N> struct alignas(4) A4 {
|
|
int data;
|
|
}; // 2 bits.
|
|
template <int N> struct alignas(8) A8 {
|
|
int data;
|
|
}; // 3 bits.
|
|
template <int N> struct alignas(32) A32 {
|
|
int data;
|
|
}; // 5 bits.
|
|
|
|
template <typename T> T &getSlot() {
|
|
static T S{};
|
|
return S;
|
|
}
|
|
|
|
// Encodes a list of slot types for random fill.
|
|
template <typename... Ts> struct TypeList {};
|
|
|
|
template <typename UnionT, typename... SlotTs>
|
|
void fillRandom(UnionT *Arr, size_t Count, TypeList<SlotTs...>) {
|
|
constexpr size_t N = sizeof...(SlotTs);
|
|
UnionT Variants[N] = {UnionT(&getSlot<SlotTs>())...};
|
|
std::mt19937 Rng(42);
|
|
std::uniform_int_distribution<size_t> Dist(0, N - 1);
|
|
for (size_t I = 0; I < Count; ++I)
|
|
Arr[I] = Variants[Dist(Rng)];
|
|
}
|
|
|
|
template <typename UnionT, typename... SlotTs>
|
|
void fillRandomWithNulls(UnionT *Arr, size_t Count, TypeList<SlotTs...> TL) {
|
|
fillRandom(Arr, Count, TL);
|
|
std::mt19937 Rng(123);
|
|
// 50% null mix to exercise both null and non-null paths equally.
|
|
std::bernoulli_distribution Coin(0.5);
|
|
for (size_t I = 0; I < Count; ++I)
|
|
if (Coin(Rng))
|
|
Arr[I] = nullptr;
|
|
}
|
|
|
|
// A8Types<N> = TypeList<A8<0>, A8<1>, ..., A8<N-1>>.
|
|
// Used to generate randomized arrays with a uniform mix of N distinct types.
|
|
template <size_t... Is>
|
|
TypeList<A8<Is>...> makeA8TypesImpl(std::index_sequence<Is...>);
|
|
template <size_t N>
|
|
using A8Types = decltype(makeA8TypesImpl(std::make_index_sequence<N>{}));
|
|
|
|
// Splits N types into two tiers: N0 types in tier 0 (A4, 2-bit) and
|
|
// N1 types in tier 1 (A32, 5-bit). Tier 0 gets up to 3 types (the max
|
|
// for 2 low bits minus one escape code).
|
|
template <size_t N> struct TwoTierSplit {
|
|
static constexpr size_t N0 = N < 4 ? N - 1 : 3;
|
|
static constexpr size_t N1 = N - N0;
|
|
};
|
|
|
|
// Splits N types into three tiers: N0 types in tier 0 (A4, 2-bit),
|
|
// N1 = 1 type in tier 1 (A8, 3-bit), and N2 types in tier 2 (A32, 5-bit).
|
|
// Tier 0 gets up to 3 types; tier 1 always gets exactly 1.
|
|
template <size_t N> struct ThreeTierSplit {
|
|
static constexpr size_t N0 = N < 5 ? N - 2 : 3;
|
|
static constexpr size_t N1 = 1;
|
|
static constexpr size_t N2 = N - N0 - N1;
|
|
};
|
|
|
|
// A4A32Types<N> = TypeList<A4<0>, ..., A4<N0-1>, A32<0>, ..., A32<N1-1>>.
|
|
// Used to generate randomized arrays with a mix of A4 and A32 types.
|
|
template <size_t... I0, size_t... I1>
|
|
TypeList<A4<I0>..., A32<I1>...> makeA4A32TypesImpl(std::index_sequence<I0...>,
|
|
std::index_sequence<I1...>);
|
|
template <size_t N>
|
|
using A4A32Types = decltype(makeA4A32TypesImpl(
|
|
std::make_index_sequence<TwoTierSplit<N>::N0>{},
|
|
std::make_index_sequence<TwoTierSplit<N>::N1>{}));
|
|
|
|
// A4A8A32Types<N> = TypeList<A4<0>, ..., A8<0>, ..., A32<0>, ...>.
|
|
// Used to generate randomized arrays with a mix of A4, A8, and A32 types.
|
|
template <size_t... I0, size_t... I1, size_t... I2>
|
|
TypeList<A4<I0>..., A8<I1>..., A32<I2>...>
|
|
makeA4A8A32TypesImpl(std::index_sequence<I0...>, std::index_sequence<I1...>,
|
|
std::index_sequence<I2...>);
|
|
template <size_t N>
|
|
using A4A8A32Types = decltype(makeA4A8A32TypesImpl(
|
|
std::make_index_sequence<ThreeTierSplit<N>::N0>{},
|
|
std::make_index_sequence<ThreeTierSplit<N>::N1>{},
|
|
std::make_index_sequence<ThreeTierSplit<N>::N2>{}));
|
|
|
|
// Union type aliases.
|
|
template <size_t... Is>
|
|
auto makePtrUnion(std::index_sequence<Is...>) -> PointerUnion<A8<Is> *...>;
|
|
template <size_t N>
|
|
using PtrUnion = decltype(makePtrUnion(std::make_index_sequence<N>{}));
|
|
|
|
template <size_t... I0, size_t... I1>
|
|
auto makePtrUnion2T(std::index_sequence<I0...>, std::index_sequence<I1...>)
|
|
-> PointerUnion<A4<I0> *..., A32<I1> *...>;
|
|
template <size_t N>
|
|
using PtrUnion2T =
|
|
decltype(makePtrUnion2T(std::make_index_sequence<TwoTierSplit<N>::N0>{},
|
|
std::make_index_sequence<TwoTierSplit<N>::N1>{}));
|
|
|
|
template <size_t... I0, size_t... I1, size_t... I2>
|
|
auto makePtrUnion3T(std::index_sequence<I0...>, std::index_sequence<I1...>,
|
|
std::index_sequence<I2...>)
|
|
-> PointerUnion<A4<I0> *..., A8<I1> *..., A32<I2> *...>;
|
|
template <size_t N>
|
|
using PtrUnion3T =
|
|
decltype(makePtrUnion3T(std::make_index_sequence<ThreeTierSplit<N>::N0>{},
|
|
std::make_index_sequence<ThreeTierSplit<N>::N1>{},
|
|
std::make_index_sequence<ThreeTierSplit<N>::N2>{}));
|
|
|
|
// Isa: random type mix, uniform distribution (~1/N hit rate for each type).
|
|
template <typename UnionT, typename QueryT, typename TL>
|
|
static void BM_Isa(benchmark::State &State) {
|
|
constexpr size_t Batch = 1024;
|
|
std::vector<UnionT> Arr(Batch);
|
|
fillRandom(Arr.data(), Batch, TL{});
|
|
benchmark::DoNotOptimize(Arr.data());
|
|
for (auto _ : State) {
|
|
bool R = false;
|
|
for (size_t I = 0; I < Batch; ++I)
|
|
R ^= isa<QueryT *>(Arr[I]);
|
|
benchmark::DoNotOptimize(R);
|
|
}
|
|
State.SetItemsProcessed(State.iterations() * Batch);
|
|
}
|
|
|
|
// IsNull: random mix with ~50% nulls.
|
|
template <typename UnionT, typename TL>
|
|
static void BM_IsNull(benchmark::State &State) {
|
|
constexpr size_t Batch = 1024;
|
|
std::vector<UnionT> Arr(Batch);
|
|
fillRandomWithNulls(Arr.data(), Batch, TL{});
|
|
benchmark::DoNotOptimize(Arr.data());
|
|
for (auto _ : State) {
|
|
bool R = false;
|
|
for (size_t I = 0; I < Batch; ++I)
|
|
R ^= Arr[I].isNull();
|
|
benchmark::DoNotOptimize(R);
|
|
}
|
|
State.SetItemsProcessed(State.iterations() * Batch);
|
|
}
|
|
|
|
} // namespace
|
|
} // namespace llvm
|
|
|
|
// Registration -- N = 2, 4, 8. PtrUnion3T uses N = 3, 4, 8.
|
|
|
|
// Isa: PtrUnion (fixed-width tag).
|
|
BENCHMARK((BM_Isa<PtrUnion<2>, A8<0>, A8Types<2>>))->Name("Isa/PU/2/First");
|
|
BENCHMARK((BM_Isa<PtrUnion<2>, A8<1>, A8Types<2>>))->Name("Isa/PU/2/Last");
|
|
|
|
BENCHMARK((BM_Isa<PtrUnion<4>, A8<0>, A8Types<4>>))->Name("Isa/PU/4/First");
|
|
BENCHMARK((BM_Isa<PtrUnion<4>, A8<3>, A8Types<4>>))->Name("Isa/PU/4/Last");
|
|
|
|
BENCHMARK((BM_Isa<PtrUnion<8>, A8<0>, A8Types<8>>))->Name("Isa/PU/8/First");
|
|
BENCHMARK((BM_Isa<PtrUnion<8>, A8<7>, A8Types<8>>))->Name("Isa/PU/8/Last");
|
|
|
|
// Isa: PtrUnion2T (variable-width, 2 alignment tiers).
|
|
// Note: PtrUnion2T<N> uses variable-width encoding only when N > 3 (when
|
|
// fixed-width tags don't fit in 2 bits). PtrUnion2T<2> is still fixed-width.
|
|
BENCHMARK((BM_Isa<PtrUnion2T<2>, A4<0>, A4A32Types<2>>))
|
|
->Name("Isa/PU2T/2/Tier0");
|
|
BENCHMARK((BM_Isa<PtrUnion2T<2>, A32<0>, A4A32Types<2>>))
|
|
->Name("Isa/PU2T/2/Tier1");
|
|
|
|
BENCHMARK((BM_Isa<PtrUnion2T<4>, A4<0>, A4A32Types<4>>))
|
|
->Name("Isa/PU2T/4/Tier0");
|
|
BENCHMARK((BM_Isa<PtrUnion2T<4>, A32<0>, A4A32Types<4>>))
|
|
->Name("Isa/PU2T/4/Tier1");
|
|
|
|
BENCHMARK((BM_Isa<PtrUnion2T<8>, A4<0>, A4A32Types<8>>))
|
|
->Name("Isa/PU2T/8/Tier0");
|
|
BENCHMARK((BM_Isa<PtrUnion2T<8>, A32<4>, A4A32Types<8>>))
|
|
->Name("Isa/PU2T/8/Tier1");
|
|
|
|
// Isa: PtrUnion3T (variable-width, 3 alignment tiers).
|
|
// Note: PtrUnion3T<N> uses variable-width encoding only when N > 4 (when
|
|
// fixed-width tags don't fit in 2 bits). PtrUnion3T<3> and <4> are
|
|
// still fixed-width.
|
|
BENCHMARK((BM_Isa<PtrUnion3T<3>, A4<0>, A4A8A32Types<3>>))
|
|
->Name("Isa/PU3T/3/Tier0");
|
|
BENCHMARK((BM_Isa<PtrUnion3T<3>, A8<0>, A4A8A32Types<3>>))
|
|
->Name("Isa/PU3T/3/Tier1");
|
|
BENCHMARK((BM_Isa<PtrUnion3T<3>, A32<0>, A4A8A32Types<3>>))
|
|
->Name("Isa/PU3T/3/Tier2");
|
|
|
|
BENCHMARK((BM_Isa<PtrUnion3T<4>, A4<0>, A4A8A32Types<4>>))
|
|
->Name("Isa/PU3T/4/Tier0");
|
|
BENCHMARK((BM_Isa<PtrUnion3T<4>, A8<0>, A4A8A32Types<4>>))
|
|
->Name("Isa/PU3T/4/Tier1");
|
|
BENCHMARK((BM_Isa<PtrUnion3T<4>, A32<0>, A4A8A32Types<4>>))
|
|
->Name("Isa/PU3T/4/Tier2");
|
|
|
|
BENCHMARK((BM_Isa<PtrUnion3T<8>, A4<0>, A4A8A32Types<8>>))
|
|
->Name("Isa/PU3T/8/Tier0");
|
|
BENCHMARK((BM_Isa<PtrUnion3T<8>, A8<0>, A4A8A32Types<8>>))
|
|
->Name("Isa/PU3T/8/Tier1");
|
|
BENCHMARK((BM_Isa<PtrUnion3T<8>, A32<3>, A4A8A32Types<8>>))
|
|
->Name("Isa/PU3T/8/Tier2");
|
|
|
|
// IsNull: all suites.
|
|
BENCHMARK((BM_IsNull<PtrUnion<2>, A8Types<2>>))->Name("IsNull/PU/2");
|
|
BENCHMARK((BM_IsNull<PtrUnion<4>, A8Types<4>>))->Name("IsNull/PU/4");
|
|
BENCHMARK((BM_IsNull<PtrUnion<8>, A8Types<8>>))->Name("IsNull/PU/8");
|
|
|
|
BENCHMARK((BM_IsNull<PtrUnion2T<2>, A4A32Types<2>>))->Name("IsNull/PU2T/2");
|
|
BENCHMARK((BM_IsNull<PtrUnion2T<4>, A4A32Types<4>>))->Name("IsNull/PU2T/4");
|
|
BENCHMARK((BM_IsNull<PtrUnion2T<8>, A4A32Types<8>>))->Name("IsNull/PU2T/8");
|
|
|
|
BENCHMARK((BM_IsNull<PtrUnion3T<3>, A4A8A32Types<3>>))->Name("IsNull/PU3T/3");
|
|
BENCHMARK((BM_IsNull<PtrUnion3T<4>, A4A8A32Types<4>>))->Name("IsNull/PU3T/4");
|
|
BENCHMARK((BM_IsNull<PtrUnion3T<8>, A4A8A32Types<8>>))->Name("IsNull/PU3T/8");
|
|
|
|
BENCHMARK_MAIN();
|