Files
llvm-project/llvm/unittests/Support/ParallelTest.cpp
Fangrui Song 8daaa26efd [Support] Support nested parallel TaskGroup via work-stealing (#189293)
Nested TaskGroups run serially to prevent deadlock, as documented by
https://reviews.llvm.org/D61115 and refined by
https://reviews.llvm.org/D148984 to use threadIndex.

Enable nested parallelism by having worker threads actively execute
tasks from the work queue while waiting (work-stealing), instead of
just blocking. Root-level TaskGroups (main thread) keep the efficient
blocking Latch::sync(), so there is no overhead for the common
non-nested case.

In lld, https://reviews.llvm.org/D131247 worked around the limitation
by passing a single root TaskGroup into OutputSection::writeTo and
spawning 4MB-chunked tasks into it. However, SyntheticSection::writeTo
calls with internal parallelism (e.g. GdbIndexSection,
MergeNoTailSection) still ran serially on worker threads. With this
change, their internal parallelFor/parallelForEach calls parallelize
automatically via helpSync work-stealing.

The increased parallelism can reorder error messages from parallel
phases (e.g. relocation processing during section writes), so one lld
test is updated to use --threads=1 for deterministic output.
2026-04-01 19:20:16 -07:00

126 lines
4.0 KiB
C++

//===- llvm/unittest/Support/ParallelTest.cpp -----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// Parallel.h unit tests.
///
//===----------------------------------------------------------------------===//
#include "llvm/Support/Parallel.h"
#include "llvm/Config/llvm-config.h" // for LLVM_ENABLE_THREADS
#include "llvm/Support/ThreadPool.h"
#include "gtest/gtest.h"
#include <atomic>
#include <cstdint>
#include <cstring>
#include <random>
uint32_t array[1024 * 1024];
using namespace llvm;
// Tests below are hanging up on mingw. Investigating.
#if !defined(__MINGW32__)
// Fill the global buffer with pseudo-random values, sort it in parallel, and
// verify the result is ordered.
TEST(Parallel, sort) {
  std::mt19937 rng;
  std::uniform_int_distribution<uint32_t> gen;
  for (uint32_t &elem : array)
    elem = gen(rng);
  parallelSort(std::begin(array), std::end(array));
  ASSERT_TRUE(llvm::is_sorted(array));
}
TEST(Parallel, parallel_for) {
  // We need to test the case with a TaskSize > 1. We are white-box testing
  // here. The TaskSize is calculated as (End - Begin) / 1024 at the time of
  // writing.
  uint32_t range[2050];
  // Use llvm::fill for consistency with the TransformReduce test below.
  llvm::fill(range, 1);
  parallelFor(0, 2049, [&range](size_t I) { ++range[I]; });
  uint32_t expected[2049];
  llvm::fill(expected, 2);
  ASSERT_TRUE(std::equal(range, range + 2049, expected));
  // Check that we don't write past the end of the requested range.
  ASSERT_EQ(range[2049], 1u);
}
// Exercise parallelTransformReduce on an empty range, on strings, and on
// ranges whose sizes do not divide evenly into tasks.
TEST(Parallel, TransformReduce) {
  auto passThrough = [](uint32_t v) { return v; };

  // An empty input must reduce to the init value.
  uint32_t total = parallelTransformReduce(ArrayRef<uint32_t>(), 0U,
                                           std::plus<uint32_t>(), passThrough);
  EXPECT_EQ(total, 0U);

  // Sum the lengths of these strings in parallel.
  const char *strs[] = {"a", "ab", "abc", "abcd", "abcde", "abcdef"};
  size_t lenSum =
      parallelTransformReduce(strs, static_cast<size_t>(0), std::plus<size_t>(),
                              [](const char *s) { return strlen(s); });
  EXPECT_EQ(lenSum, static_cast<size_t>(21));

  // Check that we handle non-divisible task sizes as above.
  uint32_t range[2050];
  llvm::fill(range, 1);
  total = parallelTransformReduce(range, 0U, std::plus<uint32_t>(), passThrough);
  EXPECT_EQ(total, 2050U);

  llvm::fill(range, 2);
  total = parallelTransformReduce(range, 0U, std::plus<uint32_t>(), passThrough);
  EXPECT_EQ(total, 4100U);

  // Avoid one large task.
  uint32_t range2[3060];
  llvm::fill(range2, 1);
  total =
      parallelTransformReduce(range2, 0U, std::plus<uint32_t>(), passThrough);
  EXPECT_EQ(total, 3060U);
}
// parallelForEachError must join all per-element errors into an ErrorList.
// The three even inputs each produce one "asdf" error.
TEST(Parallel, ForEachError) {
  int nums[] = {1, 2, 3, 4, 5, 6};
  Error err = parallelForEachError(nums, [](int v) -> Error {
    return v % 2 == 0
               ? createStringError(std::errc::invalid_argument, "asdf")
               : Error::success();
  });
  EXPECT_TRUE(err.isA<ErrorList>());
  std::string msg = toString(std::move(err));
  EXPECT_EQ(msg, std::string("asdf\nasdf\nasdf"));
}
#if LLVM_ENABLE_THREADS
// A root-level TaskGroup is parallel unless only one thread was requested,
// and a TaskGroup created inside a worker task should behave the same way.
TEST(Parallel, NestedTaskGroup) {
  parallel::TaskGroup outer;
  const bool singleThreaded = parallel::strategy.ThreadsRequested == 1;
  EXPECT_TRUE(outer.isParallel() || singleThreaded);
  outer.spawn([&] {
    parallel::TaskGroup inner;
    EXPECT_TRUE(inner.isParallel() ||
                (parallel::strategy.ThreadsRequested == 1));
  });
}
// Verify nested parallelFor doesn't deadlock. This is a simplified version of
// the pattern from https://reviews.llvm.org/D61115 that originally motivated
// serializing nested TaskGroups. With work-stealing in helpSync(), nested
// parallelism now works without deadlock.
TEST(Parallel, NestedParallelFor) {
  // Track each (i, j, k) cell individually: a bare total of 512 increments
  // could hide a duplicated index compensating for a skipped one.
  std::atomic<uint32_t> counts[512];
  for (std::atomic<uint32_t> &c : counts)
    c.store(0, std::memory_order_relaxed);
  parallelFor(0, 8, [&](size_t i) {
    parallelFor(0, 8, [&](size_t j) {
      parallelFor(0, 8, [&](size_t k) {
        counts[i * 64 + j * 8 + k].fetch_add(1, std::memory_order_relaxed);
      });
    });
  });
  // Every index must have been visited exactly once.
  for (size_t idx = 0; idx < 512; ++idx)
    EXPECT_EQ(counts[idx].load(std::memory_order_relaxed), 1u);
}
#endif
#endif