Nested TaskGroups currently run serially to prevent deadlock, as documented by https://reviews.llvm.org/D61115 and refined by https://reviews.llvm.org/D148984 to use threadIndex. Enable nested parallelism by having worker threads actively execute tasks from the work queue while they wait (work-stealing) instead of simply blocking. Root-level TaskGroups (on the main thread) keep the efficient blocking Latch::sync(), so the common non-nested case sees no overhead.

In lld, https://reviews.llvm.org/D131247 worked around the limitation by passing a single root TaskGroup into OutputSection::writeTo and spawning 4MB-chunked tasks into it. However, SyntheticSection::writeTo calls with internal parallelism (e.g. GdbIndexSection, MergeNoTailSection) still ran serially on worker threads. With this change, their internal parallelFor/parallelForEach calls parallelize automatically via helpSync work-stealing.

The increased parallelism can reorder error messages emitted from parallel phases (e.g. relocation processing during section writes), so one lld test is updated to pass --threads=1 for deterministic output.
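For intuition, a minimal sketch of the blocking-versus-work-stealing distinction described above follows. It is not the actual llvm::parallel executor: the TaskQueueSketch type and its spawn/tryRunOneTask/blockingSync members are illustrative assumptions, and the real implementation blocks on a Latch and dispatches through the thread pool's own queues rather than the single mutex-guarded deque shown here.

#include <atomic>
#include <deque>
#include <functional>
#include <mutex>
#include <thread>

// Illustrative sketch only; names and structure are assumptions for
// exposition, not the llvm::parallel internals.
struct TaskQueueSketch {
  std::mutex Mu;
  std::deque<std::function<void()>> Tasks;
  std::atomic<int> Outstanding{0};

  void spawn(std::function<void()> F) {
    std::lock_guard<std::mutex> Lock(Mu);
    Tasks.push_back(std::move(F));
    ++Outstanding;
  }

  // Pop and run one queued task, if any. Returns false when the queue is
  // empty so the caller can decide whether to keep helping or to yield.
  bool tryRunOneTask() {
    std::function<void()> F;
    {
      std::lock_guard<std::mutex> Lock(Mu);
      if (Tasks.empty())
        return false;
      F = std::move(Tasks.front());
      Tasks.pop_front();
    }
    F();
    --Outstanding;
    return true;
  }

  // Root-level wait: simply wait for the count to reach zero. (The real code
  // blocks efficiently on a Latch; a yield loop stands in to keep this short.)
  void blockingSync() {
    while (Outstanding.load() != 0)
      std::this_thread::yield();
  }

  // Nested wait ("work-stealing"): instead of parking, the waiting worker
  // thread keeps draining the queue, so tasks spawned by a nested
  // TaskGroup/parallelFor still make progress and the wait cannot deadlock.
  void helpSync() {
    while (Outstanding.load() != 0)
      if (!tryRunOneTask())
        std::this_thread::yield();
  }
};

The only point of the sketch is that a nested wait keeps executing queued tasks, so a worker thread that owns outstanding nested work never parks while that work is still sitting in the queue.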
//===- llvm/unittest/Support/ParallelTest.cpp -----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// Parallel.h unit tests.
///
//===----------------------------------------------------------------------===//

#include "llvm/Support/Parallel.h"
#include "llvm/Config/llvm-config.h" // for LLVM_ENABLE_THREADS
#include "llvm/Support/ThreadPool.h"
#include "gtest/gtest.h"
#include <random>

uint32_t array[1024 * 1024];

using namespace llvm;

// Tests below are hanging up on mingw. Investigating.
#if !defined(__MINGW32__)

TEST(Parallel, sort) {
  std::mt19937 randEngine;
  std::uniform_int_distribution<uint32_t> dist;

  for (auto &i : array)
    i = dist(randEngine);

  parallelSort(std::begin(array), std::end(array));
  ASSERT_TRUE(llvm::is_sorted(array));
}

TEST(Parallel, parallel_for) {
  // We need to test the case with a TaskSize > 1. We are white-box testing
  // here. The TaskSize is calculated as (End - Begin) / 1024 at the time of
  // writing.
  uint32_t range[2050];
  std::fill(range, range + 2050, 1);
  parallelFor(0, 2049, [&range](size_t I) { ++range[I]; });

  uint32_t expected[2049];
  std::fill(expected, expected + 2049, 2);
  ASSERT_TRUE(std::equal(range, range + 2049, expected));
  // Check that we don't write past the end of the requested range.
  ASSERT_EQ(range[2049], 1u);
}

TEST(Parallel, TransformReduce) {
  // Sum an empty list, check that it works.
  auto identity = [](uint32_t v) { return v; };
  uint32_t sum = parallelTransformReduce(ArrayRef<uint32_t>(), 0U,
                                         std::plus<uint32_t>(), identity);
  EXPECT_EQ(sum, 0U);

  // Sum the lengths of these strings in parallel.
  const char *strs[] = {"a", "ab", "abc", "abcd", "abcde", "abcdef"};
  size_t lenSum =
      parallelTransformReduce(strs, static_cast<size_t>(0), std::plus<size_t>(),
                              [](const char *s) { return strlen(s); });
  EXPECT_EQ(lenSum, static_cast<size_t>(21));

  // Check that we handle non-divisible task sizes as above.
  uint32_t range[2050];
  llvm::fill(range, 1);
  sum = parallelTransformReduce(range, 0U, std::plus<uint32_t>(), identity);
  EXPECT_EQ(sum, 2050U);

  llvm::fill(range, 2);
  sum = parallelTransformReduce(range, 0U, std::plus<uint32_t>(), identity);
  EXPECT_EQ(sum, 4100U);

  // Avoid one large task.
  uint32_t range2[3060];
  llvm::fill(range2, 1);
  sum = parallelTransformReduce(range2, 0U, std::plus<uint32_t>(), identity);
  EXPECT_EQ(sum, 3060U);
}

TEST(Parallel, ForEachError) {
  int nums[] = {1, 2, 3, 4, 5, 6};
  Error e = parallelForEachError(nums, [](int v) -> Error {
    if ((v & 1) == 0)
      return createStringError(std::errc::invalid_argument, "asdf");
    return Error::success();
  });
  EXPECT_TRUE(e.isA<ErrorList>());
  std::string errText = toString(std::move(e));
  EXPECT_EQ(errText, std::string("asdf\nasdf\nasdf"));
}

#if LLVM_ENABLE_THREADS
TEST(Parallel, NestedTaskGroup) {
  parallel::TaskGroup tg;
  EXPECT_TRUE(tg.isParallel() || (parallel::strategy.ThreadsRequested == 1));

  tg.spawn([&]() {
    parallel::TaskGroup nestedTG;
    EXPECT_TRUE(nestedTG.isParallel() ||
                (parallel::strategy.ThreadsRequested == 1));
  });
}

// Verify nested parallelFor doesn't deadlock. This is a simplified version of
// the pattern from https://reviews.llvm.org/D61115 that originally motivated
// serializing nested TaskGroups. With work-stealing in helpSync(), nested
// parallelism now works without deadlock.
TEST(Parallel, NestedParallelFor) {
  std::atomic<uint32_t> count{0};
  parallelFor(0, 8, [&](size_t i) {
    parallelFor(0, 8, [&](size_t j) {
      parallelFor(0, 8, [&](size_t k) {
        count.fetch_add(1, std::memory_order_relaxed);
      });
    });
  });
  EXPECT_EQ(count.load(), 512u);
}
#endif

#endif