Related to #114695. This PR adds a Weak AVL Tree for tsearch APIs. The symbol implementations are coming in a following up PR to avoid creating a huge patch. The work is based on @MaskRay's recent post (see below). A general self-balancing binary search tree where the node pointer can be used as stable handles to the stored values. The self-balancing strategy is the Weak AVL (WAVL) tree, based on the following foundational references: 1. https://maskray.me/blog/2025-12-14-weak-avl-tree 2. https://reviews.freebsd.org/D25480 3. https://ics.uci.edu/~goodrich/teach/cs165/notes/WeakAVLTrees.pdf 4. https://dl.acm.org/doi/10.1145/2689412 (Rank-Balanced Trees) WAVL trees belong to the rank-balanced binary search tree framework (reference 4), alongside AVL and Red-Black trees. Key Properties of WAVL Trees: 1. Relationship to Red-Black Trees: A WAVL tree can always be colored as a Red-Black tree. 2. Relationship to AVL Trees: An AVL tree meets all the requirements of a WAVL tree. Insertion-only WAVL trees maintain the same structure as AVL trees. Rank-Based Balancing: In rank-balanced trees, each node is assigned a rank (conceptually similar to height). In AVL/WAVL, the rank difference between a parent and its child is strictly enforced to be either **1** or **2**. - **AVL Trees:** Rank is equivalent to height. The strict condition is that there are no 2-2 nodes (a parent with rank difference 2 to both children). - **WAVL Trees:** The no 2-2 node rule is relaxed for internal nodes during the deletion fixup process, making WAVL trees less strictly balanced than AVL trees but easier to maintain than Red-Black trees. Balancing Mechanics (Promotion/Demotion): - **Null nodes** are considered to have rank -1. - **External/leaf nodes** have rank 0. - **Insertion:** Inserting a node may create a situation where a parent and child have the same rank (difference 0). This is fixed by **promoting** the rank of the parent and propagating the fix upwards using at most two rotations (trinode fixup). - **Deletion:** Deleting a node may result in a parent being 3 ranks higher than a child (difference 3). This is fixed by **demoting** the parent's rank and propagating the fix upwards. Implementation Detail: The rank is **implicitly** maintained. We never store the full rank. Instead, a 2-bit tag is used on each node to record the rank difference to each child: - Bit cleared (0) -> Rank difference is **1**. - Bit set (1) -> Rank difference is **2**. --------- Co-authored-by: Michael Jones <michaelrj@google.com>
99 lines
3.1 KiB
C++
99 lines
3.1 KiB
C++
//===-- weak_avl_fuzz.cpp -------------------------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
///
|
|
/// Fuzzing test for llvm-libc weak AVL implementations.
|
|
///
|
|
//===----------------------------------------------------------------------===//
|
|
#include "hdr/types/ENTRY.h"
|
|
#include "src/__support/CPP/bit.h"
|
|
#include "src/__support/CPP/optional.h"
|
|
#include "src/__support/macros/config.h"
|
|
#include "src/__support/weak_avl.h"
|
|
|
|
namespace LIBC_NAMESPACE_DECL {
|
|
|
|
// A sequence of actions:
|
|
// - Erase: a single byte valued (5, 6 mod 7) followed by an int
|
|
// - Find: a single byte valued (4 mod 7) followed by an int
|
|
// - FindOrInsert: a single byte valued (0,1,2,3 mod 7) followed by an int
|
|
extern "C" size_t LLVMFuzzerMutate(uint8_t *data, size_t size, size_t max_size);
|
|
extern "C" size_t LLVMFuzzerCustomMutator(uint8_t *data, size_t size,
|
|
size_t max_size, unsigned int seed) {
|
|
size = LLVMFuzzerMutate(data, size, max_size);
|
|
return size / (1 + sizeof(int)) * (1 + sizeof(int));
|
|
}
|
|
|
|
class AVLTree {
|
|
using Node = WeakAVLNode<int>;
|
|
Node *root = nullptr;
|
|
bool reversed = false;
|
|
static int compare(int a, int b) { return (a > b) - (a < b); }
|
|
static int reverse_compare(int a, int b) { return (b > a) - (b < a); }
|
|
|
|
public:
|
|
AVLTree(bool reversed = false) : reversed(reversed) {}
|
|
bool find(int key) {
|
|
return Node::find(root, key, reversed ? reverse_compare : compare)
|
|
.has_value();
|
|
}
|
|
bool find_or_insert(int key) {
|
|
return Node::find_or_insert(root, key, reversed ? reverse_compare : compare)
|
|
.has_value();
|
|
}
|
|
bool erase(int key) {
|
|
if (cpp::optional<Node *> node =
|
|
Node::find(root, key, reversed ? reverse_compare : compare)) {
|
|
Node::erase(root, node.value());
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
~AVLTree() { Node::destroy(root); }
|
|
};
|
|
|
|
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
|
|
AVLTree tree1;
|
|
AVLTree tree2(true);
|
|
for (size_t i = 0; i + (1 + sizeof(int)) <= size; i += 1 + sizeof(int)) {
|
|
uint8_t action = data[i];
|
|
int key;
|
|
__builtin_memcpy(&key, data + i + 1, sizeof(int));
|
|
if (action % 7 == 4) {
|
|
// Find
|
|
bool res1 = tree1.find(key);
|
|
bool res2 = tree2.find(key);
|
|
if (res1 != res2)
|
|
__builtin_trap();
|
|
|
|
} else if (action % 7 == 5 || action % 7 == 6) {
|
|
// Erase
|
|
bool res1 = tree1.erase(key);
|
|
bool res2 = tree2.erase(key);
|
|
if (res1 != res2)
|
|
__builtin_trap();
|
|
if (tree1.find(key))
|
|
__builtin_trap();
|
|
if (tree2.find(key))
|
|
__builtin_trap();
|
|
} else {
|
|
// FindOrInsert
|
|
bool res1 = tree1.find_or_insert(key);
|
|
bool res2 = tree2.find_or_insert(key);
|
|
if (res1 != res2)
|
|
__builtin_trap();
|
|
if (!tree1.find(key))
|
|
__builtin_trap();
|
|
if (!tree2.find(key))
|
|
__builtin_trap();
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
} // namespace LIBC_NAMESPACE_DECL
|