When multiple threads are stopped at the same breakpoint, LLDB currently steps each thread over the breakpoint one at a time. Each step requires disabling the breakpoint, single-stepping one thread, and re-enabling it, resulting in N disable/enable cycles and N individual vCont packets for N threads. This is a common scenario for hot breakpoints in multithreaded programs and scales poorly. This patch batches the step-over so that all threads at the same breakpoint site are stepped together in a single vCont packet, with the breakpoint disabled once at the start and re-enabled once after the last thread finishes. At the top of WillResume, any leftover StepOverBreakpoint plans from a previous cycle are popped with their re-enable side effect suppressed via SetReenabledBreakpointSite, giving a clean slate. SetupToStepOverBreakpointIfNeeded then creates fresh plans for all threads that still need to step over a breakpoint, and these are grouped by breakpoint address. For groups with multiple threads, each plan is set to defer its re-enable through SetDeferReenableBreakpointSite. Instead of re-enabling the breakpoint directly when it completes, a deferred plan calls ThreadFinishedSteppingOverBreakpoint, which decrements a per-address tracking count. The breakpoint is only re-enabled when the count reaches zero. All threads in the largest group are resumed together in a single batched vCont packet. If some threads don't complete their step in one cycle, the pop-and-recreate logic naturally re-batches the remaining threads on the next WillResume call. For 10 threads at the same breakpoint, this reduces the operation from 10 z0/Z0 pairs and 10 vCont packets to 1 z0 + 1 Z0 and a few progressively smaller batched vCont packets. EDIT: We tried to merge this PR twice. The first time, the test was flaky, so we had to revert.
The second time, we broke two tests on a Windows machine: https://lab.llvm.org/buildbot/#/builders/141/builds/15798 The tests failed because the cleanup code in `WillResume` was popping **ALL** `StepOverBreakpoint` plans, including non-deferred ones from incomplete single-steps. The sequence was: 1) Multiple threads hit the same breakpoint. One thread's breakpoint condition evaluates to false, so it needs to auto-continue. 2) A `StepOverBreakpoint` plan is created for that thread (non-deferred). 3) On the next `WillResume`, the cleanup pops that non-deferred plan. 4) Now the `StopOthers` scan finds no thread with a `StopOthers()` plan, so `thread_to_run` stays null. 5) The else branch runs, calling `SetupToStepOverBreakpointIfNeeded` on **ALL** threads, including the thread that legitimately hit the breakpoint with a true condition. 6) That thread gets a new `StepOverBreakpoint` plan pushed, which overwrites its breakpoint stop reason with trace when the step completes. The error `trace (2) != breakpoint (3)` confirms this: the thread that should have reported breakpoint as its stop reason instead reports trace, because an unwanted `StepOverBreakpoint` plan was pushed on it and completed. The newly added code fixes this by popping only plans for which `GetDeferReenableBreakpointSite()` returns true. Co-authored-by: Bar Soloveychik <barsolo@fb.com>
217 lines
9.2 KiB
Python
217 lines
9.2 KiB
Python
"""
Test that when multiple threads are stopped at the same breakpoint, LLDB sends
a batched vCont with multiple step actions and only one breakpoint disable/
re-enable pair, rather than stepping each thread individually with repeated
breakpoint toggles.

Uses a mock GDB server to directly verify the packets LLDB sends.
"""
|
|
|
|
import re
|
|
|
|
import lldb
|
|
from lldbsuite.test.lldbtest import *
|
|
from lldbsuite.test.decorators import *
|
|
from lldbsuite.test.gdbclientutils import *
|
|
from lldbsuite.test.lldbgdbclient import GDBRemoteTestBase
|
|
|
|
|
|
class TestBatchedBreakpointStepOver(GDBRemoteTestBase):
    """Packet-level check that breakpoint step-overs are batched per site.

    A mock GDB server reports several threads stopped at one address. After a
    breakpoint is set there and the process is continued, the packets LLDB
    sent are inspected for the number of breakpoint disable/enable packets
    and the size of the largest multi-step vCont.
    """

    @skipIfXmlSupportMissing
    def test(self):
        """Continue from a shared breakpoint and inspect the packet log."""
        BP_ADDR = 0x0000000000401020
        # PC reported for a thread once it has stepped off the breakpoint.
        STEPPED_PC = BP_ADDR + 1
        NUM_THREADS = 10
        TIDS = list(range(0x101, 0x101 + NUM_THREADS))

        # Register description returned for the "target.xml" annex.
        TARGET_XML = "\n".join(
            (
                '<?xml version="1.0"?>',
                '<target version="1.0">',
                "<architecture>i386:x86-64</architecture>",
                '<feature name="org.gnu.gdb.i386.core">',
                '<reg name="rax" bitsize="64" regnum="0" type="int" group="general"/>',
                '<reg name="rbx" bitsize="64" regnum="1" type="int" group="general"/>',
                '<reg name="rcx" bitsize="64" regnum="2" type="int" group="general"/>',
                '<reg name="rdx" bitsize="64" regnum="3" type="int" group="general"/>',
                '<reg name="rsi" bitsize="64" regnum="4" type="int" group="general"/>',
                '<reg name="rdi" bitsize="64" regnum="5" type="int" group="general"/>',
                '<reg name="rbp" bitsize="64" regnum="6" type="data_ptr" group="general"/>',
                '<reg name="rsp" bitsize="64" regnum="7" type="data_ptr" group="general"/>',
                '<reg name="r8" bitsize="64" regnum="8" type="int" group="general"/>',
                '<reg name="r9" bitsize="64" regnum="9" type="int" group="general"/>',
                '<reg name="r10" bitsize="64" regnum="10" type="int" group="general"/>',
                '<reg name="r11" bitsize="64" regnum="11" type="int" group="general"/>',
                '<reg name="r12" bitsize="64" regnum="12" type="int" group="general"/>',
                '<reg name="r13" bitsize="64" regnum="13" type="int" group="general"/>',
                '<reg name="r14" bitsize="64" regnum="14" type="int" group="general"/>',
                '<reg name="r15" bitsize="64" regnum="15" type="int" group="general"/>',
                '<reg name="rip" bitsize="64" regnum="16" type="code_ptr" group="general"/>',
                '<reg name="eflags" bitsize="32" regnum="17" type="int" group="general"/>',
                '<reg name="cs" bitsize="32" regnum="18" type="int" group="general"/>',
                '<reg name="ss" bitsize="32" regnum="19" type="int" group="general"/>',
                "</feature>",
                "</target>",
            )
        )

        def hex_csv(values):
            """Join integers as comma-separated lowercase hex, no prefix."""
            return ",".join(map("{:x}".format, values))

        class MyResponder(MockGDBServerResponder):
            """Mock server whose threads all start stopped at BP_ADDR."""

            def __init__(self):
                super().__init__()
                # Number of vCont resume packets handled so far.
                self.resume_count = 0
                # Thread ids that have been given a step action.
                self.stepped_threads = set()

            def _breakpoint_stop_reply(self, tid):
                # Stop reply naming `tid` as the reporting thread and placing
                # every thread's PC at the breakpoint address.
                return "T05thread:{:x};threads:{};thread-pcs:{};swbreak:;".format(
                    tid, hex_csv(TIDS), hex_csv([BP_ADDR] * len(TIDS))
                )

            def qSupported(self, client_supported):
                return (
                    "PacketSize=3fff;QStartNoAckMode+;"
                    "qXfer:features:read+;swbreak+;hwbreak+"
                )

            def qfThreadInfo(self):
                return "m" + hex_csv(TIDS)

            def qsThreadInfo(self):
                return "l"

            def haltReason(self):
                # The first thread is the one reported as having stopped.
                return self._breakpoint_stop_reply(TIDS[0])

            def threadStopInfo(self, threadnum):
                return self._breakpoint_stop_reply(threadnum)

            def setBreakpoint(self, packet):
                return "OK"

            def readRegisters(self):
                return "00" * 160

            def readRegister(self, regno):
                return "00" * 8

            def qXferRead(self, obj, annex, offset, length):
                if annex != "target.xml":
                    return None, False
                return TARGET_XML, False

            def other(self, packet):
                """Answer the vCont query, resumes and breakpoint removals."""
                if packet == "vCont?":
                    return "vCont;c;C;s;S"
                if packet.startswith("vCont;"):
                    return self._handle_vCont(packet)
                # Any packet starting with "z" is acknowledged.
                return "OK" if packet.startswith("z") else ""

            def _handle_vCont(self, packet):
                """Record the threads given an ``s`` action and report a stop."""
                self.resume_count += 1

                def tid_of(step_action):
                    # Drop the "s:" prefix; if the id contains a ".", keep
                    # the field that follows the first one.
                    thread_id = step_action[2:]
                    if "." in thread_id:
                        thread_id = thread_id.split(".")[1]
                    return int(thread_id, 16)

                stepping_tids = [
                    tid_of(action)
                    for action in packet[len("vCont;") :].split(";")
                    if action.startswith("s:")
                ]

                # Every thread asked to step is treated as having completed it.
                self.stepped_threads.update(stepping_tids)

                # The first stepping thread reports the stop; fall back to the
                # first thread when the packet carried no step action.
                report_tid = stepping_tids[0] if stepping_tids else TIDS[0]

                # Stepped threads sit past the breakpoint, the rest are still
                # on it. Once every thread has stepped this is STEPPED_PC for
                # all of them.
                current_pcs = [
                    STEPPED_PC if tid in self.stepped_threads else BP_ADDR
                    for tid in TIDS
                ]
                return "T05thread:{:x};threads:{};thread-pcs:{};".format(
                    report_tid, hex_csv(TIDS), hex_csv(current_pcs)
                )

        self.server.responder = MyResponder()
        self.runCmd("platform select remote-linux")
        target = self.createTarget("a.yaml")
        process = self.connect(target)

        self.assertEqual(process.GetNumThreads(), NUM_THREADS)

        # All threads are already stopped at BP_ADDR when the breakpoint is set.
        bkpt = target.BreakpointCreateByAddress(BP_ADDR)
        self.assertTrue(bkpt.IsValid())

        # Resuming makes LLDB step the threads over the breakpoint.
        process.Continue()

        received = self.server.responder.packetLog.get_received()

        bp_addr_hex = "{:x}".format(BP_ADDR)
        disable_prefix = "z0,{},".format(bp_addr_hex)
        enable_prefix = "Z0,{},".format(bp_addr_hex)

        # Sort the log into breakpoint removals (z0), insertions (Z0) and
        # vCont packets that carry at least one step action.
        z0_packets = []
        Z0_packets = []
        vcont_step_packets = []

        for pkt in received:
            if pkt.startswith(disable_prefix):
                z0_packets.append(pkt)
                continue
            if pkt.startswith(enable_prefix):
                Z0_packets.append(pkt)
                continue
            if not pkt.startswith("vCont;"):
                continue
            step_count = len(re.findall(r";s:", pkt))
            if step_count > 0:
                vcont_step_packets.append((step_count, pkt))

        # The breakpoint must be disabled exactly once.
        self.assertEqual(
            len(z0_packets),
            1,
            "Expected 1 z0 (disable) packet, got {}: {}".format(
                len(z0_packets), z0_packets
            ),
        )

        # Two Z0 packets for our address are expected: the initial insertion
        # from creating the breakpoint, plus a single re-enable after stepping.
        self.assertEqual(
            len(Z0_packets),
            2,
            "Expected 2 Z0 packets (1 set + 1 re-enable), got {}: {}".format(
                len(Z0_packets), Z0_packets
            ),
        )

        # The largest vCont must step at least NUM_THREADS threads at once.
        max_batch = max((count for count, _ in vcont_step_packets), default=0)
        self.assertGreaterEqual(
            max_batch,
            NUM_THREADS,
            "Expected a vCont with {} step actions (batched), "
            "but max was {}. Packets: {}".format(
                NUM_THREADS,
                max_batch,
                list(vcont_step_packets),
            ),
        )
|