[CI] Make cache_lit_timing_files.py Script Gracefully Fail (#162316)

Caching the lit timing files is a performance optimization and does not
impact test fidelity. There have been some flakes where this script
fails to download files and exits with code 1, causing the job to fail
before it even starts running tests. This is undesirable, as the tests
only run 10-15% slower without the timing files, so catch the exceptions
and emit a warning that we can track later in the rare case that we
cannot download the timing files.

This fixes #162294.
This commit is contained in:
Aiden Grossman
2025-10-07 11:20:40 -07:00
committed by GitHub
parent c16d25282e
commit 93f2e0a443

View File

@@ -17,6 +17,7 @@ import pathlib
import glob
from google.cloud import storage
from google.api_core import exceptions
GCS_PARALLELISM = 100
@@ -50,7 +51,14 @@ def _maybe_download_timing_file(blob):
def download_timing_files(storage_client, bucket_name: str):
bucket = storage_client.bucket(bucket_name)
blobs = bucket.list_blobs(prefix="lit_timing")
try:
blobs = bucket.list_blobs(prefix="lit_timing")
except exceptions.ClientError as client_error:
print(
"::warning file=cache_lit_timing_files.py::Failed to list blobs "
"in bucket."
)
sys.exit(0)
with multiprocessing.pool.ThreadPool(GCS_PARALLELISM) as thread_pool:
futures = []
for timing_file_blob in blobs:
@@ -60,7 +68,13 @@ def download_timing_files(storage_client, bucket_name: str):
)
)
for future in futures:
future.get()
future.wait()
if not future.successful():
print(
"::warning file=cache_lit_timing_files.py::Failed to "
"download lit timing file."
)
continue
print("Done downloading")