Rename flaky_report to integration_test_report and add try/finally cleanup

- Rename scripts/flaky_report/ to scripts/integration_test_report/ to
  reflect expanded scope beyond flaky-test detection
- Update workflow references to the renamed script module in both CI files
- Wrap the bodies of test_integration_file_search and
  test_integration_streaming_file_search in try/finally to ensure vector
  store cleanup (delete_vector_store) runs even on test failure or timeout

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
Giles Odigwe
2026-04-30 10:22:00 -07:00
Unverified
parent d2de5ba1b5
commit 52589ab474
6 changed files with 55 additions and 52 deletions
@@ -4401,25 +4401,27 @@ async def test_integration_file_search() -> None:
assert isinstance(openai_responses_client, SupportsChatGetResponse)
file_id, vector_store = await create_vector_store(openai_responses_client)
# Use static method for file search tool
file_search_tool = OpenAIChatClient.get_file_search_tool(vector_store_ids=[vector_store.vector_store_id])
# Test that the client will use the file search tool
response = await openai_responses_client.get_response(
messages=[
Message(
role="user",
contents=["What is the weather today? Do a file search to find the answer."],
)
],
options={
"tool_choice": "auto",
"tools": [file_search_tool],
},
)
try:
# Use static method for file search tool
file_search_tool = OpenAIChatClient.get_file_search_tool(vector_store_ids=[vector_store.vector_store_id])
# Test that the client will use the file search tool
response = await openai_responses_client.get_response(
messages=[
Message(
role="user",
contents=["What is the weather today? Do a file search to find the answer."],
)
],
options={
"tool_choice": "auto",
"tools": [file_search_tool],
},
)
await delete_vector_store(openai_responses_client, file_id, vector_store.vector_store_id)
assert "sunny" in response.text.lower()
assert "75" in response.text
assert "sunny" in response.text.lower()
assert "75" in response.text
finally:
await delete_vector_store(openai_responses_client, file_id, vector_store.vector_store_id)
@pytest.mark.flaky
@@ -4431,36 +4433,37 @@ async def test_integration_streaming_file_search() -> None:
assert isinstance(openai_responses_client, SupportsChatGetResponse)
file_id, vector_store = await create_vector_store(openai_responses_client)
# Use static method for file search tool
file_search_tool = OpenAIChatClient.get_file_search_tool(vector_store_ids=[vector_store.vector_store_id])
# Test that the client will use the file search tool
response = openai_responses_client.get_response(
messages=[
Message(
role="user",
contents=["What is the weather today? Do a file search to find the answer."],
)
],
stream=True,
options={
"tool_choice": "auto",
"tools": [file_search_tool],
},
)
try:
# Use static method for file search tool
file_search_tool = OpenAIChatClient.get_file_search_tool(vector_store_ids=[vector_store.vector_store_id])
# Test that the client will use the file search tool
response = openai_responses_client.get_response(
messages=[
Message(
role="user",
contents=["What is the weather today? Do a file search to find the answer."],
)
],
stream=True,
options={
"tool_choice": "auto",
"tools": [file_search_tool],
},
)
assert response is not None
full_message: str = ""
async for chunk in response:
assert chunk is not None
assert isinstance(chunk, ChatResponseUpdate)
for content in chunk.contents:
if content.type == "text" and content.text:
full_message += content.text
assert response is not None
full_message: str = ""
async for chunk in response:
assert chunk is not None
assert isinstance(chunk, ChatResponseUpdate)
for content in chunk.contents:
if content.type == "text" and content.text:
full_message += content.text
await delete_vector_store(openai_responses_client, file_id, vector_store.vector_store_id)
assert "sunny" in full_message.lower()
assert "75" in full_message
assert "sunny" in full_message.lower()
assert "75" in full_message
finally:
await delete_vector_store(openai_responses_client, file_id, vector_store.vector_store_id)
@pytest.mark.flaky
@@ -7,5 +7,5 @@ them with historical data, and generates a markdown trend report showing
per-test status across the last N runs.
Usage:
uv run python -m scripts.flaky_report <reports-dir> <history-file> <output-file>
uv run python -m scripts.integration_test_report <reports-dir> <history-file> <output-file>
"""
@@ -3,10 +3,10 @@
"""CLI entry point for the integration test report tool.
Usage:
uv run python -m scripts.flaky_report <reports-dir> <history-file> <output-file>
uv run python -m scripts.integration_test_report <reports-dir> <history-file> <output-file>
Example (from python/ directory):
uv run python -m scripts.flaky_report \\
uv run python -m scripts.integration_test_report \\
../test-results/ \\
integration-report-history.json \\
integration-test-report.md
@@ -14,7 +14,7 @@ Example (from python/ directory):
import sys
from scripts.flaky_report.aggregate import main
from scripts.integration_test_report.aggregate import main
if __name__ == "__main__":
sys.exit(main())