From 52589ab474eeca4ca52d81ec1695c0f6572cd439 Mon Sep 17 00:00:00 2001 From: Giles Odigwe Date: Thu, 30 Apr 2026 10:22:00 -0700 Subject: [PATCH] Rename flaky_report to integration_test_report and add try/finally cleanup - Rename scripts/flaky_report/ to scripts/integration_test_report/ to reflect expanded scope beyond flaky-test detection - Update workflow references in both CI files - Wrap file search integration tests in try/finally to ensure vector store cleanup runs even on test failure or timeout Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../workflows/python-integration-tests.yml | 2 +- .github/workflows/python-merge-tests.yml | 2 +- .../tests/openai/test_openai_chat_client.py | 95 ++++++++++--------- .../__init__.py | 2 +- .../__main__.py | 6 +- .../aggregate.py | 0 6 files changed, 55 insertions(+), 52 deletions(-) rename python/scripts/{flaky_report => integration_test_report}/__init__.py (76%) rename python/scripts/{flaky_report => integration_test_report}/__main__.py (60%) rename python/scripts/{flaky_report => integration_test_report}/aggregate.py (100%) diff --git a/.github/workflows/python-integration-tests.yml b/.github/workflows/python-integration-tests.yml index 8c74f21130..9887b667a9 100644 --- a/.github/workflows/python-integration-tests.yml +++ b/.github/workflows/python-integration-tests.yml @@ -513,7 +513,7 @@ jobs: integration-report-history-integration- - name: Generate trend report run: > - uv run python scripts/flaky_report/aggregate.py + uv run python scripts/integration_test_report/aggregate.py ../test-results/ integration-report-history.json integration-test-report.md diff --git a/.github/workflows/python-merge-tests.yml b/.github/workflows/python-merge-tests.yml index 1ad5019951..2e6851e476 100644 --- a/.github/workflows/python-merge-tests.yml +++ b/.github/workflows/python-merge-tests.yml @@ -694,7 +694,7 @@ jobs: integration-report-history-merge- - name: Generate trend report run: > - uv run python scripts/flaky_report/aggregate.py + uv run python scripts/integration_test_report/aggregate.py ../test-results/ integration-report-history.json integration-test-report.md diff --git a/python/packages/openai/tests/openai/test_openai_chat_client.py b/python/packages/openai/tests/openai/test_openai_chat_client.py index 5c9b2b0438..45f41690ff 100644 --- a/python/packages/openai/tests/openai/test_openai_chat_client.py +++ b/python/packages/openai/tests/openai/test_openai_chat_client.py @@ -4401,25 +4401,27 @@ async def test_integration_file_search() -> None: assert isinstance(openai_responses_client, SupportsChatGetResponse) file_id, vector_store = await create_vector_store(openai_responses_client) - # Use static method for file search tool - file_search_tool = OpenAIChatClient.get_file_search_tool(vector_store_ids=[vector_store.vector_store_id]) - # Test that the client will use the file search tool - response = await openai_responses_client.get_response( - messages=[ - Message( - role="user", - contents=["What is the weather today? Do a file search to find the answer."], - ) - ], - options={ - "tool_choice": "auto", - "tools": [file_search_tool], - }, - ) + try: + # Use static method for file search tool + file_search_tool = OpenAIChatClient.get_file_search_tool(vector_store_ids=[vector_store.vector_store_id]) + # Test that the client will use the file search tool + response = await openai_responses_client.get_response( + messages=[ + Message( + role="user", + contents=["What is the weather today? Do a file search to find the answer."], + ) + ], + options={ + "tool_choice": "auto", + "tools": [file_search_tool], + }, + ) - await delete_vector_store(openai_responses_client, file_id, vector_store.vector_store_id) - assert "sunny" in response.text.lower() - assert "75" in response.text + assert "sunny" in response.text.lower() + assert "75" in response.text + finally: + await delete_vector_store(openai_responses_client, file_id, vector_store.vector_store_id) @pytest.mark.flaky @@ -4431,36 +4433,37 @@ async def test_integration_streaming_file_search() -> None: assert isinstance(openai_responses_client, SupportsChatGetResponse) file_id, vector_store = await create_vector_store(openai_responses_client) - # Use static method for file search tool - file_search_tool = OpenAIChatClient.get_file_search_tool(vector_store_ids=[vector_store.vector_store_id]) - # Test that the client will use the file search tool - response = openai_responses_client.get_response( - messages=[ - Message( - role="user", - contents=["What is the weather today? Do a file search to find the answer."], - ) - ], - stream=True, - options={ - "tool_choice": "auto", - "tools": [file_search_tool], - }, - ) + try: + # Use static method for file search tool + file_search_tool = OpenAIChatClient.get_file_search_tool(vector_store_ids=[vector_store.vector_store_id]) + # Test that the client will use the file search tool + response = openai_responses_client.get_response( + messages=[ + Message( + role="user", + contents=["What is the weather today? Do a file search to find the answer."], + ) + ], + stream=True, + options={ + "tool_choice": "auto", + "tools": [file_search_tool], + }, + ) - assert response is not None - full_message: str = "" - async for chunk in response: - assert chunk is not None - assert isinstance(chunk, ChatResponseUpdate) - for content in chunk.contents: - if content.type == "text" and content.text: - full_message += content.text + assert response is not None + full_message: str = "" + async for chunk in response: + assert chunk is not None + assert isinstance(chunk, ChatResponseUpdate) + for content in chunk.contents: + if content.type == "text" and content.text: + full_message += content.text - await delete_vector_store(openai_responses_client, file_id, vector_store.vector_store_id) - - assert "sunny" in full_message.lower() - assert "75" in full_message + assert "sunny" in full_message.lower() + assert "75" in full_message + finally: + await delete_vector_store(openai_responses_client, file_id, vector_store.vector_store_id) @pytest.mark.flaky diff --git a/python/scripts/flaky_report/__init__.py b/python/scripts/integration_test_report/__init__.py similarity index 76% rename from python/scripts/flaky_report/__init__.py rename to python/scripts/integration_test_report/__init__.py index e3b0cc6de7..96532f80a2 100644 --- a/python/scripts/flaky_report/__init__.py +++ b/python/scripts/integration_test_report/__init__.py @@ -7,5 +7,5 @@ them with historical data, and generates a markdown trend report showing per-test status across the last N runs. Usage: - uv run python -m scripts.flaky_report + uv run python -m scripts.integration_test_report """ diff --git a/python/scripts/flaky_report/__main__.py b/python/scripts/integration_test_report/__main__.py similarity index 60% rename from python/scripts/flaky_report/__main__.py rename to python/scripts/integration_test_report/__main__.py index c944e135f8..64fed55605 100644 --- a/python/scripts/flaky_report/__main__.py +++ b/python/scripts/integration_test_report/__main__.py @@ -3,10 +3,10 @@ """CLI entry point for the integration test report tool. Usage: - uv run python -m scripts.flaky_report + uv run python -m scripts.integration_test_report Example (from python/ directory): - uv run python -m scripts.flaky_report \\ + uv run python -m scripts.integration_test_report \\ ../test-results/ \\ integration-report-history.json \\ integration-test-report.md @@ -14,7 +14,7 @@ Example (from python/ directory): import sys -from scripts.flaky_report.aggregate import main +from scripts.integration_test_report.aggregate import main if __name__ == "__main__": sys.exit(main()) diff --git a/python/scripts/flaky_report/aggregate.py b/python/scripts/integration_test_report/aggregate.py similarity index 100% rename from python/scripts/flaky_report/aggregate.py rename to python/scripts/integration_test_report/aggregate.py