Rename flaky_report to integration_test_report and add try/finally cleanup

- Rename scripts/flaky_report/ to scripts/integration_test_report/ to reflect expanded scope beyond flaky-test detection
- Update workflow references in both CI files
- Wrap file search integration tests in try/finally to ensure vector store cleanup runs even on test failure or timeout

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-06-16 21:04:09 +08:00 · 2026-04-30 10:22:00 -07:00
parent d2de5ba1b5
commit 52589ab474
6 changed files with 55 additions and 52 deletions
@@ -4401,25 +4401,27 @@ async def test_integration_file_search() -> None:
    assert isinstance(openai_responses_client, SupportsChatGetResponse)

    file_id, vector_store = await create_vector_store(openai_responses_client)
-    # Use static method for file search tool
-    file_search_tool = OpenAIChatClient.get_file_search_tool(vector_store_ids=[vector_store.vector_store_id])
-    # Test that the client will use the file search tool
-    response = await openai_responses_client.get_response(
-        messages=[
-            Message(
-                role="user",
-                contents=["What is the weather today? Do a file search to find the answer."],
-            )
-        ],
-        options={
-            "tool_choice": "auto",
-            "tools": [file_search_tool],
-        },
-    )
+    try:
+        # Use static method for file search tool
+        file_search_tool = OpenAIChatClient.get_file_search_tool(vector_store_ids=[vector_store.vector_store_id])
+        # Test that the client will use the file search tool
+        response = await openai_responses_client.get_response(
+            messages=[
+                Message(
+                    role="user",
+                    contents=["What is the weather today? Do a file search to find the answer."],
+                )
+            ],
+            options={
+                "tool_choice": "auto",
+                "tools": [file_search_tool],
+            },
+        )

-    await delete_vector_store(openai_responses_client, file_id, vector_store.vector_store_id)
-    assert "sunny" in response.text.lower()
-    assert "75" in response.text
+        assert "sunny" in response.text.lower()
+        assert "75" in response.text
+    finally:
+        await delete_vector_store(openai_responses_client, file_id, vector_store.vector_store_id)


@pytest.mark.flaky
@@ -4431,36 +4433,37 @@ async def test_integration_streaming_file_search() -> None:
    assert isinstance(openai_responses_client, SupportsChatGetResponse)

    file_id, vector_store = await create_vector_store(openai_responses_client)
-    # Use static method for file search tool
-    file_search_tool = OpenAIChatClient.get_file_search_tool(vector_store_ids=[vector_store.vector_store_id])
-    # Test that the client will use the file search tool
-    response = openai_responses_client.get_response(
-        messages=[
-            Message(
-                role="user",
-                contents=["What is the weather today? Do a file search to find the answer."],
-            )
-        ],
-        stream=True,
-        options={
-            "tool_choice": "auto",
-            "tools": [file_search_tool],
-        },
-    )
+    try:
+        # Use static method for file search tool
+        file_search_tool = OpenAIChatClient.get_file_search_tool(vector_store_ids=[vector_store.vector_store_id])
+        # Test that the client will use the file search tool
+        response = openai_responses_client.get_response(
+            messages=[
+                Message(
+                    role="user",
+                    contents=["What is the weather today? Do a file search to find the answer."],
+                )
+            ],
+            stream=True,
+            options={
+                "tool_choice": "auto",
+                "tools": [file_search_tool],
+            },
+        )

-    assert response is not None
-    full_message: str = ""
-    async for chunk in response:
-        assert chunk is not None
-        assert isinstance(chunk, ChatResponseUpdate)
-        for content in chunk.contents:
-            if content.type == "text" and content.text:
-                full_message += content.text
+        assert response is not None
+        full_message: str = ""
+        async for chunk in response:
+            assert chunk is not None
+            assert isinstance(chunk, ChatResponseUpdate)
+            for content in chunk.contents:
+                if content.type == "text" and content.text:
+                    full_message += content.text

-    await delete_vector_store(openai_responses_client, file_id, vector_store.vector_store_id)
-
-    assert "sunny" in full_message.lower()
-    assert "75" in full_message
+        assert "sunny" in full_message.lower()
+        assert "75" in full_message
+    finally:
+        await delete_vector_store(openai_responses_client, file_id, vector_store.vector_store_id)


@pytest.mark.flaky
@@ -7,5 +7,5 @@ them with historical data, and generates a markdown trend report showing
 per-test status across the last N runs.

 Usage:
-    uv run python -m scripts.flaky_report <reports-dir> <history-file> <output-file>
+    uv run python -m scripts.integration_test_report <reports-dir> <history-file> <output-file>
 """
@@ -3,10 +3,10 @@
 """CLI entry point for the integration test report tool.

 Usage:
-    uv run python -m scripts.flaky_report <reports-dir> <history-file> <output-file>
+    uv run python -m scripts.integration_test_report <reports-dir> <history-file> <output-file>

 Example (from python/ directory):
-    uv run python -m scripts.flaky_report \\
+    uv run python -m scripts.integration_test_report \\
        ../test-results/ \\
        integration-report-history.json \\
        integration-test-report.md
@@ -14,7 +14,7 @@ Example (from python/ directory):

 import sys

-from scripts.flaky_report.aggregate import main
+from scripts.integration_test_report.aggregate import main

 if __name__ == "__main__":
    sys.exit(main())