From 52589ab474eeca4ca52d81ec1695c0f6572cd439 Mon Sep 17 00:00:00 2001
From: Giles Odigwe <gilesodigwe@microsoft.com>
Date: Thu, 30 Apr 2026 10:22:00 -0700
Subject: [PATCH] Rename flaky_report to integration_test_report and add
 try/finally cleanup

- Rename python/scripts/flaky_report/ to
  python/scripts/integration_test_report/ to reflect expanded scope
  beyond flaky-test detection
- Update the aggregate.py script path in both CI workflows
  (python-integration-tests.yml and python-merge-tests.yml)
- Wrap both file search integration tests (non-streaming and streaming)
  in try/finally to ensure vector store cleanup runs even on test
  failure or timeout; assertions now run before the cleanup call

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .../workflows/python-integration-tests.yml    |  2 +-
 .github/workflows/python-merge-tests.yml      |  2 +-
 .../tests/openai/test_openai_chat_client.py   | 95 ++++++++++---------
 .../__init__.py                               |  2 +-
 .../__main__.py                               |  6 +-
 .../aggregate.py                              |  0
 6 files changed, 55 insertions(+), 52 deletions(-)
 rename python/scripts/{flaky_report => integration_test_report}/__init__.py (76%)
 rename python/scripts/{flaky_report => integration_test_report}/__main__.py (60%)
 rename python/scripts/{flaky_report => integration_test_report}/aggregate.py (100%)

diff --git a/.github/workflows/python-integration-tests.yml b/.github/workflows/python-integration-tests.yml
index 8c74f21130..9887b667a9 100644
--- a/.github/workflows/python-integration-tests.yml
+++ b/.github/workflows/python-integration-tests.yml
@@ -513,7 +513,7 @@ jobs:
             integration-report-history-integration-
       - name: Generate trend report
         run: >
-          uv run python scripts/flaky_report/aggregate.py
+          uv run python scripts/integration_test_report/aggregate.py
           ../test-results/
           integration-report-history.json
           integration-test-report.md
diff --git a/.github/workflows/python-merge-tests.yml b/.github/workflows/python-merge-tests.yml
index 1ad5019951..2e6851e476 100644
--- a/.github/workflows/python-merge-tests.yml
+++ b/.github/workflows/python-merge-tests.yml
@@ -694,7 +694,7 @@ jobs:
             integration-report-history-merge-
       - name: Generate trend report
         run: >
-          uv run python scripts/flaky_report/aggregate.py
+          uv run python scripts/integration_test_report/aggregate.py
           ../test-results/
           integration-report-history.json
           integration-test-report.md
diff --git a/python/packages/openai/tests/openai/test_openai_chat_client.py b/python/packages/openai/tests/openai/test_openai_chat_client.py
index 5c9b2b0438..45f41690ff 100644
--- a/python/packages/openai/tests/openai/test_openai_chat_client.py
+++ b/python/packages/openai/tests/openai/test_openai_chat_client.py
@@ -4401,25 +4401,27 @@ async def test_integration_file_search() -> None:
     assert isinstance(openai_responses_client, SupportsChatGetResponse)
 
     file_id, vector_store = await create_vector_store(openai_responses_client)
-    # Use static method for file search tool
-    file_search_tool = OpenAIChatClient.get_file_search_tool(vector_store_ids=[vector_store.vector_store_id])
-    # Test that the client will use the file search tool
-    response = await openai_responses_client.get_response(
-        messages=[
-            Message(
-                role="user",
-                contents=["What is the weather today? Do a file search to find the answer."],
-            )
-        ],
-        options={
-            "tool_choice": "auto",
-            "tools": [file_search_tool],
-        },
-    )
+    try:
+        # Use static method for file search tool
+        file_search_tool = OpenAIChatClient.get_file_search_tool(vector_store_ids=[vector_store.vector_store_id])
+        # Test that the client will use the file search tool
+        response = await openai_responses_client.get_response(
+            messages=[
+                Message(
+                    role="user",
+                    contents=["What is the weather today? Do a file search to find the answer."],
+                )
+            ],
+            options={
+                "tool_choice": "auto",
+                "tools": [file_search_tool],
+            },
+        )
 
-    await delete_vector_store(openai_responses_client, file_id, vector_store.vector_store_id)
-    assert "sunny" in response.text.lower()
-    assert "75" in response.text
+        assert "sunny" in response.text.lower()
+        assert "75" in response.text
+    finally:
+        await delete_vector_store(openai_responses_client, file_id, vector_store.vector_store_id)
 
 
 @pytest.mark.flaky
@@ -4431,36 +4433,37 @@ async def test_integration_streaming_file_search() -> None:
     assert isinstance(openai_responses_client, SupportsChatGetResponse)
 
     file_id, vector_store = await create_vector_store(openai_responses_client)
-    # Use static method for file search tool
-    file_search_tool = OpenAIChatClient.get_file_search_tool(vector_store_ids=[vector_store.vector_store_id])
-    # Test that the client will use the file search tool
-    response = openai_responses_client.get_response(
-        messages=[
-            Message(
-                role="user",
-                contents=["What is the weather today? Do a file search to find the answer."],
-            )
-        ],
-        stream=True,
-        options={
-            "tool_choice": "auto",
-            "tools": [file_search_tool],
-        },
-    )
+    try:
+        # Use static method for file search tool
+        file_search_tool = OpenAIChatClient.get_file_search_tool(vector_store_ids=[vector_store.vector_store_id])
+        # Test that the client will use the file search tool
+        response = openai_responses_client.get_response(
+            messages=[
+                Message(
+                    role="user",
+                    contents=["What is the weather today? Do a file search to find the answer."],
+                )
+            ],
+            stream=True,
+            options={
+                "tool_choice": "auto",
+                "tools": [file_search_tool],
+            },
+        )
 
-    assert response is not None
-    full_message: str = ""
-    async for chunk in response:
-        assert chunk is not None
-        assert isinstance(chunk, ChatResponseUpdate)
-        for content in chunk.contents:
-            if content.type == "text" and content.text:
-                full_message += content.text
+        assert response is not None
+        full_message: str = ""
+        async for chunk in response:
+            assert chunk is not None
+            assert isinstance(chunk, ChatResponseUpdate)
+            for content in chunk.contents:
+                if content.type == "text" and content.text:
+                    full_message += content.text
 
-    await delete_vector_store(openai_responses_client, file_id, vector_store.vector_store_id)
-
-    assert "sunny" in full_message.lower()
-    assert "75" in full_message
+        assert "sunny" in full_message.lower()
+        assert "75" in full_message
+    finally:
+        await delete_vector_store(openai_responses_client, file_id, vector_store.vector_store_id)
 
 
 @pytest.mark.flaky
diff --git a/python/scripts/flaky_report/__init__.py b/python/scripts/integration_test_report/__init__.py
similarity index 76%
rename from python/scripts/flaky_report/__init__.py
rename to python/scripts/integration_test_report/__init__.py
index e3b0cc6de7..96532f80a2 100644
--- a/python/scripts/flaky_report/__init__.py
+++ b/python/scripts/integration_test_report/__init__.py
@@ -7,5 +7,5 @@ them with historical data, and generates a markdown trend report showing
 per-test status across the last N runs.
 
 Usage:
-    uv run python -m scripts.flaky_report <reports-dir> <history-file> <output-file>
+    uv run python -m scripts.integration_test_report <reports-dir> <history-file> <output-file>
 """
diff --git a/python/scripts/flaky_report/__main__.py b/python/scripts/integration_test_report/__main__.py
similarity index 60%
rename from python/scripts/flaky_report/__main__.py
rename to python/scripts/integration_test_report/__main__.py
index c944e135f8..64fed55605 100644
--- a/python/scripts/flaky_report/__main__.py
+++ b/python/scripts/integration_test_report/__main__.py
@@ -3,10 +3,10 @@
 """CLI entry point for the integration test report tool.
 
 Usage:
-    uv run python -m scripts.flaky_report <reports-dir> <history-file> <output-file>
+    uv run python -m scripts.integration_test_report <reports-dir> <history-file> <output-file>
 
 Example (from python/ directory):
-    uv run python -m scripts.flaky_report \\
+    uv run python -m scripts.integration_test_report \\
         ../test-results/ \\
         integration-report-history.json \\
         integration-test-report.md
@@ -14,7 +14,7 @@ Example (from python/ directory):
 
 import sys
 
-from scripts.flaky_report.aggregate import main
+from scripts.integration_test_report.aggregate import main
 
 if __name__ == "__main__":
     sys.exit(main())
diff --git a/python/scripts/flaky_report/aggregate.py b/python/scripts/integration_test_report/aggregate.py
similarity index 100%
rename from python/scripts/flaky_report/aggregate.py
rename to python/scripts/integration_test_report/aggregate.py