mirror of
https://github.com/microsoft/agent-framework.git
synced 2026-06-16 21:04:09 +08:00
Python: Enable Agentic Mode to Use Existing Knowledge Bases Without index_name (#2464)
* refactor KB for index creation logic
* add user agent header for tracking
* fix mypy issues

---------

Co-authored-by: farzad528 <farzad528@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
Unverified
parent
6232dd8305
commit
b5595f6f70
+118
-44
@@ -5,7 +5,7 @@ import sys
|
||||
from collections.abc import Awaitable, Callable, MutableSequence
|
||||
from typing import TYPE_CHECKING, Any, ClassVar, Literal
|
||||
|
||||
from agent_framework import ChatMessage, Context, ContextProvider, Role
|
||||
from agent_framework import AGENT_FRAMEWORK_USER_AGENT, ChatMessage, Context, ContextProvider, Role
|
||||
from agent_framework._logging import get_logger
|
||||
from agent_framework._pydantic import AFBaseSettings
|
||||
from agent_framework.exceptions import ServiceInitializationError
|
||||
@@ -129,6 +129,8 @@ class AzureAISearchSettings(AFBaseSettings):
|
||||
Can be set via environment variable AZURE_SEARCH_ENDPOINT.
|
||||
index_name: Name of the search index.
|
||||
Can be set via environment variable AZURE_SEARCH_INDEX_NAME.
|
||||
knowledge_base_name: Name of an existing Knowledge Base (for agentic mode).
|
||||
Can be set via environment variable AZURE_SEARCH_KNOWLEDGE_BASE_NAME.
|
||||
api_key: API key for authentication (optional, use managed identity if not provided).
|
||||
Can be set via environment variable AZURE_SEARCH_API_KEY.
|
||||
env_file_path: If provided, the .env settings are read from this file path location.
|
||||
@@ -158,6 +160,7 @@ class AzureAISearchSettings(AFBaseSettings):
|
||||
|
||||
endpoint: str | None = None
|
||||
index_name: str | None = None
|
||||
knowledge_base_name: str | None = None
|
||||
api_key: SecretStr | None = None
|
||||
|
||||
|
||||
@@ -239,7 +242,6 @@ class AzureAISearchContextProvider(ContextProvider):
|
||||
embedding_function: Callable[[str], Awaitable[list[float]]] | None = None,
|
||||
context_prompt: str | None = None,
|
||||
# Agentic mode parameters (Knowledge Base)
|
||||
azure_ai_project_endpoint: str | None = None,
|
||||
azure_openai_resource_url: str | None = None,
|
||||
model_deployment_name: str | None = None,
|
||||
model_name: str | None = None,
|
||||
@@ -277,22 +279,18 @@ class AzureAISearchContextProvider(ContextProvider):
|
||||
Required if vector_field_name is specified and no server-side vectorization.
|
||||
context_prompt: Custom prompt to prepend to retrieved context.
|
||||
Default: "Use the following context to answer the question:"
|
||||
azure_ai_project_endpoint: Azure AI Foundry project endpoint URL.
|
||||
This is NOT the same as azure_openai_resource_url - the project endpoint is used
|
||||
for Azure AI Foundry services, while the OpenAI endpoint is used by the Knowledge
|
||||
Base to call the model for query planning. Required for agentic mode.
|
||||
Example: "https://myproject.services.ai.azure.com/api/projects/myproject"
|
||||
azure_openai_resource_url: Azure OpenAI resource URL for Knowledge Base model calls.
|
||||
This is the OpenAI endpoint used by the Knowledge Base to call the LLM for
|
||||
query planning and reasoning. This is separate from the project endpoint because
|
||||
the Knowledge Base directly calls Azure OpenAI for its internal operations.
|
||||
Required for agentic mode. Example: "https://myresource.openai.azure.com"
|
||||
Required when using agentic mode with index_name (to auto-create Knowledge Base).
|
||||
Not required when using an existing knowledge_base_name.
|
||||
Example: "https://myresource.openai.azure.com"
|
||||
model_deployment_name: Model deployment name in Azure OpenAI for Knowledge Base.
|
||||
This is the deployment name the Knowledge Base uses to call the LLM.
|
||||
Required for agentic mode.
|
||||
Required when using agentic mode with index_name (to auto-create Knowledge Base).
|
||||
Not required when using an existing knowledge_base_name.
|
||||
model_name: The underlying model name (e.g., "gpt-4o", "gpt-4o-mini").
|
||||
If not provided, defaults to model_deployment_name. Used for Knowledge Base configuration.
|
||||
knowledge_base_name: Name for the Knowledge Base. Required for agentic mode.
|
||||
knowledge_base_name: Name of an existing Knowledge Base to use.
|
||||
Required for agentic mode if not providing index_name.
|
||||
Supports KBs with any source type (web, blob, index, etc.).
|
||||
retrieval_instructions: Custom instructions for the Knowledge Base's
|
||||
retrieval planning. Only used in agentic mode.
|
||||
azure_openai_api_key: Azure OpenAI API key for Knowledge Base to call the model.
|
||||
@@ -340,6 +338,7 @@ class AzureAISearchContextProvider(ContextProvider):
|
||||
settings = AzureAISearchSettings(
|
||||
endpoint=endpoint,
|
||||
index_name=index_name,
|
||||
knowledge_base_name=knowledge_base_name,
|
||||
api_key=api_key if isinstance(api_key, str) else None,
|
||||
env_file_path=env_file_path,
|
||||
env_file_encoding=env_file_encoding,
|
||||
@@ -353,11 +352,36 @@ class AzureAISearchContextProvider(ContextProvider):
|
||||
"Azure AI Search endpoint is required. Set via 'endpoint' parameter "
|
||||
"or 'AZURE_SEARCH_ENDPOINT' environment variable."
|
||||
)
|
||||
if not settings.index_name:
|
||||
raise ServiceInitializationError(
|
||||
"Azure AI Search index name is required. Set via 'index_name' parameter "
|
||||
"or 'AZURE_SEARCH_INDEX_NAME' environment variable."
|
||||
)
|
||||
|
||||
# Validate index_name and knowledge_base_name based on mode
|
||||
# Note: settings.* contains the resolved value (explicit param OR env var)
|
||||
if mode == "semantic":
|
||||
# Semantic mode: always requires index_name
|
||||
if not settings.index_name:
|
||||
raise ServiceInitializationError(
|
||||
"Azure AI Search index name is required for semantic mode. "
|
||||
"Set via 'index_name' parameter or 'AZURE_SEARCH_INDEX_NAME' environment variable."
|
||||
)
|
||||
elif mode == "agentic":
|
||||
# Agentic mode: requires exactly ONE of index_name or knowledge_base_name
|
||||
if settings.index_name and settings.knowledge_base_name:
|
||||
raise ServiceInitializationError(
|
||||
"For agentic mode, provide either 'index_name' OR 'knowledge_base_name', not both. "
|
||||
"Use 'index_name' to auto-create a Knowledge Base, or 'knowledge_base_name' to use an existing one."
|
||||
)
|
||||
if not settings.index_name and not settings.knowledge_base_name:
|
||||
raise ServiceInitializationError(
|
||||
"For agentic mode, provide either 'index_name' (to auto-create Knowledge Base) "
|
||||
"or 'knowledge_base_name' (to use existing Knowledge Base). "
|
||||
"Set via parameters or environment variables "
|
||||
"AZURE_SEARCH_INDEX_NAME / AZURE_SEARCH_KNOWLEDGE_BASE_NAME."
|
||||
)
|
||||
# If using index_name to create KB, model config is required
|
||||
if settings.index_name and not model_deployment_name:
|
||||
raise ServiceInitializationError(
|
||||
"model_deployment_name is required for agentic mode when creating Knowledge Base from index. "
|
||||
"This is the Azure OpenAI deployment used by the Knowledge Base for query planning."
|
||||
)
|
||||
|
||||
# Determine the credential to use
|
||||
resolved_credential: AzureKeyCredential | AsyncTokenCredential
|
||||
@@ -389,14 +413,27 @@ class AzureAISearchContextProvider(ContextProvider):
|
||||
self.azure_openai_deployment_name = model_deployment_name
|
||||
# If model_name not provided, default to deployment name
|
||||
self.model_name = model_name or model_deployment_name
|
||||
self.knowledge_base_name = knowledge_base_name
|
||||
# Use resolved KB name (from explicit param or env var)
|
||||
self.knowledge_base_name = settings.knowledge_base_name
|
||||
self.retrieval_instructions = retrieval_instructions
|
||||
self.azure_openai_api_key = azure_openai_api_key
|
||||
self.azure_ai_project_endpoint = azure_ai_project_endpoint
|
||||
self.knowledge_base_output_mode = knowledge_base_output_mode
|
||||
self.retrieval_reasoning_effort = retrieval_reasoning_effort
|
||||
self.agentic_message_history_count = agentic_message_history_count
|
||||
|
||||
# Determine if using existing Knowledge Base or auto-creating from index
|
||||
# Since validation ensures exactly one of index_name/knowledge_base_name for agentic mode:
|
||||
# - knowledge_base_name provided: use existing KB
|
||||
# - index_name provided: auto-create KB from index
|
||||
self._use_existing_knowledge_base = False
|
||||
if mode == "agentic":
|
||||
if settings.knowledge_base_name:
|
||||
# Use existing KB directly (supports any source type: web, blob, index, etc.)
|
||||
self._use_existing_knowledge_base = True
|
||||
else:
|
||||
# Auto-generate KB name from index name
|
||||
self.knowledge_base_name = f"{settings.index_name}-kb"
|
||||
|
||||
# Auto-discover vector field if not specified
|
||||
self._auto_discovered_vector_field = False
|
||||
self._use_vectorizable_query = False # Will be set to True if server-side vectorization detected
|
||||
@@ -415,22 +452,24 @@ class AzureAISearchContextProvider(ContextProvider):
|
||||
"Agentic retrieval requires azure-search-documents >= 11.7.0b1 with Knowledge Base support. "
|
||||
"Please upgrade: pip install azure-search-documents>=11.7.0b1"
|
||||
)
|
||||
if not self.azure_openai_resource_url:
|
||||
# Only require OpenAI resource URL if NOT using existing KB
|
||||
# (existing KB already has its model configuration)
|
||||
# Note: model_deployment_name is already validated at initialization
|
||||
if not self._use_existing_knowledge_base and not self.azure_openai_resource_url:
|
||||
raise ValueError(
|
||||
"azure_openai_resource_url is required for agentic mode. "
|
||||
"azure_openai_resource_url is required for agentic mode when creating Knowledge Base from index. "
|
||||
"This should be your Azure OpenAI endpoint (e.g., 'https://myresource.openai.azure.com')"
|
||||
)
|
||||
if not self.azure_openai_deployment_name:
|
||||
raise ValueError("model_deployment_name is required for agentic mode")
|
||||
if not knowledge_base_name:
|
||||
raise ValueError("knowledge_base_name is required for agentic mode")
|
||||
|
||||
# Create search client for semantic mode
|
||||
self._search_client = SearchClient(
|
||||
endpoint=self.endpoint,
|
||||
index_name=self.index_name,
|
||||
credential=self.credential,
|
||||
)
|
||||
# Create search client for semantic mode (only if index_name is available)
|
||||
self._search_client: SearchClient | None = None
|
||||
if self.index_name:
|
||||
self._search_client = SearchClient(
|
||||
endpoint=self.endpoint,
|
||||
index_name=self.index_name,
|
||||
credential=self.credential,
|
||||
user_agent=AGENT_FRAMEWORK_USER_AGENT,
|
||||
)
|
||||
|
||||
# Create index client and retrieval client for agentic mode (Knowledge Base)
|
||||
self._index_client: SearchIndexClient | None = None
|
||||
@@ -439,6 +478,7 @@ class AzureAISearchContextProvider(ContextProvider):
|
||||
self._index_client = SearchIndexClient(
|
||||
endpoint=self.endpoint,
|
||||
credential=self.credential,
|
||||
user_agent=AGENT_FRAMEWORK_USER_AGENT,
|
||||
)
|
||||
# Retrieval client will be created after Knowledge Base initialization
|
||||
|
||||
@@ -574,10 +614,19 @@ class AzureAISearchContextProvider(ContextProvider):
|
||||
try:
|
||||
# Use existing index client or create temporary one
|
||||
if not self._index_client:
|
||||
self._index_client = SearchIndexClient(endpoint=self.endpoint, credential=self.credential)
|
||||
self._index_client = SearchIndexClient(
|
||||
endpoint=self.endpoint,
|
||||
credential=self.credential,
|
||||
user_agent=AGENT_FRAMEWORK_USER_AGENT,
|
||||
)
|
||||
index_client = self._index_client
|
||||
|
||||
# Get index schema
|
||||
# Get index schema (index_name is guaranteed to be set for semantic mode)
|
||||
if not self.index_name:
|
||||
logger.warning("Cannot auto-discover vector field: index_name is not set.")
|
||||
self._auto_discovered_vector_field = True
|
||||
return
|
||||
|
||||
index = await index_client.get_index(self.index_name)
|
||||
|
||||
# Step 1: Find all vector fields
|
||||
@@ -694,7 +743,10 @@ class AzureAISearchContextProvider(ContextProvider):
|
||||
search_params["semantic_configuration_name"] = self.semantic_configuration_name
|
||||
search_params["query_caption"] = QueryCaptionType.EXTRACTIVE
|
||||
|
||||
# Execute search
|
||||
# Execute search (search client is guaranteed to exist for semantic mode)
|
||||
if not self._search_client:
|
||||
raise RuntimeError("Search client is not initialized. This should not happen in semantic mode.")
|
||||
|
||||
results = await self._search_client.search(**search_params) # type: ignore[reportUnknownVariableType]
|
||||
|
||||
# Format results with citations
|
||||
@@ -711,27 +763,48 @@ class AzureAISearchContextProvider(ContextProvider):
|
||||
return formatted_results
|
||||
|
||||
async def _ensure_knowledge_base(self) -> None:
|
||||
"""Ensure Knowledge Base and knowledge source are created.
|
||||
"""Ensure Knowledge Base and knowledge source are created or use existing KB.
|
||||
|
||||
This method is idempotent - it will only create resources if they don't exist.
|
||||
|
||||
Note: Azure SDK uses KnowledgeAgent classes internally, but the feature
|
||||
is marketed as "Knowledge Bases" in Azure AI Search.
|
||||
"""
|
||||
if self._knowledge_base_initialized or not self._index_client:
|
||||
if self._knowledge_base_initialized:
|
||||
return
|
||||
|
||||
# Runtime validation for agentic mode parameters
|
||||
# Runtime validation
|
||||
if not self.knowledge_base_name:
|
||||
raise ValueError("knowledge_base_name is required for agentic mode")
|
||||
if not self.azure_openai_resource_url:
|
||||
raise ValueError("azure_openai_resource_url is required for agentic mode")
|
||||
if not self.azure_openai_deployment_name:
|
||||
raise ValueError("model_deployment_name is required for agentic mode")
|
||||
|
||||
knowledge_base_name = self.knowledge_base_name
|
||||
|
||||
# Step 1: Create or get knowledge source
|
||||
# Path 1: Use existing Knowledge Base directly (no index needed)
|
||||
# This supports KB with any source type (web, blob, index, etc.)
|
||||
if self._use_existing_knowledge_base:
|
||||
# Just create the retrieval client - KB already exists with its own sources
|
||||
if _agentic_retrieval_available and self._retrieval_client is None:
|
||||
self._retrieval_client = KnowledgeBaseRetrievalClient(
|
||||
endpoint=self.endpoint,
|
||||
knowledge_base_name=knowledge_base_name,
|
||||
credential=self.credential,
|
||||
user_agent=AGENT_FRAMEWORK_USER_AGENT,
|
||||
)
|
||||
self._knowledge_base_initialized = True
|
||||
return
|
||||
|
||||
# Path 2: Auto-create Knowledge Base from search index
|
||||
# Requires index_client and OpenAI configuration
|
||||
if not self._index_client:
|
||||
raise ValueError("Index client is required when creating Knowledge Base from index")
|
||||
if not self.azure_openai_resource_url:
|
||||
raise ValueError("azure_openai_resource_url is required when creating Knowledge Base from index")
|
||||
if not self.azure_openai_deployment_name:
|
||||
raise ValueError("model_deployment_name is required when creating Knowledge Base from index")
|
||||
if not self.index_name:
|
||||
raise ValueError("index_name is required when creating Knowledge Base from index")
|
||||
|
||||
# Step 1: Create or get knowledge source from index
|
||||
knowledge_source_name = f"{self.index_name}-source"
|
||||
|
||||
try:
|
||||
@@ -794,6 +867,7 @@ class AzureAISearchContextProvider(ContextProvider):
|
||||
endpoint=self.endpoint,
|
||||
knowledge_base_name=knowledge_base_name,
|
||||
credential=self.credential,
|
||||
user_agent=AGENT_FRAMEWORK_USER_AGENT,
|
||||
)
|
||||
|
||||
async def _agentic_search(self, messages: list[ChatMessage]) -> list[str]:
|
||||
|
||||
@@ -148,74 +148,105 @@ class TestSearchProviderInitialization:
|
||||
vector_field_name="embedding",
|
||||
)
|
||||
|
||||
def test_init_agentic_mode_requires_azure_openai_resource_url(self) -> None:
|
||||
"""Test that agentic mode requires azure_openai_resource_url."""
|
||||
with pytest.raises(ValueError, match="azure_openai_resource_url"):
|
||||
def test_init_agentic_mode_with_kb_only(self) -> None:
|
||||
"""Test agentic mode with existing knowledge_base_name (simplest path)."""
|
||||
# Clear environment to ensure no env vars interfere
|
||||
clean_env = {k: v for k, v in os.environ.items() if not k.startswith("AZURE_SEARCH_")}
|
||||
with patch.dict(os.environ, clean_env, clear=True):
|
||||
provider = AzureAISearchContextProvider(
|
||||
endpoint="https://test.search.windows.net",
|
||||
api_key="test-key",
|
||||
mode="agentic",
|
||||
knowledge_base_name="test-kb",
|
||||
env_file_path="", # Disable .env file loading
|
||||
)
|
||||
assert provider.mode == "agentic"
|
||||
assert provider.knowledge_base_name == "test-kb"
|
||||
assert provider._use_existing_knowledge_base is True
|
||||
|
||||
def test_init_agentic_mode_with_index_requires_model(self) -> None:
|
||||
"""Test that agentic mode with index_name requires model_deployment_name."""
|
||||
# Clear environment to ensure no env vars interfere
|
||||
clean_env = {k: v for k, v in os.environ.items() if not k.startswith("AZURE_SEARCH_")}
|
||||
with (
|
||||
patch.dict(os.environ, clean_env, clear=True),
|
||||
pytest.raises(ServiceInitializationError, match="model_deployment_name"),
|
||||
):
|
||||
AzureAISearchContextProvider(
|
||||
endpoint="https://test.search.windows.net",
|
||||
index_name="test-index",
|
||||
api_key="test-key",
|
||||
mode="agentic",
|
||||
env_file_path="", # Disable .env file loading
|
||||
)
|
||||
|
||||
def test_init_agentic_mode_requires_model_deployment_name(self) -> None:
|
||||
"""Test that agentic mode requires model_deployment_name."""
|
||||
with pytest.raises(ValueError, match="model_deployment_name"):
|
||||
AzureAISearchContextProvider(
|
||||
def test_init_agentic_mode_with_index_and_model(self) -> None:
|
||||
"""Test agentic mode with index_name (auto-create KB path)."""
|
||||
# Clear environment to ensure no env vars interfere
|
||||
clean_env = {k: v for k, v in os.environ.items() if not k.startswith("AZURE_SEARCH_")}
|
||||
with patch.dict(os.environ, clean_env, clear=True):
|
||||
provider = AzureAISearchContextProvider(
|
||||
endpoint="https://test.search.windows.net",
|
||||
index_name="test-index",
|
||||
api_key="test-key",
|
||||
mode="agentic",
|
||||
azure_ai_project_endpoint="https://test.services.ai.azure.com",
|
||||
azure_openai_resource_url="https://test.openai.azure.com",
|
||||
)
|
||||
|
||||
def test_init_agentic_mode_requires_knowledge_base_name(self) -> None:
|
||||
"""Test that agentic mode requires knowledge_base_name."""
|
||||
with pytest.raises(ValueError, match="knowledge_base_name"):
|
||||
AzureAISearchContextProvider(
|
||||
endpoint="https://test.search.windows.net",
|
||||
index_name="test-index",
|
||||
api_key="test-key",
|
||||
mode="agentic",
|
||||
azure_ai_project_endpoint="https://test.services.ai.azure.com",
|
||||
model_deployment_name="gpt-4o",
|
||||
azure_openai_resource_url="https://test.openai.azure.com",
|
||||
env_file_path="", # Disable .env file loading
|
||||
)
|
||||
assert provider.mode == "agentic"
|
||||
assert provider.index_name == "test-index"
|
||||
assert provider.knowledge_base_name == "test-index-kb" # Auto-generated
|
||||
assert provider._use_existing_knowledge_base is False
|
||||
|
||||
def test_init_agentic_mode_rejects_both_index_and_kb(self) -> None:
|
||||
"""Test that agentic mode rejects both index_name AND knowledge_base_name."""
|
||||
# Clear environment to ensure no env vars interfere
|
||||
clean_env = {k: v for k, v in os.environ.items() if not k.startswith("AZURE_SEARCH_")}
|
||||
with (
|
||||
patch.dict(os.environ, clean_env, clear=True),
|
||||
pytest.raises(ServiceInitializationError, match="either 'index_name' OR 'knowledge_base_name', not both"),
|
||||
):
|
||||
AzureAISearchContextProvider(
|
||||
endpoint="https://test.search.windows.net",
|
||||
index_name="test-index",
|
||||
api_key="test-key",
|
||||
mode="agentic",
|
||||
knowledge_base_name="test-kb",
|
||||
model_deployment_name="gpt-4o",
|
||||
azure_openai_resource_url="https://test.openai.azure.com",
|
||||
env_file_path="", # Disable .env file loading
|
||||
)
|
||||
|
||||
def test_init_agentic_mode_with_all_params(self) -> None:
|
||||
"""Test initialization with all agentic mode parameters."""
|
||||
provider = AzureAISearchContextProvider(
|
||||
endpoint="https://test.search.windows.net",
|
||||
index_name="test-index",
|
||||
api_key="test-key",
|
||||
mode="agentic",
|
||||
azure_ai_project_endpoint="https://test.services.ai.azure.com",
|
||||
model_deployment_name="my-gpt-4o-deployment",
|
||||
model_name="gpt-4o",
|
||||
knowledge_base_name="test-kb",
|
||||
azure_openai_resource_url="https://test.openai.azure.com",
|
||||
)
|
||||
assert provider.mode == "agentic"
|
||||
assert provider.azure_ai_project_endpoint == "https://test.services.ai.azure.com"
|
||||
assert provider.azure_openai_resource_url == "https://test.openai.azure.com"
|
||||
assert provider.azure_openai_deployment_name == "my-gpt-4o-deployment"
|
||||
assert provider.model_name == "gpt-4o"
|
||||
assert provider.knowledge_base_name == "test-kb"
|
||||
def test_init_agentic_mode_requires_index_or_kb(self) -> None:
|
||||
"""Test that agentic mode requires either index_name or knowledge_base_name."""
|
||||
# Clear environment to ensure no env vars interfere
|
||||
clean_env = {k: v for k, v in os.environ.items() if not k.startswith("AZURE_SEARCH_")}
|
||||
with (
|
||||
patch.dict(os.environ, clean_env, clear=True),
|
||||
pytest.raises(ServiceInitializationError, match="provide either 'index_name'.*or 'knowledge_base_name'"),
|
||||
):
|
||||
AzureAISearchContextProvider(
|
||||
endpoint="https://test.search.windows.net",
|
||||
api_key="test-key",
|
||||
mode="agentic",
|
||||
env_file_path="", # Disable .env file loading
|
||||
)
|
||||
|
||||
def test_init_model_name_defaults_to_deployment_name(self) -> None:
|
||||
"""Test that model_name defaults to deployment_name if not provided."""
|
||||
provider = AzureAISearchContextProvider(
|
||||
endpoint="https://test.search.windows.net",
|
||||
index_name="test-index",
|
||||
api_key="test-key",
|
||||
mode="agentic",
|
||||
azure_ai_project_endpoint="https://test.services.ai.azure.com",
|
||||
model_deployment_name="gpt-4o",
|
||||
knowledge_base_name="test-kb",
|
||||
azure_openai_resource_url="https://test.openai.azure.com",
|
||||
)
|
||||
assert provider.model_name == "gpt-4o"
|
||||
# Clear environment to ensure no env vars interfere
|
||||
clean_env = {k: v for k, v in os.environ.items() if not k.startswith("AZURE_SEARCH_")}
|
||||
with patch.dict(os.environ, clean_env, clear=True):
|
||||
provider = AzureAISearchContextProvider(
|
||||
endpoint="https://test.search.windows.net",
|
||||
api_key="test-key",
|
||||
mode="agentic",
|
||||
knowledge_base_name="test-kb",
|
||||
model_deployment_name="gpt-4o",
|
||||
env_file_path="", # Disable .env file loading
|
||||
)
|
||||
assert provider.model_name == "gpt-4o"
|
||||
|
||||
def test_init_with_custom_context_prompt(self) -> None:
|
||||
"""Test initialization with custom context prompt."""
|
||||
@@ -335,7 +366,7 @@ class TestKnowledgeBaseSetup:
|
||||
async def test_ensure_knowledge_base_creates_when_not_exists(
|
||||
self, mock_search_class: MagicMock, mock_index_class: MagicMock
|
||||
) -> None:
|
||||
"""Test that Knowledge Base is created when it doesn't exist."""
|
||||
"""Test that Knowledge Base is created when it doesn't exist (index_name path)."""
|
||||
# Setup mocks
|
||||
mock_index_client = AsyncMock()
|
||||
mock_index_client.get_knowledge_source.side_effect = ResourceNotFoundError("Not found")
|
||||
@@ -347,57 +378,58 @@ class TestKnowledgeBaseSetup:
|
||||
mock_search_client = AsyncMock()
|
||||
mock_search_class.return_value = mock_search_client
|
||||
|
||||
provider = AzureAISearchContextProvider(
|
||||
endpoint="https://test.search.windows.net",
|
||||
index_name="test-index",
|
||||
api_key="test-key",
|
||||
mode="agentic",
|
||||
azure_ai_project_endpoint="https://test.services.ai.azure.com",
|
||||
model_deployment_name="gpt-4o",
|
||||
model_name="gpt-4o",
|
||||
knowledge_base_name="test-kb",
|
||||
azure_openai_resource_url="https://test.openai.azure.com",
|
||||
)
|
||||
# Clear environment to ensure no env vars interfere
|
||||
clean_env = {k: v for k, v in os.environ.items() if not k.startswith("AZURE_SEARCH_")}
|
||||
with patch.dict(os.environ, clean_env, clear=True):
|
||||
# Use index_name path (auto-create KB)
|
||||
provider = AzureAISearchContextProvider(
|
||||
endpoint="https://test.search.windows.net",
|
||||
index_name="test-index",
|
||||
api_key="test-key",
|
||||
mode="agentic",
|
||||
model_deployment_name="gpt-4o",
|
||||
azure_openai_resource_url="https://test.openai.azure.com",
|
||||
env_file_path="", # Disable .env file loading
|
||||
)
|
||||
|
||||
await provider._ensure_knowledge_base()
|
||||
await provider._ensure_knowledge_base()
|
||||
|
||||
# Verify knowledge source was created
|
||||
mock_index_client.create_knowledge_source.assert_called_once()
|
||||
# Verify Knowledge Base was created
|
||||
mock_index_client.create_or_update_knowledge_base.assert_called_once()
|
||||
# Verify knowledge source was created
|
||||
mock_index_client.create_knowledge_source.assert_called_once()
|
||||
# Verify Knowledge Base was created
|
||||
mock_index_client.create_or_update_knowledge_base.assert_called_once()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch("agent_framework_azure_ai_search._search_provider.SearchIndexClient")
|
||||
@patch("agent_framework_azure_ai_search._search_provider.SearchClient")
|
||||
async def test_ensure_knowledge_base_skips_when_exists(
|
||||
async def test_ensure_knowledge_base_skips_when_using_existing_kb(
|
||||
self, mock_search_class: MagicMock, mock_index_class: MagicMock
|
||||
) -> None:
|
||||
"""Test that Knowledge Base setup is skipped when already exists."""
|
||||
"""Test that KB setup is skipped when using existing knowledge_base_name."""
|
||||
# Setup mocks
|
||||
mock_index_client = AsyncMock()
|
||||
mock_index_client.get_knowledge_source.return_value = MagicMock() # Exists
|
||||
mock_index_client.get_knowledge_base.return_value = MagicMock() # Exists
|
||||
mock_index_class.return_value = mock_index_client
|
||||
|
||||
mock_search_client = AsyncMock()
|
||||
mock_search_class.return_value = mock_search_client
|
||||
|
||||
provider = AzureAISearchContextProvider(
|
||||
endpoint="https://test.search.windows.net",
|
||||
index_name="test-index",
|
||||
api_key="test-key",
|
||||
mode="agentic",
|
||||
azure_ai_project_endpoint="https://test.services.ai.azure.com",
|
||||
model_deployment_name="gpt-4o",
|
||||
knowledge_base_name="test-kb",
|
||||
azure_openai_resource_url="https://test.openai.azure.com",
|
||||
)
|
||||
# Clear environment to ensure no env vars interfere
|
||||
clean_env = {k: v for k, v in os.environ.items() if not k.startswith("AZURE_SEARCH_")}
|
||||
with patch.dict(os.environ, clean_env, clear=True):
|
||||
# Use knowledge_base_name path (existing KB)
|
||||
provider = AzureAISearchContextProvider(
|
||||
endpoint="https://test.search.windows.net",
|
||||
api_key="test-key",
|
||||
mode="agentic",
|
||||
knowledge_base_name="test-kb",
|
||||
env_file_path="", # Disable .env file loading
|
||||
)
|
||||
|
||||
await provider._ensure_knowledge_base()
|
||||
await provider._ensure_knowledge_base()
|
||||
|
||||
# Verify nothing was created
|
||||
mock_index_client.create_knowledge_source.assert_not_called()
|
||||
mock_index_client.create_agent.assert_not_called()
|
||||
# Verify nothing was created (using existing KB)
|
||||
mock_index_client.create_knowledge_source.assert_not_called()
|
||||
mock_index_client.create_or_update_knowledge_base.assert_not_called()
|
||||
|
||||
|
||||
class TestContextProviderLifecycle:
|
||||
@@ -437,21 +469,22 @@ class TestContextProviderLifecycle:
|
||||
mock_retrieval_client.close = AsyncMock()
|
||||
mock_retrieval_class.return_value = mock_retrieval_client
|
||||
|
||||
async with AzureAISearchContextProvider(
|
||||
endpoint="https://test.search.windows.net",
|
||||
index_name="test-index",
|
||||
api_key="test-key",
|
||||
mode="agentic",
|
||||
azure_ai_project_endpoint="https://test.services.ai.azure.com",
|
||||
model_deployment_name="gpt-4o",
|
||||
knowledge_base_name="test-kb",
|
||||
azure_openai_resource_url="https://test.openai.azure.com",
|
||||
) as provider:
|
||||
# Simulate retrieval client being created
|
||||
provider._retrieval_client = mock_retrieval_client
|
||||
# Clear environment to ensure no env vars interfere
|
||||
clean_env = {k: v for k, v in os.environ.items() if not k.startswith("AZURE_SEARCH_")}
|
||||
with patch.dict(os.environ, clean_env, clear=True):
|
||||
# Use knowledge_base_name path (existing KB)
|
||||
async with AzureAISearchContextProvider(
|
||||
endpoint="https://test.search.windows.net",
|
||||
api_key="test-key",
|
||||
mode="agentic",
|
||||
knowledge_base_name="test-kb",
|
||||
env_file_path="", # Disable .env file loading
|
||||
) as provider:
|
||||
# Simulate retrieval client being created
|
||||
provider._retrieval_client = mock_retrieval_client
|
||||
|
||||
# Verify cleanup was called
|
||||
mock_retrieval_client.close.assert_called_once()
|
||||
# Verify cleanup was called
|
||||
mock_retrieval_client.close.assert_called_once()
|
||||
|
||||
def test_string_api_key_conversion(self) -> None:
|
||||
"""Test that string api_key is converted to AzureKeyCredential."""
|
||||
@@ -579,9 +612,6 @@ class TestAgenticSearch:
|
||||
|
||||
# Setup index client mock
|
||||
mock_index_client = AsyncMock()
|
||||
mock_index_client.get_knowledge_source.side_effect = ResourceNotFoundError("Not found")
|
||||
mock_index_client.create_knowledge_source = AsyncMock()
|
||||
mock_index_client.create_or_update_knowledge_base = AsyncMock()
|
||||
mock_index_class.return_value = mock_index_client
|
||||
|
||||
# Setup retrieval client mock with response
|
||||
@@ -603,22 +633,23 @@ class TestAgenticSearch:
|
||||
mock_retrieval_client.close = AsyncMock()
|
||||
mock_retrieval_class.return_value = mock_retrieval_client
|
||||
|
||||
provider = AzureAISearchContextProvider(
|
||||
endpoint="https://test.search.windows.net",
|
||||
index_name="test-index",
|
||||
api_key="test-key",
|
||||
mode="agentic",
|
||||
azure_ai_project_endpoint="https://test.services.ai.azure.com",
|
||||
model_deployment_name="gpt-4o",
|
||||
knowledge_base_name="test-kb",
|
||||
azure_openai_resource_url="https://test.openai.azure.com",
|
||||
)
|
||||
# Clear environment to ensure no env vars interfere
|
||||
clean_env = {k: v for k, v in os.environ.items() if not k.startswith("AZURE_SEARCH_")}
|
||||
with patch.dict(os.environ, clean_env, clear=True):
|
||||
# Use knowledge_base_name path (existing KB)
|
||||
provider = AzureAISearchContextProvider(
|
||||
endpoint="https://test.search.windows.net",
|
||||
api_key="test-key",
|
||||
mode="agentic",
|
||||
knowledge_base_name="test-kb",
|
||||
env_file_path="", # Disable .env file loading
|
||||
)
|
||||
|
||||
context = await provider.invoking(sample_messages)
|
||||
context = await provider.invoking(sample_messages)
|
||||
|
||||
assert isinstance(context, Context)
|
||||
# Should have at least the prompt message
|
||||
assert len(context.messages) >= 1
|
||||
assert isinstance(context, Context)
|
||||
# Should have at least the prompt message
|
||||
assert len(context.messages) >= 1
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch("agent_framework_azure_ai_search._search_provider.KnowledgeBaseRetrievalClient")
|
||||
@@ -637,9 +668,6 @@ class TestAgenticSearch:
|
||||
mock_search_class.return_value = mock_search_client
|
||||
|
||||
mock_index_client = AsyncMock()
|
||||
mock_index_client.get_knowledge_source.side_effect = ResourceNotFoundError("Not found")
|
||||
mock_index_client.create_knowledge_source = AsyncMock()
|
||||
mock_index_client.create_or_update_knowledge_base = AsyncMock()
|
||||
mock_index_class.return_value = mock_index_client
|
||||
|
||||
# Empty response
|
||||
@@ -650,22 +678,23 @@ class TestAgenticSearch:
|
||||
mock_retrieval_client.close = AsyncMock()
|
||||
mock_retrieval_class.return_value = mock_retrieval_client
|
||||
|
||||
provider = AzureAISearchContextProvider(
|
||||
endpoint="https://test.search.windows.net",
|
||||
index_name="test-index",
|
||||
api_key="test-key",
|
||||
mode="agentic",
|
||||
azure_ai_project_endpoint="https://test.services.ai.azure.com",
|
||||
model_deployment_name="gpt-4o",
|
||||
knowledge_base_name="test-kb",
|
||||
azure_openai_resource_url="https://test.openai.azure.com",
|
||||
)
|
||||
# Clear environment to ensure no env vars interfere
|
||||
clean_env = {k: v for k, v in os.environ.items() if not k.startswith("AZURE_SEARCH_")}
|
||||
with patch.dict(os.environ, clean_env, clear=True):
|
||||
# Use knowledge_base_name path (existing KB)
|
||||
provider = AzureAISearchContextProvider(
|
||||
endpoint="https://test.search.windows.net",
|
||||
api_key="test-key",
|
||||
mode="agentic",
|
||||
knowledge_base_name="test-kb",
|
||||
env_file_path="", # Disable .env file loading
|
||||
)
|
||||
|
||||
context = await provider.invoking(sample_messages)
|
||||
context = await provider.invoking(sample_messages)
|
||||
|
||||
assert isinstance(context, Context)
|
||||
# Should have fallback message
|
||||
assert len(context.messages) >= 1
|
||||
assert isinstance(context, Context)
|
||||
# Should have fallback message
|
||||
assert len(context.messages) >= 1
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch("agent_framework_azure_ai_search._search_provider.KnowledgeBaseRetrievalClient")
|
||||
@@ -684,9 +713,6 @@ class TestAgenticSearch:
|
||||
mock_search_class.return_value = mock_search_client
|
||||
|
||||
mock_index_client = AsyncMock()
|
||||
mock_index_client.get_knowledge_source.side_effect = ResourceNotFoundError("Not found")
|
||||
mock_index_client.create_knowledge_source = AsyncMock()
|
||||
mock_index_client.create_or_update_knowledge_base = AsyncMock()
|
||||
mock_index_class.return_value = mock_index_client
|
||||
|
||||
mock_retrieval_client = AsyncMock()
|
||||
@@ -706,22 +732,23 @@ class TestAgenticSearch:
|
||||
mock_retrieval_client.close = AsyncMock()
|
||||
mock_retrieval_class.return_value = mock_retrieval_client
|
||||
|
||||
provider = AzureAISearchContextProvider(
|
||||
endpoint="https://test.search.windows.net",
|
||||
index_name="test-index",
|
||||
api_key="test-key",
|
||||
mode="agentic",
|
||||
azure_ai_project_endpoint="https://test.services.ai.azure.com",
|
||||
model_deployment_name="gpt-4o",
|
||||
knowledge_base_name="test-kb",
|
||||
azure_openai_resource_url="https://test.openai.azure.com",
|
||||
retrieval_reasoning_effort="medium", # Test medium reasoning
|
||||
)
|
||||
# Clear environment to ensure no env vars interfere
|
||||
clean_env = {k: v for k, v in os.environ.items() if not k.startswith("AZURE_SEARCH_")}
|
||||
with patch.dict(os.environ, clean_env, clear=True):
|
||||
# Use knowledge_base_name path (existing KB)
|
||||
provider = AzureAISearchContextProvider(
|
||||
endpoint="https://test.search.windows.net",
|
||||
api_key="test-key",
|
||||
mode="agentic",
|
||||
knowledge_base_name="test-kb",
|
||||
retrieval_reasoning_effort="medium", # Test medium reasoning
|
||||
env_file_path="", # Disable .env file loading
|
||||
)
|
||||
|
||||
context = await provider.invoking(sample_messages)
|
||||
context = await provider.invoking(sample_messages)
|
||||
|
||||
assert isinstance(context, Context)
|
||||
assert len(context.messages) >= 1
|
||||
assert isinstance(context, Context)
|
||||
assert len(context.messages) >= 1
|
||||
|
||||
|
||||
class TestVectorFieldAutoDiscovery:
|
||||
|
||||
+52
-27
@@ -25,18 +25,22 @@ This sample demonstrates how to use Azure AI Search with agentic mode for RAG
|
||||
For simple queries where speed is critical, use semantic mode instead (see azure_ai_with_search_context_semantic.py).
|
||||
|
||||
Prerequisites:
|
||||
1. An Azure AI Search service with a search index
|
||||
1. An Azure AI Search service
|
||||
2. An Azure AI Foundry project with a model deployment
|
||||
3. An Azure OpenAI resource (for Knowledge Base model calls)
|
||||
4. Set the following environment variables:
|
||||
3. Either an existing Knowledge Base OR a search index (to auto-create a KB)
|
||||
|
||||
Environment variables:
|
||||
- AZURE_SEARCH_ENDPOINT: Your Azure AI Search endpoint
|
||||
- AZURE_SEARCH_API_KEY: (Optional) Your search API key - if not provided, this sample authenticates with AzureCliCredential (Entra ID)
|
||||
- AZURE_SEARCH_INDEX_NAME: Your search index name
|
||||
- AZURE_SEARCH_API_KEY: (Optional) API key - if not provided, this sample authenticates with AzureCliCredential
|
||||
- AZURE_AI_PROJECT_ENDPOINT: Your Azure AI Foundry project endpoint
|
||||
- AZURE_AI_MODEL_DEPLOYMENT_NAME: (Optional) Your model deployment name (defaults to "gpt-4o" when unset)
|
||||
|
||||
For using an existing Knowledge Base (recommended):
|
||||
- AZURE_SEARCH_KNOWLEDGE_BASE_NAME: Your Knowledge Base name
|
||||
- AZURE_OPENAI_RESOURCE_URL: Your Azure OpenAI resource URL (e.g., "https://myresource.openai.azure.com")
|
||||
Note: This is different from AZURE_AI_PROJECT_ENDPOINT - Knowledge Base needs the OpenAI endpoint for model calls
|
||||
|
||||
For auto-creating a Knowledge Base from an index:
|
||||
- AZURE_SEARCH_INDEX_NAME: Your search index name
|
||||
- AZURE_OPENAI_RESOURCE_URL: Azure OpenAI resource URL (e.g., "https://myresource.openai.azure.com")
|
||||
"""
|
||||
|
||||
# Sample queries to demonstrate agentic RAG
|
||||
@@ -53,31 +57,52 @@ async def main() -> None:
|
||||
# Get configuration from environment
|
||||
search_endpoint = os.environ["AZURE_SEARCH_ENDPOINT"]
|
||||
search_key = os.environ.get("AZURE_SEARCH_API_KEY")
|
||||
index_name = os.environ["AZURE_SEARCH_INDEX_NAME"]
|
||||
project_endpoint = os.environ["AZURE_AI_PROJECT_ENDPOINT"]
|
||||
model_deployment = os.environ.get("AZURE_AI_MODEL_DEPLOYMENT_NAME", "gpt-4o")
|
||||
knowledge_base_name = os.environ["AZURE_SEARCH_KNOWLEDGE_BASE_NAME"]
|
||||
azure_openai_resource_url = os.environ["AZURE_OPENAI_RESOURCE_URL"]
|
||||
|
||||
# Agentic mode requires exactly ONE of: knowledge_base_name OR index_name
|
||||
# Option 1: Use existing Knowledge Base (recommended)
|
||||
knowledge_base_name = os.environ.get("AZURE_SEARCH_KNOWLEDGE_BASE_NAME")
|
||||
# Option 2: Auto-create KB from index (requires azure_openai_resource_url)
|
||||
index_name = os.environ.get("AZURE_SEARCH_INDEX_NAME")
|
||||
azure_openai_resource_url = os.environ.get("AZURE_OPENAI_RESOURCE_URL")
|
||||
|
||||
# Create Azure AI Search context provider with agentic mode (recommended for accuracy)
|
||||
print("Using AGENTIC mode (Knowledge Bases with query planning, recommended)\n")
|
||||
print("ℹ️ This mode is slightly slower but provides more accurate results.\n")
|
||||
search_provider = AzureAISearchContextProvider(
|
||||
endpoint=search_endpoint,
|
||||
index_name=index_name,
|
||||
api_key=search_key, # Use api_key for API key auth, or credential for managed identity
|
||||
credential=AzureCliCredential() if not search_key else None,
|
||||
mode="agentic", # Advanced mode for multi-hop reasoning
|
||||
# Agentic mode configuration
|
||||
azure_ai_project_endpoint=project_endpoint,
|
||||
azure_openai_resource_url=azure_openai_resource_url,
|
||||
model_deployment_name=model_deployment,
|
||||
knowledge_base_name=knowledge_base_name,
|
||||
# Optional: Configure retrieval behavior
|
||||
knowledge_base_output_mode="extractive_data", # or "answer_synthesis"
|
||||
retrieval_reasoning_effort="minimal", # or "medium", "low"
|
||||
top_k=3, # Note: In agentic mode, the server-side Knowledge Base determines final retrieval
|
||||
)
|
||||
print("This mode is slightly slower but provides more accurate results.\n")
|
||||
|
||||
# Configure based on whether using existing KB or auto-creating from index
|
||||
if knowledge_base_name:
|
||||
# Use existing Knowledge Base - simplest approach
|
||||
search_provider = AzureAISearchContextProvider(
|
||||
endpoint=search_endpoint,
|
||||
api_key=search_key,
|
||||
credential=AzureCliCredential() if not search_key else None,
|
||||
mode="agentic",
|
||||
knowledge_base_name=knowledge_base_name,
|
||||
# Optional: Configure retrieval behavior
|
||||
knowledge_base_output_mode="extractive_data", # or "answer_synthesis"
|
||||
retrieval_reasoning_effort="minimal", # or "medium", "low"
|
||||
)
|
||||
else:
|
||||
# Auto-create Knowledge Base from index
|
||||
if not index_name:
|
||||
raise ValueError("Set AZURE_SEARCH_KNOWLEDGE_BASE_NAME or AZURE_SEARCH_INDEX_NAME")
|
||||
if not azure_openai_resource_url:
|
||||
raise ValueError("AZURE_OPENAI_RESOURCE_URL required when using index_name")
|
||||
search_provider = AzureAISearchContextProvider(
|
||||
endpoint=search_endpoint,
|
||||
index_name=index_name,
|
||||
api_key=search_key,
|
||||
credential=AzureCliCredential() if not search_key else None,
|
||||
mode="agentic",
|
||||
azure_openai_resource_url=azure_openai_resource_url,
|
||||
model_deployment_name=model_deployment,
|
||||
# Optional: Configure retrieval behavior
|
||||
knowledge_base_output_mode="extractive_data", # or "answer_synthesis"
|
||||
retrieval_reasoning_effort="minimal", # or "medium", "low"
|
||||
top_k=3,
|
||||
)
|
||||
|
||||
# Create agent with search context provider
|
||||
async with (
|
||||
|
||||
Reference in New Issue
Block a user