
Commit 9673c5e

Scope Change in SDK (Azure#40882)

* test-fix
* asset for test fix
* new assets from prompties
* disabling 2 tests

1 parent 659ceb1 · commit 9673c5e

File tree: 4 files changed, +51 -10 lines changed

sdk/evaluation/azure-ai-evaluation/assets.json

Lines changed: 1 addition & 1 deletion
@@ -2,5 +2,5 @@
   "AssetsRepo": "Azure/azure-sdk-assets",
   "AssetsRepoPrefixPath": "python",
   "TagPrefix": "python/evaluation/azure-ai-evaluation",
-  "Tag": "python/evaluation/azure-ai-evaluation_a93be1e527"
+  "Tag": "python/evaluation/azure-ai-evaluation_d585f1f45d"
 }

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_conversation/constants.py

Lines changed: 1 addition & 1 deletion
@@ -12,7 +12,7 @@
 
 # Azure endpoint constants
 AZUREML_TOKEN_SCOPE = "https://ml.azure.com"
-COGNITIVE_SERVICES_TOKEN_SCOPE = "https://cognitiveservices.azure.com/"
+COGNITIVE_SERVICES_TOKEN_SCOPE = "https://ai.azure.com/"
 AZURE_TOKEN_REFRESH_INTERVAL = 600  # seconds
 AZURE_ENDPOINT_DOMAIN_VALID_PATTERN_RE = (
     r"^(?=.{1,255}$)(?!-)[a-zA-Z0-9-]{1,63}(?<!-)"

sdk/evaluation/azure-ai-evaluation/tests/conftest.py

Lines changed: 23 additions & 4 deletions
@@ -331,12 +331,18 @@ def get_config(
 @pytest.fixture(scope="session")
 def mock_model_config() -> AzureOpenAIModelConfiguration:
     return AzureOpenAIModelConfiguration(
-        azure_endpoint="https://Sanitized.cognitiveservices.azure.com",
+        azure_endpoint="https://Sanitized.api.cognitive.microsoft.com",
         api_key="aoai-api-key",
-        api_version="2024-08-01-preview",
+        api_version="2023-07-01-preview",
+        azure_deployment="aoai-deployment",
+    )
+@pytest.fixture(scope="session")
+def mock_model_config_onedp() -> AzureOpenAIModelConfiguration:
+    return AzureOpenAIModelConfiguration(
+        azure_endpoint="https://Sanitized.services.ai.azure.com",
+        api_version="2024-12-01-preview",
         azure_deployment="aoai-deployment",
     )
-
 
 @pytest.fixture(scope="session")
 def mock_project_scope() -> Dict[str, str]:

@@ -350,9 +356,10 @@ def mock_project_scope() -> Dict[str, str]:
 
 @pytest.fixture(scope="session")
 def mock_onedp_project_scope() -> Dict[str, str]:
-    return "https://Sanitized.cognitiveservices.azure.com/api/projects/00000"
+    return "https://Sanitized.services.ai.azure.com/api/projects/00000"
 
 KEY_AZURE_MODEL_CONFIG = "azure_openai_model_config"
+KEY_ONE_DP_AZURE_MODEL_CONFIG = "azure_openai_model_config_onedp"
 KEY_OPENAI_MODEL_CONFIG = "openai_model_config"
 KEY_AZURE_PROJECT_SCOPE = "azure_ai_project_scope"
 KEY_ONE_DP_PROJECT_SCOPE = "azure_ai_one_dp_project_scope"

@@ -371,6 +378,18 @@ def model_config(
 
     return model_config
 
+@pytest.fixture(scope="session")
+def model_config_onedp(
+    connection_file: Dict[str, Any], mock_model_config_onedp: AzureOpenAIModelConfiguration
+) -> AzureOpenAIModelConfiguration:
+    if not is_live():
+        return mock_model_config_onedp
+
+    config = get_config(connection_file, KEY_ONE_DP_AZURE_MODEL_CONFIG)
+    model_config = AzureOpenAIModelConfiguration(**config)
+    AzureOpenAIModelConfiguration.__repr__ = lambda self: "<sensitive data redacted>"
+
+    return model_config
 
 @pytest.fixture
 def non_azure_openai_model_config(connection_file: Mapping[str, Any]) -> OpenAIModelConfiguration:
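
The new model_config_onedp fixture mirrors model_config: in playback it returns the sanitized mock, and in live runs it builds the configuration from the azure_openai_model_config_onedp entry in the connection file. A hypothetical consumer might look like the following sketch; the evaluator choice, inputs, and asserted key are placeholders, not part of this commit.

# Hypothetical test consuming the model_config_onedp fixture defined above.
# CoherenceEvaluator is imported from the package under test; the query,
# response, and the "coherence" result key are assumed for illustration.
from azure.ai.evaluation import CoherenceEvaluator


def test_coherence_against_onedp(model_config_onedp):
    evaluator = CoherenceEvaluator(model_config_onedp)
    result = evaluator(
        query="What is the capital of France?",
        response="Paris is the capital of France.",
    )
    # Score keys follow the evaluator's output schema.
    assert result["coherence"] >= 0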

sdk/evaluation/azure-ai-evaluation/tests/e2etests/test_mass_evaluate.py

Lines changed: 26 additions & 4 deletions
@@ -88,8 +88,18 @@ class TestMassEvaluate:
     - Multi-modal inputs: This one has some parameters for the different types of multi-modal inputs.
     """
 
-    @pytest.mark.skipif(not is_live(), reason="Skip in playback due to inconsistency in evaluation results.")
-    def test_evaluate_singleton_inputs(self, model_config, azure_cred, project_scope, data_file):
+    @pytest.mark.parametrize(
+        ("proj_scope", "cred", "conv", "m_config"),
+        (
+            ("project_scope", "azure_cred", "data_file", "model_config"),
+            # ("project_scope_onedp", "azure_cred_onedp", "data_file", "model_config_onedp"),
+        )
+    )
+    def test_evaluate_singleton_inputs(self, request, proj_scope, cred, conv, m_config):
+        project_scope = request.getfixturevalue(proj_scope)
+        azure_cred = request.getfixturevalue(cred)
+        data_file = request.getfixturevalue(conv)
+        model_config = request.getfixturevalue(m_config)
         # qa fails in playback but ONLY when using the pf proxy for some reason, and
         # using it without pf proxy causes CI to hang and timeout after 3 hours.
         evaluators = {

@@ -184,7 +194,7 @@ def test_evaluate_singleton_inputs(self, model_config, azure_cred, project_scope
         assert len(row_result_df["outputs.qa.similarity"]) == 3
         assert len(row_result_df["outputs.qa.gpt_similarity"]) == 3
 
-        assert len(metrics.keys()) == 62
+        assert len(metrics.keys()) == 76
         assert metrics["f1_score.f1_score"] >= 0
         assert metrics["gleu.gleu_score"] >= 0
         assert metrics["bleu.bleu_score"] >= 0

@@ -225,7 +235,19 @@
         assert metrics["qa.similarity"] >= 0
         assert metrics["qa.gpt_similarity"] >= 0
 
-    def test_evaluate_conversation(self, model_config, data_convo_file, azure_cred, project_scope):
+    @pytest.mark.parametrize(
+        ("proj_scope", "cred", "conv", "m_config"),
+        (
+            ("project_scope", "azure_cred", "data_convo_file", "model_config"),
+            # ("project_scope_onedp", "azure_cred_onedp", "data_convo_file", "model_config_onedp"),
+        )
+    )
+    def test_evaluate_conversation(self, request, proj_scope, cred, conv, m_config):
+        project_scope = request.getfixturevalue(proj_scope)
+        azure_cred = request.getfixturevalue(cred)
+        data_convo_file = request.getfixturevalue(conv)
+        model_config = request.getfixturevalue(m_config)
+
         evaluators = {
             "grounded": GroundednessEvaluator(model_config),
             "coherence": CoherenceEvaluator(model_config),
