
Commit aedbfa5

w-javed authored and Rena Chen committed
Removing Multimodal evals and fixes few tests (#39558)
* Removing Multimodal evals and fixes few tests
* fix
* release date
* release date
* release date
1 parent 743911e · commit aedbfa5

18 files changed: +21 −807 lines

Diff for: sdk/evaluation/azure-ai-evaluation/CHANGELOG.md

+6 lines

@@ -1,5 +1,11 @@
 # Release History
 
+## 1.3.0 (Unreleased)
+
+### Breaking Changes
+- The multimodal-specific evaluators `ContentSafetyMultimodalEvaluator`, `ViolenceMultimodalEvaluator`, `SexualMultimodalEvaluator`, `SelfHarmMultimodalEvaluator`, `HateUnfairnessMultimodalEvaluator`, and `ProtectedMaterialMultimodalEvaluator` have been removed. Please use `ContentSafetyEvaluator`, `ViolenceEvaluator`, `SexualEvaluator`, `SelfHarmEvaluator`, `HateUnfairnessEvaluator`, and `ProtectedMaterialEvaluator` instead.
+- The metric name in `ProtectedMaterialEvaluator`'s output has changed from `protected_material.fictional_characters_label` to `protected_material.fictional_characters_defect_rate`, making it consistent with the other evaluators' metric names (which end with `_defect_rate`).
+
 ## 1.2.0 (2025-01-27)
 
 ### Features Added
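For migration, here is a minimal sketch of replacing the removed classes with the general-purpose evaluators, assuming a dict-style Azure AI project configuration and `DefaultAzureCredential`; the project values, image URL, and message text below are placeholders, not taken from this commit:

```python
from azure.ai.evaluation import ContentSafetyEvaluator, ProtectedMaterialEvaluator
from azure.identity import DefaultAzureCredential

# Placeholder project details for illustration only.
azure_ai_project = {
    "subscription_id": "<subscription-id>",
    "resource_group_name": "<resource-group>",
    "project_name": "<project-name>",
}

# A single-turn multimodal conversation: one user message carrying an image,
# one assistant reply.
conversation = {
    "messages": [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What is in this image?"},
                {"type": "image_url", "image_url": {"url": "https://example.com/cat.jpg"}},
            ],
        },
        {"role": "assistant", "content": "The image shows a cat."},
    ]
}

# Before: ContentSafetyMultimodalEvaluator(...). After: the general-purpose
# evaluator handles the same conversation payload.
content_safety = ContentSafetyEvaluator(
    credential=DefaultAzureCredential(), azure_ai_project=azure_ai_project
)
result = content_safety(conversation=conversation)

# Per the changelog, the protected-material metric key now ends with
# _defect_rate, consistent with the other evaluators.
protected_material = ProtectedMaterialEvaluator(
    credential=DefaultAzureCredential(), azure_ai_project=azure_ai_project
)
pm_result = protected_material(conversation=conversation)
print(pm_result.get("protected_material.fictional_characters_defect_rate"))
```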

Diff for: sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/__init__.py

+1 −15 lines

@@ -12,14 +12,6 @@
     SexualEvaluator,
     ViolenceEvaluator,
 )
-from ._evaluators._multimodal._content_safety_multimodal import (
-    ContentSafetyMultimodalEvaluator,
-    HateUnfairnessMultimodalEvaluator,
-    SelfHarmMultimodalEvaluator,
-    SexualMultimodalEvaluator,
-    ViolenceMultimodalEvaluator,
-)
-from ._evaluators._multimodal._protected_material import ProtectedMaterialMultimodalEvaluator
 from ._evaluators._f1_score import F1ScoreEvaluator
 from ._evaluators._fluency import FluencyEvaluator
 from ._evaluators._gleu import GleuScoreEvaluator
@@ -72,11 +64,5 @@
     "EvaluatorConfig",
     "Conversation",
     "Message",
-    "EvaluationResult",
-    "ContentSafetyMultimodalEvaluator",
-    "HateUnfairnessMultimodalEvaluator",
-    "SelfHarmMultimodalEvaluator",
-    "SexualMultimodalEvaluator",
-    "ViolenceMultimodalEvaluator",
-    "ProtectedMaterialMultimodalEvaluator",
+    "EvaluationResult"
 ]
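As a usage note (not part of the diff), the replacement classes named in the changelog remain importable from the package root after this change:

```python
# The multimodal classes are gone from the package surface; the remaining
# general-purpose evaluators are imported the same way as before.
from azure.ai.evaluation import (
    ContentSafetyEvaluator,
    HateUnfairnessEvaluator,
    SelfHarmEvaluator,
    SexualEvaluator,
    ViolenceEvaluator,
    ProtectedMaterialEvaluator,
)
```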

Diff for: sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/utils.py

+8 −8 lines

@@ -366,7 +366,7 @@ def raise_exception(msg, target):
     if not isinstance(messages, list):
         raise_exception(
             "'messages' parameter must be a JSON-compatible list of chat messages",
-            ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
+            ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
         )
     expected_roles = {"user", "assistant", "system"}
     image_found = False
@@ -393,7 +393,7 @@ def raise_exception(msg, target):
         ):
             raise_exception(
                 f"Messages must be a strongly typed class of ChatRequestMessage. Message number: {num}",
-                ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
+                ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
             )
         if isinstance(message, AssistantMessage):
             assistant_message_count += 1
@@ -407,7 +407,7 @@ def raise_exception(msg, target):
         if message.get("role") not in expected_roles:
             raise_exception(
                 f"Invalid role provided: {message.get('role')}. Message number: {num}",
-                ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
+                ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
             )
         if message.get("role") == "assistant":
             assistant_message_count += 1
@@ -417,29 +417,29 @@ def raise_exception(msg, target):
         if not isinstance(content, (str, list)):
             raise_exception(
                 f"Content in each turn must be a string or array. Message number: {num}",
-                ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
+                ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
             )
         if isinstance(content, list):
             if any(item.get("type") == "image_url" and "url" in item.get("image_url", {}) for item in content):
                 image_found = True
     if not image_found:
         raise_exception(
             "Message needs to have multi-modal input like images.",
-            ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
+            ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
         )
     if assistant_message_count == 0:
         raise_exception(
             "Assistant role required in one of the messages.",
-            ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
+            ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
         )
     if user_message_count == 0:
         raise_exception(
             "User role required in one of the messages.",
-            ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
+            ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
         )
     if assistant_message_count > 1:
         raise_exception(
             "Evaluators for multimodal conversations only support single turn. "
             "User and assistant role expected as the only role in each message.",
-            ErrorTarget.CONTENT_SAFETY_MULTIMODAL_EVALUATOR,
+            ErrorTarget.CONTENT_SAFETY_CHAT_EVALUATOR,
         )
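To make the accepted input shape concrete, here is an illustrative dict-based messages list that satisfies every check this validator enforces, assembled from the error messages above; the text and URL are placeholders:

```python
# Rules enforced by the validator: roles drawn from {"user", "assistant",
# "system"}; content as a string or a list; at least one image_url item with
# a "url" key somewhere in the conversation; at least one user message; and
# exactly one assistant message (single-turn only).
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "Describe this picture."},
            {"type": "image_url", "image_url": {"url": "https://example.com/photo.png"}},
        ],
    },
    {"role": "assistant", "content": "It is a landscape photo of mountains."},
]
```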

Diff for: sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_multimodal/__init__.py

−20 lines
This file was deleted.

Diff for: sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal.py

−132 lines
This file was deleted.

Diff for: sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_multimodal/_content_safety_multimodal_base.py

−55 lines
This file was deleted.
