aws · philschmid · Aug 29, 2024 · Aug 20, 2024
diff --git a/src/sagemaker_huggingface_inference_toolkit/transformers_utils.py b/src/sagemaker_huggingface_inference_toolkit/transformers_utils.py
@@ -21,7 +21,7 @@
 from huggingface_hub import HfApi, login, snapshot_download
 from transformers import AutoTokenizer, pipeline
 from transformers.file_utils import is_tf_available, is_torch_available
-from transformers.pipelines import Conversation, Pipeline
+from transformers.pipelines import Pipeline
 
 from sagemaker_huggingface_inference_toolkit.diffusers_utils import get_diffusers_pipeline, is_diffusers_available
 from sagemaker_huggingface_inference_toolkit.optimum_utils import (
@@ -117,25 +117,6 @@ def create_artifact_filter(framework):
         return []
 
 
-def wrap_conversation_pipeline(pipeline):
-    def wrapped_pipeline(inputs, *args, **kwargs):
-        converted_input = Conversation(
-            inputs["text"],
-            past_user_inputs=inputs.get("past_user_inputs", []),
-            generated_responses=inputs.get("generated_responses", []),
-        )
-        prediction = pipeline(converted_input, *args, **kwargs)
-        return {
-            "generated_text": prediction.generated_responses[-1],
-            "conversation": {
-                "past_user_inputs": prediction.past_user_inputs,
-                "generated_responses": prediction.generated_responses,
-            },
-        }
-
-    return wrapped_pipeline
-
-
 def _is_gpu_available():
     """
     checks if a gpu is available.
@@ -310,8 +291,4 @@ def get_pipeline(task: str, device: int, model_dir: Path, **kwargs) -> Pipeline:
             task=task, model=model_dir, device=device, trust_remote_code=TRUST_REMOTE_CODE, **kwargs
         )
 
-    # wrapp specific pipeline to support better ux
-    if task == "conversational":
-        hf_pipeline = wrap_conversation_pipeline(hf_pipeline)
-
     return hf_pipeline
diff --git a/tests/unit/test_transformers_utils.py b/tests/unit/test_transformers_utils.py
@@ -14,7 +14,6 @@
 import os
 import tempfile
 
-from transformers import pipeline
 from transformers.file_utils import is_torch_available
 from transformers.testing_utils import require_tf, require_torch, slow
 
@@ -26,7 +25,6 @@
     get_pipeline,
     infer_task_from_hub,
     infer_task_from_model_architecture,
-    wrap_conversation_pipeline,
 )
 
 
@@ -129,37 +127,3 @@ def test_infer_task_from_model_architecture():
         storage_dir = _load_model_from_hub(TASK_MODEL, tmpdirname)
         task = infer_task_from_model_architecture(f"{storage_dir}/config.json")
         assert task == "token-classification"
-
-
-@require_torch
-def test_wrap_conversation_pipeline():
-    init_pipeline = pipeline(
-        "conversational",
-        model="microsoft/DialoGPT-small",
-        tokenizer="microsoft/DialoGPT-small",
-        framework="pt",
-    )
-    conv_pipe = wrap_conversation_pipeline(init_pipeline)
-    data = {
-        "past_user_inputs": ["Which movie is the best ?"],
-        "generated_responses": ["It's Die Hard for sure."],
-        "text": "Can you explain why?",
-    }
-    res = conv_pipe(data)
-    assert "conversation" in res
-    assert "generated_text" in res
-
-
-@require_torch
-def test_wrapped_pipeline():
-    with tempfile.TemporaryDirectory() as tmpdirname:
-        storage_dir = _load_model_from_hub("microsoft/DialoGPT-small", tmpdirname)
-        conv_pipe = get_pipeline("conversational", -1, storage_dir)
-        data = {
-            "past_user_inputs": ["Which movie is the best ?"],
-            "generated_responses": ["It's Die Hard for sure."],
-            "text": "Can you explain why?",
-        }
-        res = conv_pipe(data)
-        assert "conversation" in res
-        assert "generated_text" in res