Enable loading model from hub that has already been converted #13

Merged: 24 commits, Feb 12, 2025
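
This PR lets ExecuTorchModelForCausalLM.from_pretrained() load a .pte program that has already been exported, instead of always re-converting from the original checkpoint. A minimal usage sketch based on the new tests below — the model id and revisions are the ones the tests use, and the local path is a hypothetical placeholder:

from optimum.executorch import ExecuTorchModelForCausalLM

# 1) Plain model repo: conversion happens on the fly (and is cached),
#    so export=True no longer needs to be passed explicitly.
model = ExecuTorchModelForCausalLM.from_pretrained(
    "optimum-internal-testing/tiny-random-llama", recipe="xnnpack"
)

# 2) Revision that already contains an exported ExecuTorch program:
#    the existing .pte is loaded directly, with no re-export.
model = ExecuTorchModelForCausalLM.from_pretrained(
    "optimum-internal-testing/tiny-random-llama", revision="executorch"
)

# 3) Local directory produced by a previous export (hypothetical path).
model = ExecuTorchModelForCausalLM.from_pretrained("./exported_llama")
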
392 changes: 188 additions & 204 deletions optimum/executorch/modeling.py

Large diffs are not rendered by default.
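
Since the modeling.py diff is not rendered, the shape of the new loading logic has to be inferred from the tests below, which import _FILE_PATTERN from optimum.executorch.modeling and find_files_matching_pattern from optimum.utils.file_utils. A speculative sketch of that dispatch — _should_export is a hypothetical helper name, not the actual implementation:

from optimum.executorch.modeling import _FILE_PATTERN
from optimum.utils.file_utils import find_files_matching_pattern


def _should_export(model_name_or_path, revision="main"):
    # Hypothetical: if the repo or local directory already contains a .pte
    # program matching _FILE_PATTERN, load it directly; otherwise fall back
    # to exporting from the original checkpoint.
    pte_files = find_files_matching_pattern(model_name_or_path, pattern=_FILE_PATTERN, revision=revision)
    return len(pte_files) == 0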

3 changes: 1 addition & 2 deletions setup.py
@@ -12,8 +12,7 @@
     assert False, "Error: Could not open '%s' due %s\n" % (filepath, error)
 
 INSTALL_REQUIRE = [
-    # "optimum~=1.24",
-    "optimum@git+https://github.com/huggingface/optimum.git",
+    "optimum~=1.24",
     "executorch>=0.4.0",
     "transformers>=4.46",
 ]
62 changes: 41 additions & 21 deletions tests/models/test_modeling.py
@@ -16,52 +16,72 @@
 import os
 import tempfile
 import unittest
+from pathlib import Path
+from tempfile import TemporaryDirectory
 
 import pytest
 from executorch.extension.pybindings.portable_lib import ExecuTorchModule
-from transformers.testing_utils import slow
+from huggingface_hub import HfApi
 
 from optimum.executorch import ExecuTorchModelForCausalLM
+from optimum.executorch.modeling import _FILE_PATTERN
+from optimum.exporters.executorch import main_export
+from optimum.utils.file_utils import find_files_matching_pattern
 
 
 class ExecuTorchModelIntegrationTest(unittest.TestCase):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
 
-    @slow
-    @pytest.mark.run_slow
-    def test_load_model_from_hub(self):
-        model = ExecuTorchModelForCausalLM.from_pretrained(
-            model_name_or_path="NousResearch/Llama-3.2-1B",
-            export=True,
-            recipe="xnnpack",
-        )
+    def test_load_cached_model_from_hub(self):
+        model_id = "optimum-internal-testing/tiny-random-llama"
+
+        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe="xnnpack")
         self.assertIsInstance(model, ExecuTorchModelForCausalLM)
         self.assertIsInstance(model.model, ExecuTorchModule)
 
-    @slow
-    @pytest.mark.run_slow
-    def test_load_model_from_local_path(self):
-        from optimum.exporters.executorch import main_export
-
-        model_id = "NousResearch/Llama-3.2-1B"
-        task = "text-generation"
+    def test_load_et_model_from_hub(self):
+        model_id = "optimum-internal-testing/tiny-random-llama"
+        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, revision="executorch")
+        self.assertIsInstance(model, ExecuTorchModelForCausalLM)
+        self.assertIsInstance(model.model, ExecuTorchModule)
+
+        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, revision="executorch-subfolder")
+        self.assertIsInstance(model, ExecuTorchModelForCausalLM)
+        self.assertIsInstance(model.model, ExecuTorchModule)
+
+    def test_load_cached_model_from_local_path(self):
+        model_id = "optimum-internal-testing/tiny-random-llama"
         recipe = "xnnpack"
 
         with tempfile.TemporaryDirectory() as tempdir:
             # Export to a local dir
             main_export(
                 model_name_or_path=model_id,
-                task=task,
                 recipe=recipe,
                 output_dir=tempdir,
+                task="text-generation",
             )
             self.assertTrue(os.path.exists(f"{tempdir}/model.pte"))
 
             # Load the exported model from a local dir
-            model = ExecuTorchModelForCausalLM.from_pretrained(
-                model_name_or_path=tempdir,
-                export=False,
-            )
+            model = ExecuTorchModelForCausalLM.from_pretrained(tempdir)
             self.assertIsInstance(model, ExecuTorchModelForCausalLM)
             self.assertIsInstance(model.model, ExecuTorchModule)
+
+    def test_find_files_matching_pattern(self):
+        model_id = "optimum-internal-testing/tiny-random-llama"
+
+        # hub model
+        for revision in ("main", "executorch"):
+            pte_files = find_files_matching_pattern(model_id, pattern=_FILE_PATTERN, revision=revision)
+            self.assertTrue(len(pte_files) == 0 if revision == "main" else len(pte_files) > 0)
+
+        # local model
+        api = HfApi()
+        with TemporaryDirectory() as tmpdirname:
+            for revision in ("main", "executorch"):
+                local_dir = Path(tmpdirname) / revision
+                api.snapshot_download(repo_id=model_id, local_dir=local_dir, revision=revision)
+                pte_files = find_files_matching_pattern(local_dir, pattern=_FILE_PATTERN, revision=revision)
+                self.assertTrue(len(pte_files) == 0 if revision == "main" else len(pte_files) > 0)
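
For context, a pre-converted revision like the executorch branch queried above could be produced by exporting locally and pushing the artifacts to a dedicated branch. A sketch, not part of this PR — the target repo id is a placeholder and the branch is assumed to already exist (see HfApi.create_branch):

from huggingface_hub import HfApi

from optimum.exporters.executorch import main_export

# Export the checkpoint to a local directory (produces model.pte).
main_export(
    model_name_or_path="optimum-internal-testing/tiny-random-llama",
    recipe="xnnpack",
    output_dir="exported",
    task="text-generation",
)

# Push the exported program so that from_pretrained(..., revision="executorch")
# can later load it without re-converting.
api = HfApi()
api.upload_folder(
    folder_path="exported",
    repo_id="<your-namespace>/tiny-random-llama",  # placeholder repo id
    revision="executorch",
)
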
6 changes: 1 addition & 5 deletions tests/models/test_modeling_gemma.py
@@ -33,11 +33,7 @@ def test_gemma_text_generation_with_xnnpack(self):
         # TODO: Switch to use google/gemma-2b once https://github.com/huggingface/optimum/issues/2127 is fixed
         # model_id = "google/gemma-2b"
         model_id = "weqweasdas/RM-Gemma-2B"
-        model = ExecuTorchModelForCausalLM.from_pretrained(
-            model_name_or_path=model_id,
-            export=True,
-            recipe="xnnpack",
-        )
+        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe="xnnpack")
         self.assertIsInstance(model, ExecuTorchModelForCausalLM)
         self.assertIsInstance(model.model, ExecuTorchModule)

6 changes: 1 addition & 5 deletions tests/models/test_modeling_gemma2.py
@@ -33,11 +33,7 @@ def test_gemma2_text_generation_with_xnnpack(self):
         # TODO: Switch to use google/gemma-2-2b once https://github.com/huggingface/optimum/issues/2127 is fixed
         # model_id = "google/gemma-2-2b"
         model_id = "unsloth/gemma-2-2b-it"
-        model = ExecuTorchModelForCausalLM.from_pretrained(
-            model_name_or_path=model_id,
-            export=True,
-            recipe="xnnpack",
-        )
+        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe="xnnpack")
         self.assertIsInstance(model, ExecuTorchModelForCausalLM)
         self.assertIsInstance(model.model, ExecuTorchModule)

13 changes: 3 additions & 10 deletions tests/models/test_modeling_llama.py
@@ -33,11 +33,7 @@ def test_llama3_2_1b_text_generation_with_xnnpack(self):
         # TODO: Switch to use meta-llama/Llama-3.2-1B once https://github.com/huggingface/optimum/issues/2127 is fixed
         # model_id = "lama/Llama-3.2-1B"
         model_id = "NousResearch/Llama-3.2-1B"
-        model = ExecuTorchModelForCausalLM.from_pretrained(
-            model_name_or_path=model_id,
-            export=True,
-            recipe="xnnpack",
-        )
+        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe="xnnpack")
         self.assertIsInstance(model, ExecuTorchModelForCausalLM)
         self.assertIsInstance(model.model, ExecuTorchModule)

@@ -57,11 +53,8 @@ def test_llama3_2_3b_text_generation_with_xnnpack(self):
         # TODO: Switch to use meta-llama/Llama-3.2-3B once https://github.com/huggingface/optimum/issues/2127 is fixed
         # model_id = "lama/Llama-3.2-3B"
         model_id = "NousResearch/Hermes-3-Llama-3.2-3B"
-        model = ExecuTorchModelForCausalLM.from_pretrained(
-            model_name_or_path=model_id,
-            export=True,
-            recipe="xnnpack",
-        )
+        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe="xnnpack")
+
         self.assertIsInstance(model, ExecuTorchModelForCausalLM)
         self.assertIsInstance(model.model, ExecuTorchModule)

6 changes: 1 addition & 5 deletions tests/models/test_modeling_olmo.py
@@ -31,11 +31,7 @@ def __init__(self, *args, **kwargs):
     @pytest.mark.run_slow
     def test_olmo_text_generation_with_xnnpack(self):
         model_id = "allenai/OLMo-1B-hf"
-        model = ExecuTorchModelForCausalLM.from_pretrained(
-            model_name_or_path=model_id,
-            export=True,
-            recipe="xnnpack",
-        )
+        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe="xnnpack")
         self.assertIsInstance(model, ExecuTorchModelForCausalLM)
         self.assertIsInstance(model.model, ExecuTorchModule)

6 changes: 1 addition & 5 deletions tests/models/test_modeling_qwen2.py
@@ -31,11 +31,7 @@ def __init__(self, *args, **kwargs):
     @pytest.mark.run_slow
     def test_qwen2_5_text_generation_with_xnnpack(self):
         model_id = "Qwen/Qwen2.5-0.5B"
-        model = ExecuTorchModelForCausalLM.from_pretrained(
-            model_name_or_path=model_id,
-            export=True,
-            recipe="xnnpack",
-        )
+        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe="xnnpack")
         self.assertIsInstance(model, ExecuTorchModelForCausalLM)
         self.assertIsInstance(model.model, ExecuTorchModule)