model2vec/hf_utils.py (12 changes: 10 additions & 2 deletions)
@@ -53,11 +53,19 @@ def save_pretrained(

     save_file(model_weights, folder_path / "model.safetensors")
     tokenizer.save(str(folder_path / "tokenizer.json"), pretty=False)
-    json.dump(config, open(folder_path / "config.json", "w"), indent=4)
+
+    # Create a copy of config and add dtype and vocab quantization
+    cfg = dict(config)
+    cfg["embedding_dtype"] = np.dtype(embeddings.dtype).name
+    if mapping is not None:
+        cfg["vocabulary_quantization"] = int(embeddings.shape[0])
+    else:
+        cfg.pop("vocabulary_quantization", None)
+    json.dump(cfg, open(folder_path / "config.json", "w"), indent=4)
 
     # Create modules.json
     modules = [{"idx": 0, "name": "0", "path": ".", "type": "sentence_transformers.models.StaticEmbedding"}]
-    if config.get("normalize"):
+    if cfg.get("normalize"):
         # If normalize=True, add sentence_transformers.models.Normalize
         modules.append({"idx": 1, "name": "1", "path": "1_Normalize", "type": "sentence_transformers.models.Normalize"})
     json.dump(modules, open(folder_path / "modules.json", "w"), indent=4)
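For context, after this change the saved `config.json` always records the embedding precision and, for vocabulary-quantized models, the cluster count. A minimal sketch of inspecting the written file; the output directory and the example values are hypothetical:

```python
import json
from pathlib import Path

# Hypothetical output directory used for illustration.
folder_path = Path("my-static-model")

# config.json as written by save_pretrained above.
config = json.loads((folder_path / "config.json").read_text())

# Always written: the numpy dtype name of the embedding matrix.
print(config["embedding_dtype"])              # e.g. "float32" or "int8"

# Only written when a token mapping exists (vocabulary quantization);
# the value is the number of rows (clusters) in the embedding matrix.
# A stale key is popped when saving a non-quantized model.
print(config.get("vocabulary_quantization"))  # e.g. 256, or None
```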
model2vec/model.py (15 changes: 12 additions & 3 deletions)
@@ -111,6 +111,17 @@ def normalize(self, value: bool) -> None:
             )
         self.config["normalize"] = value
 
+    @property
+    def embedding_dtype(self) -> str:
+        """Get the dtype (precision) of the embedding matrix."""
+        return np.dtype(self.embedding.dtype).name
+
+    @property
+    def vocabulary_quantization(self) -> int | None:
+        """Get the number of clusters used for vocabulary quantization, if applicable."""
+        is_quantized = (self.token_mapping is not None) or (len(self.embedding) != len(self.tokens))
+        return int(self.embedding.shape[0]) if is_quantized else None
+
     def save_pretrained(self, path: PathLike, model_name: str | None = None, subfolder: str | None = None) -> None:
         """
         Save the pretrained model.
@@ -493,8 +504,6 @@ def quantize_model(
     :return: A new StaticModel with the quantized embeddings.
     :raises: ValueError if the model is already quantized.
     """
-    from model2vec.quantization import quantize_and_reduce_dim
-
    token_mapping: np.ndarray | None
    weights: np.ndarray | None
    if vocabulary_quantization is not None:
@@ -520,7 +529,7 @@
     return StaticModel(
         vectors=embeddings,
         tokenizer=model.tokenizer,
-        config=model.config,
+        config=dict(model.config),
         weights=weights,
         token_mapping=token_mapping,
         normalize=model.normalize,
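The two new read-only properties surface this metadata on `StaticModel` itself. A short usage sketch, assuming the `quantize_to` argument shown in the tests below; the model id here is only an example:

```python
from model2vec import StaticModel

# Example model id/path; substitute any Model2Vec model on disk or on the Hub.
model = StaticModel.from_pretrained("minishlab/potion-base-8M", quantize_to="float16")

print(model.embedding_dtype)          # "float16": numpy dtype name of the embedding matrix
print(model.vocabulary_quantization)  # None unless the vocabulary was quantized; else the cluster count
```

The switch to `config=dict(model.config)` in `quantize_model` hands the new model its own copy of the config, so later mutations (such as the keys written at save time) do not leak back into the source model's config.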
tests/test_model.py (16 changes: 13 additions & 3 deletions)
@@ -180,7 +180,9 @@ def test_load_pretrained(
     # Assert that the loaded model has the same properties as the original one
     np.testing.assert_array_equal(loaded_model.embedding, mock_vectors)
     assert loaded_model.tokenizer.get_vocab() == mock_tokenizer.get_vocab()
-    assert loaded_model.config == mock_config
+    for k, v in mock_config.items():
+        assert loaded_model.config.get(k) == v
+    assert "embedding_dtype" in loaded_model.config
 
 
 def test_load_pretrained_quantized(
@@ -198,19 +200,22 @@
     # Assert that the loaded model has the same properties as the original one
     assert loaded_model.embedding.dtype == np.int8
     assert loaded_model.embedding.shape == mock_vectors.shape
+    assert loaded_model.embedding_dtype == "int8"
 
     # Load the model back from the same path
     loaded_model = StaticModel.from_pretrained(save_path, quantize_to="float16")
 
     # Assert that the loaded model has the same properties as the original one
     assert loaded_model.embedding.dtype == np.float16
     assert loaded_model.embedding.shape == mock_vectors.shape
+    assert loaded_model.embedding_dtype == "float16"
 
     # Load the model back from the same path
     loaded_model = StaticModel.from_pretrained(save_path, quantize_to="float32")
     # Assert that the loaded model has the same properties as the original one
     assert loaded_model.embedding.dtype == np.float32
     assert loaded_model.embedding.shape == mock_vectors.shape
+    assert loaded_model.embedding_dtype == "float32"
 
     # Load the model back from the same path
     loaded_model = StaticModel.from_pretrained(save_path, quantize_to="float64")
@@ -234,15 +239,19 @@ def test_load_pretrained_dim(
     # Assert that the loaded model has the same properties as the original one
     np.testing.assert_array_equal(loaded_model.embedding, mock_vectors[:, :2])
     assert loaded_model.tokenizer.get_vocab() == mock_tokenizer.get_vocab()
-    assert loaded_model.config == mock_config
+    for k, v in mock_config.items():
+        assert loaded_model.config.get(k) == v
+    assert "embedding_dtype" in loaded_model.config
 
     # Load the model back from the same path
     loaded_model = StaticModel.from_pretrained(save_path, dimensionality=None)
 
     # Assert that the loaded model has the same properties as the original one
     np.testing.assert_array_equal(loaded_model.embedding, mock_vectors)
     assert loaded_model.tokenizer.get_vocab() == mock_tokenizer.get_vocab()
-    assert loaded_model.config == mock_config
+    for k, v in mock_config.items():
+        assert loaded_model.config.get(k) == v
+    assert "embedding_dtype" in loaded_model.config
 
     # Load the model back from the same path
     with pytest.raises(ValueError):
@@ -267,6 +276,7 @@ def test_load_pretrained_vocabulary_quantized(
     assert loaded_model.weights is not None
     assert loaded_model.weights.shape == (5,)
     assert loaded_model.token_mapping is not None
+    assert loaded_model.vocabulary_quantization == 3
     assert len(loaded_model.token_mapping) == mock_tokenizer.get_vocab_size()
     assert len(loaded_model.token_mapping) == len(loaded_model.weights)
     assert loaded_model.encode("word1 word2").shape == (2,)
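To make the invariants these tests encode concrete, here is a hedged sketch of loading a vocabulary-quantized model through the public API; the path is hypothetical and the assertions mirror the ones above:

```python
from model2vec import StaticModel

# Hypothetical path: any model that was saved with vocabulary quantization enabled.
model = StaticModel.from_pretrained("path/to/vocab-quantized-model")

# The embedding matrix holds one row per cluster, so vocabulary_quantization
# reports the cluster count rather than the tokenizer vocabulary size.
assert model.vocabulary_quantization == model.embedding.shape[0]

# token_mapping and weights still cover every token in the vocabulary.
assert model.token_mapping is not None and model.weights is not None
assert len(model.token_mapping) == len(model.weights)

# Encoding works as usual; the output width matches the embedding width.
vec = model.encode("hello world")
assert vec.shape[-1] == model.embedding.shape[1]
```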