model2vec/hf_utils.py (12 changes: 10 additions & 2 deletions)
@@ -53,11 +53,19 @@ def save_pretrained(

     save_file(model_weights, folder_path / "model.safetensors")
     tokenizer.save(str(folder_path / "tokenizer.json"), pretty=False)
-    json.dump(config, open(folder_path / "config.json", "w"), indent=4)
+
+    # Create a copy of config and add dtype and vocab quantization
+    cfg = dict(config)
+    cfg["embedding_dtype"] = np.dtype(embeddings.dtype).name
+    if mapping is not None:
+        cfg["vocabulary_quantization"] = int(embeddings.shape[0])
+    else:
+        cfg.pop("vocabulary_quantization", None)
+    json.dump(cfg, open(folder_path / "config.json", "w"), indent=4)
 
     # Create modules.json
     modules = [{"idx": 0, "name": "0", "path": ".", "type": "sentence_transformers.models.StaticEmbedding"}]
-    if config.get("normalize"):
+    if cfg.get("normalize"):
         # If normalize=True, add sentence_transformers.models.Normalize
         modules.append({"idx": 1, "name": "1", "path": "1_Normalize", "type": "sentence_transformers.models.Normalize"})
     json.dump(modules, open(folder_path / "modules.json", "w"), indent=4)
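For context, after this change the saved `config.json` always records the embedding precision and, for vocabulary-quantized models, the cluster count. A minimal sketch of inspecting the written file; the output directory and the example values are hypothetical:

```python
import json
from pathlib import Path

# Hypothetical output directory used for illustration.
folder_path = Path("my-static-model")

# config.json as written by save_pretrained above.
config = json.loads((folder_path / "config.json").read_text())

# Always written: the numpy dtype name of the embedding matrix.
print(config["embedding_dtype"])              # e.g. "float32" or "int8"

# Only written when a token mapping exists (vocabulary quantization);
# the value is the number of rows (clusters) in the embedding matrix.
# A stale key is popped when saving a non-quantized model.
print(config.get("vocabulary_quantization"))  # e.g. 256, or None
```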
model2vec/model.py (15 changes: 12 additions & 3 deletions)
@@ -111,6 +111,17 @@ def normalize(self, value: bool) -> None:
             )
         self.config["normalize"] = value
 
+    @property
+    def embedding_dtype(self) -> str:
+        """Get the dtype (precision) of the embedding matrix."""
+        return np.dtype(self.embedding.dtype).name
+
+    @property
+    def vocabulary_quantization(self) -> int | None:
+        """Get the number of clusters used for vocabulary quantization, if applicable."""
+        is_quantized = (self.token_mapping is not None) or (len(self.embedding) != len(self.tokens))
+        return int(self.embedding.shape[0]) if is_quantized else None
+
     def save_pretrained(self, path: PathLike, model_name: str | None = None, subfolder: str | None = None) -> None:
         """
         Save the pretrained model.
@@ -493,8 +504,6 @@ def quantize_model(
     :return: A new StaticModel with the quantized embeddings.
     :raises: ValueError if the model is already quantized.
     """
-    from model2vec.quantization import quantize_and_reduce_dim
-
    token_mapping: np.ndarray | None
    weights: np.ndarray | None
    if vocabulary_quantization is not None:
@@ -520,7 +529,7 @@
     return StaticModel(
         vectors=embeddings,
         tokenizer=model.tokenizer,
-        config=model.config,
+        config=dict(model.config),
         weights=weights,
         token_mapping=token_mapping,
         normalize=model.normalize,
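The two new read-only properties surface this metadata on `StaticModel` itself. A short usage sketch, assuming the `quantize_to` argument shown in the tests below; the model id here is only an example:

```python
from model2vec import StaticModel

# Example model id/path; substitute any Model2Vec model on disk or on the Hub.
model = StaticModel.from_pretrained("minishlab/potion-base-8M", quantize_to="float16")

print(model.embedding_dtype)          # "float16": numpy dtype name of the embedding matrix
print(model.vocabulary_quantization)  # None unless the vocabulary was quantized; else the cluster count
```

The switch to `config=dict(model.config)` in `quantize_model` hands the new model its own copy of the config, so later mutations (such as the keys written at save time) do not leak back into the source model's config.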
tests/test_model.py (16 changes: 13 additions & 3 deletions)
@@ -180,7 +180,9 @@ def test_load_pretrained(
     # Assert that the loaded model has the same properties as the original one
     np.testing.assert_array_equal(loaded_model.embedding, mock_vectors)
     assert loaded_model.tokenizer.get_vocab() == mock_tokenizer.get_vocab()
-    assert loaded_model.config == mock_config
+    for k, v in mock_config.items():
+        assert loaded_model.config.get(k) == v
+    assert "embedding_dtype" in loaded_model.config
 
 
 def test_load_pretrained_quantized(
@@ -198,19 +200,22 @@
     # Assert that the loaded model has the same properties as the original one
     assert loaded_model.embedding.dtype == np.int8
     assert loaded_model.embedding.shape == mock_vectors.shape
+    assert loaded_model.embedding_dtype == "int8"
 
     # Load the model back from the same path
     loaded_model = StaticModel.from_pretrained(save_path, quantize_to="float16")
 
     # Assert that the loaded model has the same properties as the original one
     assert loaded_model.embedding.dtype == np.float16
     assert loaded_model.embedding.shape == mock_vectors.shape
+    assert loaded_model.embedding_dtype == "float16"
 
     # Load the model back from the same path
     loaded_model = StaticModel.from_pretrained(save_path, quantize_to="float32")
     # Assert that the loaded model has the same properties as the original one
     assert loaded_model.embedding.dtype == np.float32
     assert loaded_model.embedding.shape == mock_vectors.shape
+    assert loaded_model.embedding_dtype == "float32"
 
     # Load the model back from the same path
     loaded_model = StaticModel.from_pretrained(save_path, quantize_to="float64")
@@ -234,15 +239,19 @@ def test_load_pretrained_dim(
     # Assert that the loaded model has the same properties as the original one
     np.testing.assert_array_equal(loaded_model.embedding, mock_vectors[:, :2])
     assert loaded_model.tokenizer.get_vocab() == mock_tokenizer.get_vocab()
-    assert loaded_model.config == mock_config
+    for k, v in mock_config.items():
+        assert loaded_model.config.get(k) == v
+    assert "embedding_dtype" in loaded_model.config
 
     # Load the model back from the same path
     loaded_model = StaticModel.from_pretrained(save_path, dimensionality=None)
 
     # Assert that the loaded model has the same properties as the original one
     np.testing.assert_array_equal(loaded_model.embedding, mock_vectors)
     assert loaded_model.tokenizer.get_vocab() == mock_tokenizer.get_vocab()
-    assert loaded_model.config == mock_config
+    for k, v in mock_config.items():
+        assert loaded_model.config.get(k) == v
+    assert "embedding_dtype" in loaded_model.config
 
     # Load the model back from the same path
     with pytest.raises(ValueError):
@@ -267,6 +276,7 @@ def test_load_pretrained_vocabulary_quantized(
     assert loaded_model.weights is not None
     assert loaded_model.weights.shape == (5,)
     assert loaded_model.token_mapping is not None
+    assert loaded_model.vocabulary_quantization == 3
     assert len(loaded_model.token_mapping) == mock_tokenizer.get_vocab_size()
     assert len(loaded_model.token_mapping) == len(loaded_model.weights)
     assert loaded_model.encode("word1 word2").shape == (2,)
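To make the invariants these tests encode concrete, here is a hedged sketch of loading a vocabulary-quantized model through the public API; the path is hypothetical and the assertions mirror the ones above:

```python
from model2vec import StaticModel

# Hypothetical path: any model that was saved with vocabulary quantization enabled.
model = StaticModel.from_pretrained("path/to/vocab-quantized-model")

# The embedding matrix holds one row per cluster, so vocabulary_quantization
# reports the cluster count rather than the tokenizer vocabulary size.
assert model.vocabulary_quantization == model.embedding.shape[0]

# token_mapping and weights still cover every token in the vocabulary.
assert model.token_mapping is not None and model.weights is not None
assert len(model.token_mapping) == len(model.weights)

# Encoding works as usual; the output width matches the embedding width.
vec = model.encode("hello world")
assert vec.shape[-1] == model.embedding.shape[1]
```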