Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ repos:
hooks:
- id: pydoclint
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.4.10
rev: v0.13.0
hooks:
- id: ruff
args: [ --fix ]
Expand Down
4 changes: 1 addition & 3 deletions model2vec/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from logging import getLogger
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Any, Iterator, Sequence, Union, cast, overload
from typing import Any, Iterator, Sequence, Union, overload

import numpy as np
from joblib import delayed
Expand Down Expand Up @@ -493,8 +493,6 @@ def quantize_model(
:return: A new StaticModel with the quantized embeddings.
:raises: ValueError if the model is already quantized.
"""
from model2vec.quantization import quantize_and_reduce_dim

token_mapping: np.ndarray | None
weights: np.ndarray | None
if vocabulary_quantization is not None:
Expand Down
6 changes: 1 addition & 5 deletions model2vec/utils.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,14 @@
# -*- coding: utf-8 -*-
from __future__ import annotations

import json
import logging
import re
from importlib import import_module
from importlib.metadata import metadata
from pathlib import Path
from typing import Any, Iterator, Protocol, cast
from typing import Any, Iterator, Protocol

import numpy as np
import safetensors
from joblib import Parallel
from tokenizers import Tokenizer
from tqdm import tqdm

logger = logging.getLogger(__name__)
Expand Down
3 changes: 1 addition & 2 deletions model2vec/vocabulary_quantization.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@
from sklearn.cluster import KMeans
except ImportError:
raise ImportError(
"scikit-learn is required for quantizing the vocabulary. "
"Please install model2vec with the quantization extra."
"scikit-learn is required for quantizing the vocabulary. Please install model2vec with the quantization extra."
)


Expand Down
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ select = [
"C90",
# Pydocstyle: Enforce docstrings
"D",
# Pyflakes: Detect unused imports and other errors
"F",
# Isort: Enforce import order
"I",
# Numpy: Enforce numpy style
Expand All @@ -96,7 +98,7 @@ select = [

ignore = [
# Allow function arguments and *args to be untyped, and allow Any type
"ANN101", "ANN102", "ANN401",
"ANN001", "ANN002", "ANN401",
# Pydocstyle ignores
"D100", "D101", "D104", "D203", "D212", "D401",
# Allow use of f-strings in logging
Expand Down