Skip to content

Commit

Permalink
chore: add entry point to poetry config
Browse files Browse the repository at this point in the history
  • Loading branch information
davidberenstein1957 committed Jun 19, 2023
1 parent a1a726e commit 3353db7
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 71 deletions.
12 changes: 0 additions & 12 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,6 @@ repos:
exclude_types: [text, jupyter]
- id: trailing-whitespace

- repo: https://github.com/Lucas-C/pre-commit-hooks
rev: v1.5.1
hooks:
- id: insert-license
name: "Insert license header in Python source files"
files: \.py$
args:
- --license-filepath
- src/license_header.txt
- --fuzzy-match-generates-todo
# - --remove-header

- repo: https://github.com/psf/black
rev: 23.3.0
hooks:
Expand Down
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ To use spaCy with SetFit use the following code:

```python
import spacy
import spacy_setfit

# Create some example data
train_dataset = {
Expand Down
8 changes: 7 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "spacy-setfit"
version = "0.1"
version = "0.1.2"
description = ""
authors = ["davidberenstein1957 <[email protected]>"]
readme = "README.md"
Expand All @@ -12,6 +12,11 @@ spacy = "^3.5.3"
setfit = "^0.7.0"
rich = "^13.4.2"

[tool.poetry.plugins]

[tool.poetry.plugins."spacy_factories"]
"spacy" = "spacy_setfit.__init__:create_setfit_model"

[tool.poetry.group.dev.dependencies]
pytest = "^7.3.2"
ruff = "^0.0.272"
Expand All @@ -21,3 +26,4 @@ pre-commit = "^3.3.3"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

135 changes: 78 additions & 57 deletions tests/test_pipeline.py
Original file line number Diff line number Diff line change
@@ -1,65 +1,82 @@

import pytest # noqa
import spacy_setfit # noqa
import pytest # noqa
import pickle


def test_multi_label(nlp, dataset_multi_label):  # noqa
    """Adding the text_categorizer pipe with a multi-label train set must not raise."""
    trainer_args = {"train_dataset": dataset_multi_label, "num_iterations": 1}
    pipe_config = {
        "pretrained_model_name_or_path": "paraphrase-MiniLM-L3-v2",
        "setfit_trainer_args": trainer_args,
    }
    nlp.add_pipe("text_categorizer", config=pipe_config)

def test_single_label(nlp, dataset_single_label):  # noqa
    """Adding the text_categorizer pipe with a single-label train set must not raise."""
    trainer_args = {"train_dataset": dataset_single_label, "num_iterations": 1}
    pipe_config = {
        "pretrained_model_name_or_path": "paraphrase-MiniLM-L3-v2",
        "setfit_trainer_args": trainer_args,
    }
    nlp.add_pipe("text_categorizer", config=pipe_config)
def test_multi_label(nlp, dataset_multi_label):  # noqa
    """Adding the text_categorizer pipe with a multi-label train set must not raise."""
    trainer_args = {
        "train_dataset": dataset_multi_label,
        "num_iterations": 1,
    }
    nlp.add_pipe(
        "text_categorizer",
        config={
            "pretrained_model_name_or_path": "sentence-transformers/paraphrase-MiniLM-L3-v2",
            "setfit_trainer_args": trainer_args,
        },
    )


def test_single_label(nlp, dataset_single_label):  # noqa
    """Pipe trains on a single-label dataset and the resulting doc exposes .cats."""
    trainer_args = {
        "train_dataset": dataset_single_label,
        "num_iterations": 1,
    }
    nlp.add_pipe(
        "text_categorizer",
        config={
            "pretrained_model_name_or_path": "paraphrase-MiniLM-L3-v2",
            "setfit_trainer_args": trainer_args,
        },
    )
    # Running a doc through the trained pipeline and touching .cats must not raise.
    doc = nlp("I really need to get a new sofa.")
    _ = doc.cats

def test_alternative_trainer_args(nlp, dataset_single_label):  # noqa
    """Pipe creation must accept additional trainer args (an eval_dataset)."""
    trainer_args = {
        "train_dataset": dataset_single_label,
        "eval_dataset": dataset_single_label,
        "num_iterations": 1,
    }
    pipe_config = {
        "pretrained_model_name_or_path": "paraphrase-MiniLM-L3-v2",
        "setfit_trainer_args": trainer_args,
    }
    nlp.add_pipe("text_categorizer", config=pipe_config)

def test_model_without_model_args(nlp, dataset_single_label):  # noqa
    """Pipe creation must work when no pretrained model path is given (defaults apply)."""
    pipe_config = {
        "setfit_trainer_args": {
            "train_dataset": dataset_single_label,
            "num_iterations": 1,
        }
    }
    nlp.add_pipe("text_categorizer", config=pipe_config)

def test_alternative_trainer_args(nlp, dataset_single_label):  # noqa
    """Pipe creation must accept additional trainer args (an eval_dataset).

    NOTE(review): this span was a garbled diff rendering that interleaved the
    pre-commit ``test_model_with_model_args`` with the post-commit
    ``test_alternative_trainer_args``; reconstructed here as the coherent
    post-commit version (``test_model_with_model_args`` exists separately below).
    """
    nlp.add_pipe(
        "text_categorizer",
        config={
            "pretrained_model_name_or_path": "paraphrase-MiniLM-L3-v2",
            "setfit_trainer_args": {
                "train_dataset": dataset_single_label,
                "eval_dataset": dataset_single_label,
                "num_iterations": 1,
            },
        },
    )


def test_model_without_model_args(nlp, dataset_single_label):  # noqa
    """Pipe creation must work when no pretrained model path is given (defaults apply)."""
    trainer_args = {
        "train_dataset": dataset_single_label,
        "num_iterations": 1,
    }
    nlp.add_pipe(
        "text_categorizer",
        config={"setfit_trainer_args": trainer_args},
    )


def test_model_with_model_args(nlp, dataset_single_label):  # noqa
    """Pipe creation must forward from_pretrained kwargs (force_download) without error."""
    trainer_args = {
        "train_dataset": dataset_single_label,
        "num_iterations": 1,
    }
    pretrained_args = {"force_download": True}
    nlp.add_pipe(
        "text_categorizer",
        config={
            "setfit_trainer_args": trainer_args,
            "setfit_from_pretrained_args": pretrained_args,
        },
    )


def test_save_load_pickle(nlp, dataset_single_label):
nlp.add_pipe("text_categorizer", config={
"pretrained_model_name_or_path": "paraphrase-MiniLM-L3-v2",
"setfit_trainer_args": {
"train_dataset": dataset_single_label
}
})
nlp.add_pipe(
"text_categorizer",
config={
"pretrained_model_name_or_path": "paraphrase-MiniLM-L3-v2",
"setfit_trainer_args": {"train_dataset": dataset_single_label},
},
)
doc = nlp("I really need to get a new sofa.")
doc.cats

Expand All @@ -73,9 +90,13 @@ def test_save_load_pickle(nlp, dataset_single_label):

doc = nlp("I really need to get a new sofa.")


def test_without_train_args(nlp):
    """Building the pipe with no setfit_trainer_args at all must raise.

    NOTE(review): this span was a garbled diff rendering with the old and new
    ``add_pipe`` calls interleaved inside one body; reconstructed here as the
    coherent post-commit version.
    """
    with pytest.raises(Exception):
        nlp.add_pipe(
            "text_categorizer",
            config={
                "pretrained_model_name_or_path": "paraphrase-MiniLM-L3-v2",
            },
        )
        nlp("I really need to get a new sofa.")

0 comments on commit 3353db7

Please sign in to comment.