Skip to content

Commit 14f413f

Browse files
Merge pull request #26 from OpenDataServices/short-license-titles
Add short titles for licenses
2 parents 3a453bf + 232298c commit 14f413f

7 files changed

+143
-23
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
"""rename column license name to license title
2+
3+
Revision ID: 8182d8c386f7
4+
Revises: 3499656b84e7
5+
Create Date: 2025-02-25 15:40:01.727396
6+
7+
"""
8+
9+
from typing import Sequence, Union
10+
11+
from alembic import op
12+
13+
14+
# revision identifiers, used by Alembic.
15+
revision: str = "8182d8c386f7"
16+
down_revision: Union[str, None] = "3499656b84e7"
17+
branch_labels: Union[str, Sequence[str], None] = None
18+
depends_on: Union[str, Sequence[str], None] = None
19+
20+
21+
def upgrade() -> None:
    """Rename ``dataset.license_name`` to ``dataset.license_title``.

    The same alteration also marks the column as nullable.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.alter_column(
        table_name="dataset",
        column_name="license_name",
        new_column_name="license_title",
        nullable=True,
    )
    # ### end Alembic commands ###
27+
28+
29+
def downgrade() -> None:
    """Rename ``dataset.license_title`` back to ``dataset.license_name``.

    The column is kept nullable, mirroring what ``upgrade`` sets.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.alter_column(
        table_name="dataset",
        column_name="license_title",
        new_column_name="license_name",
        nullable=True,
    )
    # ### end Alembic commands ###
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
"""add license_title_short column
2+
3+
Revision ID: ebb26242c904
4+
Revises: 8182d8c386f7
5+
Create Date: 2025-02-25 15:47:03.169950
6+
7+
"""
8+
9+
from typing import Sequence, Union
10+
11+
from alembic import op
12+
import sqlalchemy as sa
13+
14+
15+
# revision identifiers, used by Alembic.
16+
revision: str = "ebb26242c904"
17+
down_revision: Union[str, None] = "8182d8c386f7"
18+
branch_labels: Union[str, Sequence[str], None] = None
19+
depends_on: Union[str, Sequence[str], None] = None
20+
21+
22+
def upgrade() -> None:
    """Add the nullable ``dataset.license_title_short`` string column.

    ``dataset.license_title`` is also (re)declared nullable.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    short_title_column = sa.Column("license_title_short", sa.String(), nullable=True)
    op.add_column("dataset", short_title_column)
    op.alter_column(
        table_name="dataset",
        column_name="license_title",
        existing_type=sa.VARCHAR(),
        nullable=True,
    )
    # ### end Alembic commands ###
31+
32+
33+
def downgrade() -> None:
    """Revert this revision by dropping ``dataset.license_title_short``.

    ``license_title`` is deliberately left nullable. The parent revision
    (8182d8c386f7) renames the column with ``nullable=True``, so returning to
    that schema needs no nullability change. Setting ``nullable=False`` here
    would leave the schema out of step with the parent revision and can make
    the downgrade fail when existing rows hold a NULL ``license_title``.
    """
    # ### commands auto generated by Alembic - adjusted by hand ###
    op.drop_column("dataset", "license_title_short")
    # ### end Alembic commands ###

oc4ids_datastore_pipeline/database.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@ class Dataset(Base):
2929
source_url: Mapped[str] = mapped_column(String)
3030
publisher_name: Mapped[str] = mapped_column(String)
3131
license_url: Mapped[Optional[str]] = mapped_column(String, nullable=True)
32-
license_name: Mapped[Optional[str]] = mapped_column(String, nullable=True)
32+
license_title: Mapped[Optional[str]] = mapped_column(String, nullable=True)
33+
license_title_short: Mapped[Optional[str]] = mapped_column(String, nullable=True)
3334
json_url: Mapped[Optional[str]] = mapped_column(String, nullable=True)
3435
csv_url: Mapped[Optional[str]] = mapped_column(String, nullable=True)
3536
xlsx_url: Mapped[Optional[str]] = mapped_column(String, nullable=True)

oc4ids_datastore_pipeline/pipeline.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from oc4ids_datastore_pipeline.notifications import send_notification
1919
from oc4ids_datastore_pipeline.registry import (
2020
fetch_registered_datasets,
21-
get_license_name_from_url,
21+
get_license_title_from_url,
2222
)
2323
from oc4ids_datastore_pipeline.storage import delete_files_for_dataset, upload_files
2424

@@ -108,13 +108,16 @@ def save_dataset_metadata(
108108
try:
109109
publisher_name = json_data.get("publisher", {}).get("name", "")
110110
license_url = json_data.get("license", None)
111-
license_name = get_license_name_from_url(license_url) if license_url else None
111+
license_title, license_title_short = (
112+
get_license_title_from_url(license_url) if license_url else (None, None)
113+
)
112114
dataset = Dataset(
113115
dataset_id=dataset_id,
114116
source_url=source_url,
115117
publisher_name=publisher_name,
116118
license_url=license_url,
117-
license_name=license_name,
119+
license_title=license_title,
120+
license_title_short=license_title_short,
118121
json_url=json_url,
119122
csv_url=csv_url,
120123
xlsx_url=xlsx_url,

oc4ids_datastore_pipeline/registry.py

+9-5
Original file line numberDiff line numberDiff line change
@@ -31,15 +31,18 @@ def fetch_registered_datasets() -> dict[str, str]:
3131
return registered_datasets
3232

3333

34-
def fetch_license_mappings() -> dict[str, str]:
34+
def fetch_license_mappings() -> dict[str, dict[str, Optional[str]]]:
3535
logger.info("Fetching license mappings from registry")
3636
try:
3737
url = "https://opendataservices.github.io/oc4ids-registry/datatig/type/license/records_api.json" # noqa: E501
3838
r = requests.get(url)
3939
r.raise_for_status()
4040
json_data = r.json()
4141
return {
42-
urls["fields"]["url"]["value"]: license["fields"]["title"]["value"]
42+
urls["fields"]["url"]["value"]: {
43+
"title": license["fields"]["title"]["value"],
44+
"title_short": license["fields"]["title_short"]["value"],
45+
}
4346
for license in json_data["records"].values()
4447
for urls in license["fields"]["urls"]["values"]
4548
}
@@ -50,10 +53,11 @@ def fetch_license_mappings() -> dict[str, str]:
5053
return {}
5154

5255

53-
def get_license_title_from_url(
    url: str, force_refresh: Optional[bool] = False
) -> tuple[Optional[str], Optional[str]]:
    """Return the ``(title, title_short)`` pair recorded for a license URL.

    The mappings are held in the module-level ``_license_mappings`` and are
    (re)loaded via ``fetch_license_mappings`` when ``force_refresh`` is truthy
    or nothing has been loaded yet.

    Args:
        url: License URL to look up.
        force_refresh: When truthy, reload the mappings before the lookup.

    Returns:
        A 2-tuple of title and short title. Each element is ``None`` when the
        URL is unknown or the corresponding key is missing from its entry.
    """
    global _license_mappings
    needs_fetch = force_refresh or _license_mappings is None
    if needs_fetch:
        _license_mappings = fetch_license_mappings()
    # An unknown URL falls back to an empty entry so both lookups give None.
    titles_for_url = _license_mappings.get(url, {})
    title = titles_for_url.get("title")
    title_short = titles_for_url.get("title_short")
    return title, title_short

pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ build-backend = "flit_core.buildapi"
55
[project]
66
name = "oc4ids-datastore-pipeline"
77
description = "OC4IDS Datastore Pipeline"
8-
version = "0.2.0"
8+
version = "0.3.0"
99
readme = "README.md"
1010
dependencies = [
1111
"alembic",

tests/test_registry.py

+52-13
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from oc4ids_datastore_pipeline.registry import (
77
fetch_license_mappings,
88
fetch_registered_datasets,
9-
get_license_name_from_url,
9+
get_license_title_from_url,
1010
)
1111

1212

@@ -59,6 +59,7 @@ def test_fetch_license_mappings(mocker: MockerFixture) -> None:
5959
"license_1": {
6060
"fields": {
6161
"title": {"value": "License 1"},
62+
"title_short": {"value": "L1"},
6263
"urls": {
6364
"values": [
6465
{
@@ -80,6 +81,7 @@ def test_fetch_license_mappings(mocker: MockerFixture) -> None:
8081
"license_2": {
8182
"fields": {
8283
"title": {"value": "License 2"},
84+
"title_short": {"value": "L2"},
8385
"urls": {
8486
"values": [
8587
{
@@ -99,9 +101,18 @@ def test_fetch_license_mappings(mocker: MockerFixture) -> None:
99101
result = fetch_license_mappings()
100102

101103
assert result == {
102-
"https://license_1.com/license": "License 1",
103-
"https://license_1.com/different_url": "License 1",
104-
"https://license_2.com/license": "License 2",
104+
"https://license_1.com/license": {
105+
"title": "License 1",
106+
"title_short": "L1",
107+
},
108+
"https://license_1.com/different_url": {
109+
"title": "License 1",
110+
"title_short": "L1",
111+
},
112+
"https://license_2.com/license": {
113+
"title": "License 2",
114+
"title_short": "L2",
115+
},
105116
}
106117

107118

@@ -116,32 +127,60 @@ def test_fetch_license_mappings_catches_exception(
116127
assert result == {}
117128

118129

119-
def test_get_license_title_from_url(mocker: MockerFixture) -> None:
    """A URL present in the mapping yields its title and short title."""
    stubbed_mappings = {
        "https://license_1.com/license": {"title": "License 1", "title_short": "L1"},
        "https://license_2.com/license": {"title": "License 2", "title_short": "L2"},
    }
    mocker.patch(
        "oc4ids_datastore_pipeline.registry.fetch_license_mappings",
        return_value=stubbed_mappings,
    )

    # force_refresh makes the lookup reload from the patched fetcher.
    titles = get_license_title_from_url(
        "https://license_2.com/license", force_refresh=True
    )

    assert titles == ("License 2", "L2")
133150

134151

135-
def test_get_license_title_from_url_not_in_mapping(mocker: MockerFixture) -> None:
    """A URL absent from the mapping yields ``(None, None)``."""
    stubbed_mappings = {
        "https://license_1.com/license": {"title": "License 1", "title_short": "L1"},
    }
    mocker.patch(
        "oc4ids_datastore_pipeline.registry.fetch_license_mappings",
        return_value=stubbed_mappings,
    )

    # The queried URL is intentionally not one of the stubbed keys.
    titles = get_license_title_from_url(
        "https://license_2.com/license", force_refresh=True
    )

    assert titles == (None, None)
168+
169+
170+
def test_get_license_name_from_url_short_name_not_in_mapping(
    mocker: MockerFixture,
) -> None:
    """An entry lacking ``title_short`` yields the title and ``None``."""
    stubbed_mappings = {
        "https://license_2.com/license": {"title": "License 2"},
    }
    mocker.patch(
        "oc4ids_datastore_pipeline.registry.fetch_license_mappings",
        return_value=stubbed_mappings,
    )

    titles = get_license_title_from_url(
        "https://license_2.com/license", force_refresh=True
    )

    assert titles == ("License 2", None)

0 commit comments

Comments
 (0)