Skip to content

Commit 8abb44a

Browse files
authored
Upload uas (#115)
* restore tests from old linehaul * parse some new User-Agents * apply black formatting * test deps * only run the fast tests in CI * bump version, touch up classifier
1 parent 1b6150d commit 8abb44a

36 files changed

+1182
-4
lines changed

.github/workflows/test.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,6 @@ jobs:
1616
- name: Install dependencies
1717
run: python -m pip install -r requirements.txt
1818
- name: Install test dependencies
19-
run: python -m pip install pytest pretend
19+
run: python -m pip install pytest pretend hypothesis pyaml
2020
- name: Test
21-
run: python -m pytest
21+
run: python -m pytest test_functions.py tests/unit

linehaul/ua/parser.py

+39
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,45 @@ def PEP381ClientUserAgent(*, version):
158158
return {"installer": {"name": "pep381client", "version": version}}
159159

160160

161+
@_parser.register
@regex_ua_parser(r"^maturin/(?P<version>\S+)$")
def MaturinUserAgent(*, version):
    """Describe a ``maturin/<version>`` User-Agent.

    ``version`` is the value captured by the ``version`` group of the pattern
    above; it is reported as the installer version.
    """
    installer = {"name": "maturin", "version": version}
    return {"installer": installer}
165+
166+
167+
@_parser.register
@regex_ua_parser(r"^pdm/(?P<version>\S+) (?P<impl_name>\S+)/(?P<impl_version>\S+)$")
def PDMUserAgent(*, version, impl_name, impl_version):
    """Describe a ``pdm/<version> <impl>/<impl_version>`` User-Agent.

    The keyword arguments are the named groups of the pattern above: the pdm
    release plus the name and version of the Python implementation it ran on.
    """
    installer = {"name": "pdm", "version": version}
    implementation = {"name": impl_name, "version": impl_version}
    return {"installer": installer, "implementation": implementation}
174+
175+
176+
@_parser.register
@regex_ua_parser(
    r"^poetry/(?P<version>\S+) (?P<impl_name>\S+)/(?P<impl_version>\S+) "
    r"(?P<system_name>\S+)/(?P<system_release>\S+)?$"
)
def PoetryUserAgent(*, version, impl_name, impl_version, system_name, system_release):
    """Describe a ``poetry/<v> <impl>/<impl_v> <system>/<release>`` User-Agent.

    The keyword arguments are the named groups of the pattern above.  The
    ``system_release`` group is optional in the pattern, so a User-Agent that
    ends in ``<system>/`` still matches.
    NOTE(review): presumably ``system_release`` then arrives as ``None`` --
    confirm against how ``regex_ua_parser`` forwards unmatched groups.
    """
    installer = {"name": "poetry", "version": version}
    implementation = {"name": impl_name, "version": impl_version}
    system = {"name": system_name, "release": system_release}
    return {
        "installer": installer,
        "implementation": implementation,
        "system": system,
    }
187+
188+
189+
@_parser.register
@regex_ua_parser(
    r"^twine/(?P<version>\S+)(?: .+)? (?P<impl_name>\S+)/(?P<impl_version>\S+)$"
)
def TwineUserAgent(*, version, impl_name, impl_version):
    """Describe a ``twine/<version> ... <impl>/<impl_version>`` User-Agent.

    Only the leading twine version and the trailing ``<impl>/<impl_version>``
    pair are captured; anything between them is matched by the non-capturing
    ``(?: .+)?`` group of the pattern above and is not reported.
    """
    installer = {"name": "twine", "version": version}
    implementation = {"name": impl_name, "version": impl_version}
    return {"installer": installer, "implementation": implementation}
198+
199+
161200
# TODO: We should probably consider not parsing this specially, and moving it to
162201
# just the same as we treat browsers, since we don't really know anything
163202
# about it-- including whether or not the version of Python mentioned is

pyproject.toml

+1-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "linehaul"
3-
version = "1.0.0"
3+
version = "1.0.1"
44
description = "User-Agent parsing for PyPI analytics"
55

66
readme = "README.md"
@@ -15,7 +15,6 @@ authors = [
1515
classifiers = [
1616
"Development Status :: 5 - Production/Stable",
1717
"Intended Audience :: Developers",
18-
"Topic :: Software Development :: Build Tools",
1918
"License :: OSI Approved :: Apache Software License",
2019
"Programming Language :: Python :: 3",
2120
"Programming Language :: Python :: 3.11",

tests/__init__.py

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# Licensed under the Apache License, Version 2.0 (the "License");
2+
# you may not use this file except in compliance with the License.
3+
# You may obtain a copy of the License at
4+
#
5+
# http://www.apache.org/licenses/LICENSE-2.0
6+
#
7+
# Unless required by applicable law or agreed to in writing, software
8+
# distributed under the License is distributed on an "AS IS" BASIS,
9+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10+
# See the License for the specific language governing permissions and
11+
# limitations under the License.

tests/conftest.py

+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Licensed under the Apache License, Version 2.0 (the "License");
2+
# you may not use this file except in compliance with the License.
3+
# You may obtain a copy of the License at
4+
#
5+
# http://www.apache.org/licenses/LICENSE-2.0
6+
#
7+
# Unless required by applicable law or agreed to in writing, software
8+
# distributed under the License is distributed on an "AS IS" BASIS,
9+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10+
# See the License for the specific language governing permissions and
11+
# limitations under the License.
12+
13+
import os.path
14+
15+
import pytest
16+
17+
18+
def pytest_collection_modifyitems(items):
    """Add the ``unit`` marker to collected tests that live under ``unit*``.

    For every collected item that exposes a ``module`` attribute, the module's
    file path is taken relative to the directory containing this conftest.
    When the first component of that relative path starts with ``"unit"`` the
    item is marked with ``pytest.mark.unit``.

    Args:
        items: the collected test items, modified in place.
    """
    # Anchor on this conftest's own directory.  os.path.commonprefix compares
    # character by character, so for a module at <tests>/b/unit/x.py it would
    # yield "<tests>/b" and make that module look like "unit/x.py".
    tests_root = os.path.dirname(os.path.abspath(__file__))

    for item in items:
        if not hasattr(item, "module"):  # e.g.: DoctestTextfile
            continue

        module_path = os.path.relpath(item.module.__file__, tests_root)

        # os.sep separates path components; os.pathsep separates entries of a
        # PATH-style list and would leave the whole path in one piece.
        module_root_dir = module_path.split(os.sep)[0]
        if module_root_dir.startswith("unit"):
            item.add_marker(pytest.mark.unit)

tests/strategies.py

+189
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
# Licensed under the Apache License, Version 2.0 (the "License");
2+
# you may not use this file except in compliance with the License.
3+
# You may obtain a copy of the License at
4+
#
5+
# http://www.apache.org/licenses/LICENSE-2.0
6+
#
7+
# Unless required by applicable law or agreed to in writing, software
8+
# distributed under the License is distributed on an "AS IS" BASIS,
9+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10+
# See the License for the specific language governing permissions and
11+
# limitations under the License.
12+
13+
from hypothesis import strategies as st
14+
15+
16+
INF = float("inf")
17+
18+
19+
@st.composite
def line_delimited_data(draw, max_line_size, min_lines=1):
    """Draw newline-terminated binary data made of non-empty lines.

    ``max_line_size`` is a strategy; the value drawn from it caps both the
    length of each individual line and the total size of the lines including
    one newline byte per line.  At least ``min_lines`` lines are generated.
    """
    size_limit = draw(max_line_size)

    def _has_no_newline(candidate):
        return b"\n" not in candidate

    def _fits_limit(candidates):
        # Every line costs its own length plus one byte for its newline.
        payload = sum(len(candidate) for candidate in candidates)
        return payload + len(candidates) <= size_limit

    single_line = st.binary(min_size=1, max_size=size_limit).filter(_has_no_newline)
    all_lines = st.lists(single_line, min_size=min_lines).filter(_fits_limit)

    drawn = draw(all_lines)
    return b"\n".join(drawn) + b"\n"
29+
30+
31+
@st.composite
def chunked(draw, source):
    """Draw a value from ``source`` and cut it into consecutive slices.

    A sorted list of unique cut offsets in ``[0, len(data) - 1]`` is drawn and
    the data is sliced between neighbouring offsets, with ``0`` and
    ``len(data)`` always used as the outer bounds.  Concatenating the returned
    slices gives back the drawn data; individual slices may be empty.

    Args:
        draw: supplied by ``st.composite``.
        source: strategy producing a sliceable value with a length.

    Returns:
        A list of slices of the drawn value.
    """
    data = draw(source)

    # With no data there are no offsets to pick: st.integers(0, -1) would be
    # an invalid range.  The only possible chunking is one empty slice.
    if len(data) == 0:
        return [data[0:0]]

    cut_offsets = draw(
        st.lists(st.integers(0, len(data) - 1), unique=True).map(sorted)
    )
    offsets = [0] + cut_offsets + [len(data)]

    return [data[start:stop] for start, stop in zip(offsets, offsets[1:])]
42+
43+
44+
def _none_for_inf(v):
    """Return ``None`` when ``v`` equals ``INF``; otherwise return ``v`` unchanged.

    Used to turn an infinite upper bound into the ``None`` that
    ``st.integers(max_value=...)`` is given for "no upper bound".
    """
    # Compare by value rather than identity: `is` only matches the very same
    # float object, so an equal infinity created elsewhere would slip through.
    return None if v == INF else v
48+
49+
50+
@st.composite
def version(draw, min_digits=1, max_digits=None, min_version=None, max_version=None):
    """Draw a dotted numeric version string such as ``"1.5.6.0"``.

    The number of dot-separated components is drawn first; a list of tuple
    strategies is then built from ``min_version`` upwards (bounded by
    ``max_version`` when given) and one of them is drawn.

    Args:
        draw: supplied by ``st.composite``.
        min_digits: minimum number of dot-separated components; must be >= 1.
        max_digits: maximum number of components.  When ``None`` it defaults to
            the largest of ``min_digits + 10`` and the component counts of
            ``min_version`` and ``max_version``.
        min_version: optional dotted string used as the lower bound; it is
            itself always one of the candidate versions (zero-padded to the
            drawn number of components).
        max_version: optional dotted string used as the upper bound; when
            omitted every component is unbounded above.

    Returns:
        The drawn components joined with ``"."``.

    Raises:
        ValueError: if ``min_digits`` is below 1, if ``min_digits`` exceeds
            ``max_digits``, if either bound has more components than
            ``max_digits``, or if the padded minimum is greater than the
            padded maximum.
    """
    min_version_digits = None if min_version is None else len(min_version.split("."))
    max_version_digits = None if max_version is None else len(max_version.split("."))

    if min_digits < 1:
        raise ValueError("Minimum digits must be >= 1")
    if max_digits is None:
        # To determine our maximum number of digits, we're going to take the larger of
        # our default of 10 greater than the minimum, or the number of digits in the min
        # and max versions.
        max_digits = max(
            filter(None, [min_digits + 10, min_version_digits, max_version_digits])
        )
    if min_digits > max_digits:
        raise ValueError("Maximum digits must be greater than the minimum digits.")
    if min_version_digits is not None and min_version_digits > max_digits:
        raise ValueError(
            "Cannot have a minimum version with more digits than the maximum number "
            "of digits."
        )
    if max_version_digits is not None and max_version_digits > max_digits:
        raise ValueError(
            "Cannot have a maximum version with more digits than the maximum number "
            "of digits."
        )

    num_digits = draw(st.integers(min_value=min_digits, max_value=max_digits))

    # From here on min_version / max_version are lists of ints, not strings.
    if min_version is not None:
        min_version = [int(i) for i in min_version.split(".")]
    else:
        min_version = [0]

    # We need to pad out the minimum version so that it matches our number of digits.
    min_version += [0 for _ in range(num_digits - len(min_version))]

    if max_version is not None:
        # If we were given a max range, then we want to pad it out with zeros to
        # match the number of digits we're trying to generate.
        max_version = [int(i) for i in max_version.split(".")]
        max_version += [0 for _ in range(num_digits - len(max_version))]
    else:
        # If we were not given a max range, we want to have an infinite top end.
        max_version = [INF] * num_digits

    if min_version > max_version:
        raise ValueError("The mininum version *MUST* be less than the maximum version.")

    # The very first version strategy we can have, is simply matching whatever the
    # minimum version is.
    version_strategies = [st.tuples(*[st.just(i) for i in min_version])]

    # Now we have to build up a list of possible versions besides our basic one.
    # Each pass pops one trailing component off min_version, so the loop ends
    # once the list is empty (or earlier via the break below).
    while min_version:
        # We're going to start with incrementing the rightmost digit in our version.
        incrementing_part = min_version.pop()

        # If the number of digits we would require to handle a version that is
        # larger than this minimum version is greater than the number of digits
        # we're trying to generate in a version, then we'll skip it and move onto
        # the next one.
        # Note: We add one to this to account for the incrementing_part that we removed
        #       from this list earlier.
        if len(min_version) + 1 > num_digits:
            continue

        # We're going to be making a version that has the same prefix as min_version,
        # but the incrementing part is one higher. If doing that would make the version
        # number we're just about to generate greater than our maximum version, then
        # we'll break out of the loop. Any further incrementing will continue to be
        # too large of a version number.
        if min_version + [incrementing_part + 1] > max_version[: len(min_version) + 1]:
            break

        # We're going to limit our generated version by the right most digit in our
        # maximum version.
        max_incrementing_part = max_version[len(min_version)]

        # Build up a parts that is all of the preceding digits, sans the final
        # digit, e.g. if our minimum version is 1.5.6.0, then we want 1, 5, 6.
        # We know this is safe with the maximum version, because if it wasn't, then
        # we would have bailed out earlier.
        parts = [st.just(i) for i in min_version]

        # If there are any values where the incrementing part will *always* mean that
        # any version number we generate, no matter what gets generated for the padded
        # versions, then we'll create strategies to deal with those first.
        if min_version + [incrementing_part + 1] < max_version[: len(min_version) + 1]:
            # if incrementing_part + 1 < max_incrementing_part:
            if (
                max_incrementing_part is INF
                or min_version != max_version[: len(min_version)]
            ):
                max_incr_value = None
            else:
                max_incr_value = max_incrementing_part - 1
            subparts = [
                st.integers(min_value=incrementing_part + 1, max_value=max_incr_value)
            ]

            # At this part, we know we can just blindly generate any padding we want,
            # because our leading digits will ensure that we are *always* less than
            # our maximum version.
            # Note: We have to subtract an extra 1 from our number of needed parts to
            #       complete our padding, because of the one we generated above.
            subparts += [
                st.integers(min_value=0) for _ in range(num_digits - len(parts) - 1)
            ]

            # Now we're going to create a hypothesis tuple from our prefix parts, and
            # our subparts, and add it to our list of strategies to try.
            version_strategies.append(st.tuples(*parts + subparts))

        # Finally, we will generate a strategy that sets the incrementing part and all
        # padded parts maximum value to be equal to the maximum value for that part in
        # our maximum value. The only special case here is that Infinity values in our
        # maximum values need to be translated to None for hypothesis. We need one
        # special case here, if our max_incrementing_part is inf, then this case should
        # already have been handled up above.
        if (
            max_incrementing_part is not INF
            and min_version == max_version[: len(min_version)]
        ):
            parts += [st.just(max_incrementing_part)]

            parts += [
                st.integers(min_value=0, max_value=_none_for_inf(max_version[i]))
                for i in range(len(parts), num_digits)
            ]

            # Create a hypothesis tuple from our parts, and add it to our list of
            # strategies to try.
            version_strategies.append(st.tuples(*parts))

    # Now that we have a list of version strategies, we'll draw from one of those
    # possible strategies, and join the parts together to create a version number.
    # NOTE: this local shadows the function's own name inside its body.
    version = draw(st.one_of(version_strategies))

    return ".".join(str(i) for i in version)

0 commit comments

Comments
 (0)