Skip to content

Commit d5f8752

Browse files
committed
Merge branch '513_attrib_from_spdx' into develop
2 parents e2110d2 + 6fe7856 commit d5f8752

File tree

8 files changed

+305
-52
lines changed

8 files changed

+305
-52
lines changed

CHANGELOG.rst

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,16 @@
22
Changelog
33

44
2023-xx-xx
5-
Release 10.0.1
5+
Release 10.1.0
66

77
* Fixed `transform` with nested list #531
88
* Added curl dependency in Dockerfile #532
9+
* Introduce spdx_license_expression
10+
* Ability to transform spdx license key from spdx_license_expression to
11+
license_expression (i.e. Generate attribution with
12+
spdx_license_expression) #513
13+
* Ability to configure the proxy settings #533
14+
* Fixed licenses issue #534
915

1016
2023-08-20
1117
Release 10.0.0

docs/source/reference.rst

Lines changed: 73 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,8 +83,8 @@ Options
8383
Purpose
8484
-------
8585

86-
Generate an attribution file which contains license information
87-
from the INPUT along with the license text.
86+
Generate an attribution file which contains license information from the INPUT
87+
along with the license text.
8888

8989
Assume the following:
9090

@@ -421,6 +421,60 @@ Details
421421
This option tells the tool to show all errors found.
422422
The default behavior will only show 'CRITICAL', 'ERROR', and 'WARNING'
423423
424+
Special Notes
425+
-------------
426+
If the input contains values for license_file, the tool will attempt to
427+
associate the license_file with the corresponding license_key.
428+
429+
sample.csv
430+
431+
+-----------------+--------+--------------------+--------------+
432+
| about_resource  | name   | license_expression | license_file |
433+
+=================+========+====================+==============+
434+
| /project/test.c | test.c | mit AND custom     | custom.txt   |
435+
+-----------------+--------+--------------------+--------------+
436+
437+
If the user does not utilize the **--fetch-license** option, the input will
438+
contain two license keys and one license file. In this scenario, the tool cannot
439+
determine which license key the license file is referencing. As a result, the
440+
license_file will be saved separately.
441+
442+
e.g.
443+
444+
.. code-block:: none
445+
446+
about_resource: test.c
447+
name: test.c
448+
license_expression: mit AND custom
449+
licenses:
450+
- key: mit
451+
name: mit
452+
- key: custom
453+
name: custom
454+
- file: custom.txt
455+
456+
On the other hand, if the user generates ABOUT files using the
457+
**--fetch-license** option, the MIT license will be retrieved. This will result
458+
in having one license key and one license file. In such cases, the tool will
459+
consider it a successful match.
460+
461+
e.g.
462+
463+
.. code-block:: none
464+
465+
about_resource: test.c
466+
name: test.c
467+
license_expression: mit AND custom
468+
licenses:
469+
- key: mit
470+
name: MIT License
471+
file: mit.LICENSE
472+
url: https://scancode-licensedb.aboutcode.org/mit.LICENSE
473+
spdx_license_key: MIT
474+
- key: custom
475+
name: custom
476+
file: custom.txt
477+
424478
gen_license
425479
===========
426480

@@ -780,3 +834,20 @@ version 32.0.0 or later. If you are using an earlier version of Scancode Toolkit
780834
specifically version 31 or older, it will only be compatible with prior versions
781835
of AboutCode Toolkit.
782836

837+
838+
Configure proxy
839+
---------------
840+
Since version 10.1.0, AboutCode Toolkit uses the `requests` library for its
841+
HTTP requests. To route these requests through a proxy, export the standard
842+
environment variables **http_proxy**, **https_proxy**, **no_proxy** and **all_proxy**.
843+
844+
e.g.
845+
846+
.. code-block:: none
847+
848+
$ export HTTP_PROXY="http://10.10.1.10:3128"
849+
$ export HTTPS_PROXY="http://10.10.1.10:1080"
850+
$ export ALL_PROXY="socks5://10.10.1.10:3434"
851+
852+
See https://requests.readthedocs.io/en/latest/user/advanced/#proxies for
853+
references

src/attributecode/attrib.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -323,7 +323,7 @@ def generate_and_save(abouts, is_about_input, license_dict, output_location, sca
323323
)
324324

325325
if rendering_error:
326-
errors.extend(rendering_error)
326+
errors.append(rendering_error)
327327

328328
if rendered:
329329
output_location = add_unc(output_location)

src/attributecode/model.py

Lines changed: 90 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
from attributecode.util import csv
5656
from attributecode.util import file_fields
5757
from attributecode.util import filter_errors
58+
from attributecode.util import get_spdx_key_and_lic_key_from_licdb
5859
from attributecode.util import is_valid_name
5960
from attributecode.util import on_windows
6061
from attributecode.util import norm
@@ -802,6 +803,7 @@ def set_standard_fields(self):
802803
('license_name', ListField()),
803804
('license_file', FileTextField()),
804805
('license_url', UrlListField()),
806+
('spdx_license_expression', StringField()),
805807
('spdx_license_key', ListField()),
806808
('copyright', StringField()),
807809
('notice_file', FileTextField()),
@@ -1222,6 +1224,13 @@ def dumps(self, licenses_dict=None):
12221224
else:
12231225
if field.value:
12241226
data[field.name] = field.value
1227+
# If there is no license_key value, parse the license_expression
1228+
# and get the parsed license key
1229+
if 'license_expression' in data:
1230+
if not license_key and data['license_expression']:
1231+
_spec_char, lic_list = parse_license_expression(
1232+
data['license_expression'])
1233+
license_key = lic_list
12251234

12261235
# Group the same license information in a list
12271236
# This `licenses_dict` is a dictionary with license key as the key and the
@@ -1244,20 +1253,35 @@ def dumps(self, licenses_dict=None):
12441253
lic_dict['spdx_license_key'] = spdx_lic_key
12451254

12461255
# Remove the license information if it has been handled
1247-
lic_key_copy.remove(lic_key)
1248-
if lic_name in license_name:
1249-
license_name.remove(lic_name)
1250-
if lic_url in license_url:
1251-
license_url.remove(lic_url)
1252-
if lic_filename in license_file:
1253-
license_file.remove(lic_filename)
1254-
if spdx_lic_key in spdx_license_key:
1255-
spdx_license_key.remove(spdx_lic_key)
1256-
lic_dict_list.append(lic_dict)
1256+
# The following condition is to check if license information
1257+
# has been fetched, the license key is invalid or custom if
1258+
# no value for lic_name
1259+
if lic_name:
1260+
lic_key_copy.remove(lic_key)
1261+
if lic_name in license_name:
1262+
license_name.remove(lic_name)
1263+
if lic_url in license_url:
1264+
license_url.remove(lic_url)
1265+
if lic_filename in license_file:
1266+
license_file.remove(lic_filename)
1267+
if spdx_lic_key in spdx_license_key:
1268+
spdx_license_key.remove(spdx_lic_key)
1269+
lic_dict_list.append(lic_dict)
12571270

12581271
# Handle license information that have not been handled.
1259-
license_group = list(zip_longest(
1260-
lic_key_copy, license_name, license_file, license_url, spdx_license_key))
1272+
# If the len of the lic_key is the same as the lic_file, the tool should
1273+
# assume the lic_file (custom license) is referring this specific lic_key
1274+
# otherwise, the tool shouldn't group them
1275+
if len(lic_key_copy) == len(license_file):
1276+
license_group = list(zip_longest(
1277+
lic_key_copy, license_name, license_file, license_url, spdx_license_key))
1278+
else:
1279+
license_group = list(zip_longest(
1280+
lic_key_copy, license_name, [], license_url, spdx_license_key))
1281+
# Add the unhandled_lic_file if any
1282+
if license_file:
1283+
for lic_file in license_file:
1284+
license_group.append((None, None, lic_file, None, None))
12611285

12621286
for lic_group in license_group:
12631287
lic_dict = {}
@@ -1278,15 +1302,15 @@ def dumps(self, licenses_dict=None):
12781302
lic_dict_list.append(lic_dict)
12791303

12801304
# Format the license information in the same order of the license expression
1281-
if license_key:
1282-
for key in license_key:
1283-
for lic_dict in lic_dict_list:
1284-
if key == lic_dict['key']:
1285-
data.setdefault('licenses', []).append(lic_dict)
1286-
break
1287-
else:
1305+
for key in license_key:
12881306
for lic_dict in lic_dict_list:
1289-
data.setdefault('licenses', []).append(lic_dict)
1307+
if key == lic_dict['key']:
1308+
data.setdefault('licenses', []).append(lic_dict)
1309+
lic_dict_list.remove(lic_dict)
1310+
break
1311+
1312+
for lic_dict in lic_dict_list:
1313+
data.setdefault('licenses', []).append(lic_dict)
12901314

12911315
return saneyaml.dump(data)
12921316

@@ -1764,6 +1788,7 @@ def pre_process_and_fetch_license_dict(abouts, from_check=False, api_url=None, a
17641788
if errors:
17651789
return key_text_dict, errors
17661790

1791+
spdx_sclickey_dict = get_spdx_key_and_lic_key_from_licdb()
17671792
for about in abouts:
17681793
# No need to go through all the about objects if '--api_key' is invalid
17691794
auth_error = Error(
@@ -1779,6 +1804,27 @@ def pre_process_and_fetch_license_dict(abouts, from_check=False, api_url=None, a
17791804
about.license_expression.value = lic_exp
17801805
about.license_expression.present = True
17811806

1807+
if not about.license_expression.value and about.spdx_license_expression.value:
1808+
lic_exp_value = ""
1809+
special_char_in_expression, lic_list = parse_license_expression(
1810+
about.spdx_license_expression.value)
1811+
if special_char_in_expression:
1812+
msg = (about.about_file_path + u": The following character(s) cannot be in the spdx_license_expression: " +
1813+
str(special_char_in_expression))
1814+
errors.append(Error(ERROR, msg))
1815+
else:
1816+
spdx_lic_exp_segment = about.spdx_license_expression.value.split()
1817+
for spdx_lic_key in spdx_lic_exp_segment:
1818+
if lic_exp_value:
1819+
lic_exp_value = lic_exp_value + " " + convert_spdx_expression_to_lic_expression(
1820+
spdx_lic_key, spdx_sclickey_dict)
1821+
else:
1822+
lic_exp_value = convert_spdx_expression_to_lic_expression(
1823+
spdx_lic_key, spdx_sclickey_dict)
1824+
if lic_exp_value:
1825+
about.license_expression.value = lic_exp_value
1826+
about.license_expression.present = True
1827+
17821828
if about.license_expression.value:
17831829
special_char_in_expression, lic_list = parse_license_expression(
17841830
about.license_expression.value)
@@ -1855,6 +1901,30 @@ def pre_process_and_fetch_license_dict(abouts, from_check=False, api_url=None, a
18551901
return key_text_dict, errors
18561902

18571903

1904+
def convert_spdx_expression_to_lic_expression(spdx_key, spdx_lic_dict):
    """
    Return the license_expression form of a single `spdx_key` token taken
    from an spdx_license_expression.

    `spdx_lic_dict` is a mapping whose keys are SPDX license keys and whose
    values are the corresponding license keys. A token found in the mapping
    is replaced by its mapped value. Otherwise one leading '(' or, failing
    that, one trailing ')' is peeled off, the remainder is converted
    recursively and the parenthesis is put back in place. Any other token
    (an operator, or a key without a match) is returned unchanged; no error
    is reported for unmatched keys.
    """
    if spdx_key in spdx_lic_dict:
        return spdx_lic_dict[spdx_key]

    if spdx_key.startswith('('):
        # Strip only the first '('; further nesting is left to the recursion.
        remainder = spdx_key[1:]
        return '(' + convert_spdx_expression_to_lic_expression(
            remainder, spdx_lic_dict)

    if spdx_key.endswith(')'):
        # Strip only the last ')'; further nesting is left to the recursion.
        remainder = spdx_key[:-1]
        return convert_spdx_expression_to_lic_expression(
            remainder, spdx_lic_dict) + ')'

    # This can be an operator or a key that does not have a match.
    return spdx_key
1926+
1927+
18581928
def parse_license_expression(lic_expression):
18591929
licensing = Licensing()
18601930
lic_list = []

src/attributecode/util.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,50 @@ def norm(p):
192192
return p
193193

194194

195+
def get_spdx_key_and_lic_key_from_licdb(
        url="https://scancode-licensedb.aboutcode.org/index.json", timeout=30):
    """
    Return a dictionary built from the LicenseDB license index where each
    key is an SPDX license key and each value is the matching "license_key".

    Both the "spdx_license_key" and every entry of "other_spdx_license_keys"
    of a license are mapped to that license's "license_key".

    `url` is the location of the JSON license index and `timeout` is the
    number of seconds passed to `requests.get` as its timeout. An empty
    dictionary is returned when the server does not answer with status 200.
    Exceptions raised by `requests` (connection failure, timeout, invalid
    JSON) are not caught here.

    Sample of one of the licenses in the index.json:
        {
            "license_key": "bsd-new",
            "category": "Permissive",
            "spdx_license_key": "BSD-3-Clause",
            "other_spdx_license_keys": [
                "LicenseRef-scancode-libzip"
            ],
            "is_exception": false,
            "is_deprecated": false,
            "json": "bsd-new.json",
            "yaml": "bsd-new.yml",
            "html": "bsd-new.html",
            "license": "bsd-new.LICENSE"
        },
    """
    import requests

    lic_dict = {}

    # Without an explicit timeout `requests` may wait indefinitely on an
    # unresponsive server.
    response = requests.get(url, timeout=timeout)

    # Only a successful request (status code 200) carries the index.
    if response.status_code != 200:
        return lic_dict

    for lic in response.json():
        lic_key = lic.get('license_key')
        if not lic_key:
            # Nothing to map to: skip incomplete entries instead of failing.
            continue

        spdx_key = lic.get('spdx_license_key')
        if spdx_key:
            lic_dict[spdx_key] = lic_key

        # The list of alternative SPDX keys may be missing, null or empty.
        for other_spdx in lic.get('other_spdx_license_keys') or []:
            lic_dict[other_spdx] = lic_key

    return lic_dict
237+
238+
195239
def get_relative_path(base_loc, full_loc):
196240
"""
197241
Return a posix path for a given full location relative to a base location.

0 commit comments

Comments
 (0)