19
19
import os
20
20
from typing import Any , Dict , Optional
21
21
22
+ import jellyfish
23
+ import requests # type: ignore[import-untyped]
24
+
22
25
from ros_license_toolkit .checks import Check , Status
23
26
from ros_license_toolkit .common import get_spdx_license_name
24
27
from ros_license_toolkit .license_tag import LicenseTag , is_license_name_in_spdx_list
25
28
from ros_license_toolkit .package import Package
26
29
from ros_license_toolkit .ui_elements import red
27
30
31
+ # Minimum similarity between a license text and its SPDX reference text for the text to be accepted
32
+ SIMILARITY_THRESHOLD = 90 # in percent
33
+
28
34
29
35
class LicenseTextExistsCheck (Check ):
30
36
"""This ensures that the license text file referenced by the tag exists."""
@@ -85,30 +91,43 @@ def _check_licenses(self, package: Package) -> None:
85
91
)
86
92
self .missing_license_texts_status [license_tag ] = Status .FAILURE
87
93
continue
94
+
88
95
if actual_license != license_tag .get_license_id ():
89
- self .license_tags_without_license_text [license_tag ] = (
90
- f"License text file '{ license_text_file } ' is "
91
- + f"of license { actual_license } but tag is "
92
- + f"{ license_tag .get_license_id ()} ."
93
- )
94
- # If Tag and File both are in SPDX but don't match -> Error
95
- if is_license_name_in_spdx_list (license_tag .get_license_id ()):
96
- self .missing_license_texts_status [license_tag ] = Status .FAILURE
97
- else :
98
- self .missing_license_texts_status [license_tag ] = Status .WARNING
99
- self .files_with_wrong_tags [license_tag ] = {
100
- "actual_license" : actual_license ,
101
- "license_tag" : license_tag .get_license_id (),
102
- }
103
- continue
96
+ if license_tag .has_license_text_file ():
97
+ license_file_for_tag = (
98
+ package .abspath + "/" + license_tag .get_license_text_file ()
99
+ )
100
+ with open (license_file_for_tag , "r" , encoding = "utf-8" ) as f :
101
+ content = f .read ()
102
+ similarity_of_texts = self .compare_text_with_spdx_text (license_tag , content )
103
+
104
+ # IDEA: if accepted, add the tag to the package.found_license_texts, since scanning
105
+ # has failed to do so. Also solves problem of license_file_referenced check
106
+
107
+ # if similarity couldn't be determined or is too low --> fail, else success
108
+ if similarity_of_texts is None or similarity_of_texts < SIMILARITY_THRESHOLD :
109
+ self .license_tags_without_license_text [license_tag ] = (
110
+ f"License text file '{ license_text_file } ' is "
111
+ + f"of license { actual_license } but tag is "
112
+ + f"{ license_tag .get_license_id ()} ."
113
+ )
114
+ # If Tag and File both are in SPDX but don't match -> Error
115
+ if is_license_name_in_spdx_list (license_tag .get_license_id ()):
116
+ self .missing_license_texts_status [license_tag ] = Status .FAILURE
117
+ else :
118
+ self .missing_license_texts_status [license_tag ] = Status .WARNING
119
+ self .files_with_wrong_tags [license_tag ] = {
120
+ "actual_license" : actual_license ,
121
+ "license_tag" : license_tag .get_license_id (),
122
+ }
123
+ continue
104
124
105
125
def _evaluate_results (self ):
106
126
if len (self .license_tags_without_license_text ) > 0 :
107
127
if max (self .missing_license_texts_status .values ()) == Status .WARNING :
108
128
self ._warning (
109
- "Since they are not in the SPDX list, "
110
- "we can not check if these tags have the correct "
111
- "license text:\n "
129
+ "Since they are not in the SPDX list, we can not check if these tags have the"
130
+ " correct license text:\n "
112
131
+ "\n " .join (
113
132
[
114
133
f" '{ x [0 ]} ': { x [1 ]} "
@@ -118,18 +137,47 @@ def _evaluate_results(self):
118
137
)
119
138
else :
120
139
self ._failed (
121
- "The following license tags do not "
122
- "have a valid license text "
123
- "file:\n "
140
+ "The following license tags do not have a valid license text file:\n "
124
141
+ "\n " .join (
125
142
[
126
143
f" '{ x [0 ]} ': { x [1 ]} "
127
144
for x in self .license_tags_without_license_text .items ()
128
145
]
129
146
)
130
147
)
131
- self .verbose_output = red (
148
+ self .verbose_output = red ( # pylint: disable=attribute-defined-outside-init
132
149
"\n " .join ([f" '{ x [0 ]} ': { x [1 ]} " for x in self .found_license_texts .items ()])
133
150
)
134
151
else :
135
152
self ._success ("All license tags have a valid license text file." )
153
+
154
def compare_text_with_spdx_text(self, tag, found_lic_text):
    """Return the similarity in percent between the SPDX text for `tag` and `found_lic_text`.

    The reference text is read from a per-tag cache file below
    ``~/.cache/ros_license_toolkit``. If no cache file exists yet, the text is
    downloaded from ``https://spdx.org/licenses/<tag>.json`` and written to the
    cache for later calls.

    Args:
        tag: License identifier. It is formatted into both the cache file name and
            the SPDX URL, so its string form is presumably the SPDX license id
            (TODO confirm against the caller, which passes a license tag object).
        found_lic_text: License text found in the package.

    Returns:
        The result of ``self.get_similarity_percent(reference_text, found_lic_text)``,
        or ``None`` if the reference text could not be obtained (request failed,
        non-200 status, or a response without a usable ``licenseText`` entry).
    """
    cache_dir: str = os.path.expanduser("~/.cache/ros_license_toolkit")
    os.makedirs(cache_dir, exist_ok=True)
    license_file = os.path.join(cache_dir, f"license_{tag}.txt")

    if os.path.exists(license_file):
        with open(license_file, "r", encoding="utf-8") as f:
            original_text = f.read()
    else:
        url = f"https://spdx.org/licenses/{tag}.json"
        # Network problems (no connection, DNS failure, timeout) must not crash the
        # check; the caller treats None as "similarity could not be determined".
        try:
            response = requests.get(url, timeout=100)
        except requests.RequestException:
            return None
        if response is None or response.status_code != 200:
            return None
        # A 200 response may still carry a body that is not JSON or lacks the
        # expected key; treat that like an unavailable reference text.
        try:
            original_text = response.json()["licenseText"]
        except (ValueError, KeyError, TypeError):
            return None
        if not isinstance(original_text, str):
            return None
        with open(license_file, "w", encoding="utf-8") as f:
            f.write(original_text)

    return self.get_similarity_percent(original_text, found_lic_text)
176
+
177
def get_similarity_percent(self, text1, text2):
    """Return the Levenshtein-based similarity of `text1` and `text2` in percent.

    The edit distance is normalized by the length of the longer text, so the
    result is ``100 * (longer - distance) / longer``, rounded to two decimals.

    Args:
        text1: First text.
        text2: Second text.

    Returns:
        Similarity as a float percentage; 100.0 for identical texts (including
        two empty texts).
    """
    # Identical texts have distance 0. Answering here also covers two empty
    # texts, which would otherwise divide by a longer-length of zero.
    if text1 == text2:
        return 100.0
    lev_dis = float(jellyfish.levenshtein_distance(text1, text2))
    # Non-identical texts cannot both be empty, so `bigger` is > 0 here.
    bigger = float(max(len(text1), len(text2)))
    return round(100 * (bigger - lev_dis) / bigger, 2)
0 commit comments