Skip to content

Commit 6e70949

Browse files
authored
Merge pull request #75 from scanoss/bug/mdaloia/fix-raw-scan-results-json-parsing
Fix parsing issues with file names with backslashes on windows
2 parents 89b688d + 0acfc81 commit 6e70949

File tree

4 files changed

+73
-52
lines changed

4 files changed

+73
-52
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
88
## [Unreleased]
99
### Added
1010
- Upcoming changes...
11+
12+
## [1.17.4] - 2024-11-08
13+
### Fixed
14+
- Fix parsing of scan results for file names containing backslashes on Windows
1115

1216
## [1.17.3] - 2024-11-05
1317
### Fixed

src/scanoss/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,4 +22,4 @@
2222
THE SOFTWARE.
2323
"""
2424

25-
__version__ = "1.17.3"
25+
__version__ = "1.17.4"

src/scanoss/scanner.py

Lines changed: 59 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import os
2626
import sys
2727
import datetime
28+
from typing import Any, Dict, List, Optional
2829
import importlib_resources
2930

3031
from progress.bar import Bar
@@ -490,66 +491,41 @@ def __run_scan_threaded(self, scan_started: bool, file_count: int) -> bool:
490491
success = False
491492
return success
492493

493-
def __finish_scan_threaded(self, file_map: dict = None) -> bool:
494-
"""
495-
Wait for the threaded scans to complete
496-
:param file_map: mapping of obfuscated files back into originals
497-
:return: True if successful, False otherwise
494+
def __finish_scan_threaded(self, file_map: Optional[Dict[Any, Any]] = None) -> bool:
495+
"""Wait for the threaded scan to complete and process the results
496+
497+
Args:
498+
file_map: Mapping of obfuscated files back to originals
499+
500+
Returns:
501+
bool: True if successful, False otherwise
502+
503+
Raises:
504+
ValueError: If output format is invalid
498505
"""
499-
success = True
500-
responses = None
506+
success: bool = True
507+
scan_responses = None
501508
dep_responses = None
502509
if self.is_file_or_snippet_scan():
503510
if not self.threaded_scan.complete(): # Wait for the scans to complete
504511
self.print_stderr(f'Warning: Scanning analysis ran into some trouble.')
505512
success = False
506513
self.threaded_scan.complete_bar()
507-
responses = self.threaded_scan.responses
514+
scan_responses = self.threaded_scan.responses
508515
if self.is_dependency_scan():
509516
self.print_msg('Retrieving dependency data...')
510517
if not self.threaded_deps.complete():
511-
self.print_stderr(f'Warning: Dependency analysis ran into some trouble.')
518+
self.print_stderr(
519+
f'Warning: Dependency analysis ran into some trouble.'
520+
)
512521
success = False
513522
dep_responses = self.threaded_deps.responses
514-
# self.print_stderr(f'Dep Data: {dep_responses}')
515-
# TODO change to dictionary
516-
raw_output = "{\n"
517-
# TODO look into merging the two dictionaries. See https://favtutor.com/blogs/merge-dictionaries-python
518-
if responses or dep_responses:
519-
first = True
520-
if responses:
521-
for scan_resp in responses:
522-
if scan_resp is not None:
523-
for key, value in scan_resp.items():
524-
if file_map: # We have a map for obfuscated files. Check if we can revert it
525-
fm = file_map.get(key)
526-
if fm:
527-
key = fm # Replace the obfuscated filename
528-
if first:
529-
raw_output += " \"%s\":%s" % (key, json.dumps(value, indent=2))
530-
first = False
531-
else:
532-
raw_output += ",\n \"%s\":%s" % (key, json.dumps(value, indent=2))
533-
# End for loop
534-
if dep_responses:
535-
dep_files = dep_responses.get("files")
536-
if dep_files and len(dep_files) > 0:
537-
for dep_file in dep_files:
538-
file = dep_file.pop("file", None)
539-
if file is not None:
540-
if first:
541-
raw_output += " \"%s\":[%s]" % (file, json.dumps(dep_file, indent=2))
542-
first = False
543-
else:
544-
raw_output += ",\n \"%s\":[%s]" % (file, json.dumps(dep_file, indent=2))
545-
# End for loop
546-
raw_output += "\n}"
547-
try:
548-
raw_results = json.loads(raw_output)
549-
except Exception as e:
550-
raise Exception(f'ERROR: Problem decoding parsed json: {e}')
551523

552-
results = self.post_processor.load_results(raw_results).post_process()
524+
raw_scan_results = self._merge_scan_results(
525+
scan_responses, dep_responses, file_map
526+
)
527+
528+
results = self.post_processor.load_results(raw_scan_results).post_process()
553529

554530
if self.output_format == 'plain':
555531
self.__log_result(json.dumps(results, indent=2, sort_keys=True))
@@ -567,6 +543,42 @@ def __finish_scan_threaded(self, file_map: dict = None) -> bool:
567543
success = False
568544
return success
569545

546+
def _merge_scan_results(
547+
self,
548+
scan_responses: Optional[List],
549+
dep_responses: Optional[Dict[str,Any]],
550+
file_map: Optional[Dict[str, Any]],
551+
) -> Dict[str, Any]:
552+
"""Merge scan and dependency responses into a single dictionary"""
553+
results: Dict[str, Any] = {}
554+
555+
if scan_responses:
556+
for response in scan_responses:
557+
if response is not None:
558+
if file_map:
559+
response = self._deobfuscate_filenames(response, file_map)
560+
results.update(response)
561+
562+
dep_files = dep_responses.get("files", None) if dep_responses else None
563+
if dep_files:
564+
for dep_file in dep_files:
565+
file = dep_file.pop("file", None)
566+
if file:
567+
results[file] = dep_file
568+
569+
return results
570+
571+
def _deobfuscate_filenames(self, response: dict, file_map: dict) -> dict:
572+
"""Convert obfuscated filenames back to original names"""
573+
deobfuscated = {}
574+
for key, value in response.items():
575+
deobfuscated_name = file_map.get(key, None)
576+
if deobfuscated_name:
577+
deobfuscated[deobfuscated_name] = value
578+
else:
579+
deobfuscated[key] = value
580+
return deobfuscated
581+
570582
def scan_file_with_options(self, file: str, deps_file: str = None, file_map: dict = None, dep_scope: SCOPE = None,
571583
dep_scope_include: str = None, dep_scope_exclude: str = None) -> bool:
572584
"""

src/scanoss/winnowing.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
"""
3030
import hashlib
3131
import pathlib
32+
import platform
3233
import re
3334

3435
from crc32c import crc32c
@@ -307,11 +308,15 @@ def wfp_for_contents(self, file: str, bin_file: bool, contents: bytes) -> str:
307308
return ''
308309
# Print file line
309310
content_length = len(contents)
310-
wfp_filename = repr(file).strip("'") # return a utf-8 compatible version of the filename
311+
original_filename = file
312+
313+
if platform.system() == 'Windows':
314+
original_filename = file.replace('\\', '/')
315+
wfp_filename = repr(original_filename).strip("'") # return a utf-8 compatible version of the filename
311316
if self.obfuscate: # hide the real size of the file and its name, but keep the suffix
312-
wfp_filename = f'{self.ob_count}{pathlib.Path(file).suffix}'
317+
wfp_filename = f'{self.ob_count}{pathlib.Path(original_filename).suffix}'
313318
self.ob_count = self.ob_count + 1
314-
self.file_map[wfp_filename] = file # Save the file name map for later (reverse lookup)
319+
self.file_map[wfp_filename] = original_filename # Save the file name map for later (reverse lookup)
315320

316321
wfp = 'file={0},{1},{2}\n'.format(file_md5, content_length, wfp_filename)
317322
# We don't process snippets for binaries, or other uninteresting files, or if we're requested to skip
@@ -464,7 +469,7 @@ def crc8_buffer(self, buffer):
464469
crc = self.crc8_byte(crc, buffer[index])
465470
crc ^= CRC8_MAXIM_DOW_FINAL # Bitwise OR (XOR) of crc in Maxim Dow Final
466471
return crc
467-
472+
468473
#
469474
# End of Winnowing Class
470475
#

0 commit comments

Comments
 (0)