Skip to content

Huawei Pipeline Added with Tests #1770

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions vulnerabilities/importers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
from vulnerabilities.pipelines import alpine_linux_importer
from vulnerabilities.pipelines import github_importer
from vulnerabilities.pipelines import gitlab_importer
from vulnerabilities.pipelines import huawei_bulletin_importer
from vulnerabilities.pipelines import nginx_importer
from vulnerabilities.pipelines import npm_importer
from vulnerabilities.pipelines import nvd_importer
Expand Down Expand Up @@ -78,6 +79,20 @@
ubuntu_usn.UbuntuUSNImporter,
fireeye.FireyeImporter,
oss_fuzz.OSSFuzzImporter,
ruby.RubyImporter,
github_osv.GithubOSVImporter,
curl.CurlImporter,
epss.EPSSImporter,
vulnrichment.VulnrichImporter,
pypa_importer.PyPaImporterPipeline,
npm_importer.NpmImporterPipeline,
nginx_importer.NginxImporterPipeline,
gitlab_importer.GitLabImporterPipeline,
github_importer.GitHubAPIImporterPipeline,
nvd_importer.NVDImporterPipeline,
pysec_importer.PyPIImporterPipeline,
alpine_linux_importer.AlpineLinuxImporterPipeline,
huawei_bulletin_importer.HuaweiBulletinImporterPipeline,
]

IMPORTERS_REGISTRY = {
Expand Down
187 changes: 187 additions & 0 deletions vulnerabilities/pipelines/huawei_bulletin_importer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# VulnerableCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#
import logging
from datetime import datetime
from datetime import timezone
from typing import Iterable
from typing import Tuple

import requests
from bs4 import BeautifulSoup
from packageurl import PackageURL
from univers.version_range import VersionRange
from univers.versions import GenericVersion

from vulnerabilities.importer import AdvisoryData
from vulnerabilities.importer import AffectedPackage
from vulnerabilities.importer import Reference
from vulnerabilities.importer import VulnerabilitySeverity
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline
from vulnerabilities.severity_systems import GENERIC

logger = logging.getLogger(__name__)


def extract_version(version_str: str) -> Tuple[str, str]:
"""
Gets the version type and number from a messy Huawei version string.
Returns tuple of (type, version) - returns empty type if cant parse
"""
version_str = version_str.lower().strip()
if version_str.startswith("harmonyos"):
return "harmonyos", version_str.split("harmonyos")[-1].strip()
elif version_str.startswith("emui"):
return "emui", version_str.split("emui")[-1].strip()
return "", version_str


class HuaweiBulletinImporterPipeline(VulnerableCodeBaseImporterPipeline):
"""
Scrapes security bullitin data from Huawei's website:
https://consumer.huawei.com/en/support/bulletin/
"""

pipeline_id = "huawei_bulletin_importer"
spdx_license_expression = "LicenseRef-Terms-Of-Use"
license_url = "https://consumer.huawei.com/en/legal/terms-of-use/"
Comment on lines +51 to +52
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not the license for the security bulletin. We should leave this empty for now and reach out to them later to clarify the license for the security advisory.

url = "https://consumer.huawei.com/en/support/bulletin/"
importer_name = "Huawei Bulletin Importer"

def __init__(self):
super().__init__()
self.raw_data = None
Comment on lines +56 to +58
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is unnecessary.


@classmethod
def steps(cls):
"""The steps we need to run in order"""
return (
cls.fetch_bulletin,
cls.collect_and_store_advisories,
cls.import_new_advisories,
)

def fetch_bulletin(self):
"""
Gets the bullitin data from Huawei's site.
Stores raw html for processing later.
"""
self.log(f"Fetching {self.url}")
try:
response = requests.get(f"{self.url}2024/9/")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It does not make sense to collect advisories only from Sep-2024, we should collect all the security bulletins from https://consumer.huawei.com/en/support/bulletin/.

response.raise_for_status()
self.raw_data = BeautifulSoup(response.text, "html.parser")
except Exception as e:
self.log(f"Failed to fetch Huawei bulletin: {e}", logging.ERROR)
raise

def advisories_count(self) -> int:
"""
Counts how many advisorys we found.
Returns 0 if something went wrong.
"""
if not self.raw_data:
return 0

tables = self.raw_data.find_all("table")
if not tables:
return 0
huawei_rows = len(tables[0].find_all("tr")[1:])
thirdparty_rows = len(tables[1].find_all("tr")[1:])
return huawei_rows + thirdparty_rows

def collect_advisories(self) -> Iterable[AdvisoryData]:
"""
Parse the bullitin and extract all the advisorys.
Returns empty list if something goes wrong.
"""
if not self.raw_data:
return []

tables = self.raw_data.find_all("table")
if len(tables) < 2:
return []

for row in tables[0].find_all("tr")[1:]:
cols = row.find_all("td")
if len(cols) != 5:
continue
advisory = {
"cve_id": cols[0].text.strip(),
"description": cols[1].text.strip(),
"impact": cols[2].text.strip(),
"severity": cols[3].text.strip(),
"affected_versions": cols[4].text.strip(),
"is_huawei": True,
}
advisory_data = self.to_advisory_data(advisory)
if advisory_data:
yield advisory_data
for row in tables[1].find_all("tr")[1:]:
cols = row.find_all("td")
if len(cols) != 3:
continue

advisory = {
"cve_id": cols[0].text.strip(),
"severity": cols[1].text.strip(),
"affected_versions": cols[2].text.strip(),
"is_huawei": False,
}
advisory_data = self.to_advisory_data(advisory)
if advisory_data:
yield advisory_data

def to_advisory_data(self, data) -> AdvisoryData:
"""
Takes raw data and makes it into propper advisory format.
Returns None if theres any problems.
"""
try:
if not data.get("cve_id"):
return None
affected_packages = []
versions = [v.strip() for v in data["affected_versions"].split(",") if v.strip()]

for version in versions:
system_type, version_number = extract_version(version)
if not system_type:
continue
affected_packages.append(
AffectedPackage(
package=PackageURL(type="huawei", name=system_type),
affected_version_range=VersionRange.from_string(
f"vers:generic/={version_number}"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead, we should add the huawei VersionRange in univers and use that here.

),
)
)
if not affected_packages:
return None
severity = VulnerabilitySeverity(system=GENERIC, value=data["severity"].lower())
references = [
Reference(
reference_id=data["cve_id"],
url=f"https://nvd.nist.gov/vuln/detail/{data['cve_id']}",
severities=[severity],
Copy link
Member

@keshav-space keshav-space Apr 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is completely wrong. This severity score is coming from huawei. why would you add NVD url to it?

)
]
if data.get("is_huawei"):
summary = "\n".join(filter(None, [data.get("description"), data.get("impact")]))
else:
summary = f"Third-party vulnerability affecting {''.join(data['affected_versions'].split())}"
return AdvisoryData(
summary=summary if summary else f"Security update for {data['cve_id']}",
aliases=[data["cve_id"]],
affected_packages=affected_packages,
references=references,
date_published=None,
url=f"{self.url}2024/9/",
)
except Exception as e:
self.log(f"Failed to process advisory {data.get('cve_id')}: {e}", logging.ERROR)
return None
Loading