Skip to content

Commit

Permalink
First refactor for multiple backends (part of #213) (#233)
Browse files Browse the repository at this point in the history
* [CveXplore-231] added stub file for dynamically generated attributes in collection classes

* [CveXplore-231] updated readme with new init parameter

* [CveXplore-231] make data source configurable via config

* [CveXplore-231] altered generic attribute; this should also be fixed by cve-search/cve-search#1038 in a new release

* [CveXplore-231] all collections moved to specific_db class

* [CveXplore-231] moved post tasks to separate calls

* [CveXplore-231] minor

* [CveXplore-231] minor

* [CveXplore-231] make data source configurable via config
  • Loading branch information
P-T-I authored Dec 19, 2023
1 parent d635c19 commit bfaa247
Show file tree
Hide file tree
Showing 16 changed files with 218 additions and 90 deletions.
2 changes: 1 addition & 1 deletion CveXplore/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.3.20.dev5
0.3.20.dev6
11 changes: 11 additions & 0 deletions CveXplore/common/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,17 @@ class Configuration(object):

CPE_FILTER_DEPRECATED = getenv_bool("CPE_FILTER_DEPRECATED", "True")

DATASOURCE = os.getenv("DATASOURCE", "mongodb")

DATASOURCE_PROTOCOL = os.getenv("DATASOURCE_PROTOCOL", "mongodb")
DATASOURCE_HOST = os.getenv(
"DATASOURCE_HOST", os.getenv("MONGODB_HOST", "127.0.0.1")
)
DATASOURCE_PORT = int(
os.getenv("DATASOURCE_PORT", int(os.getenv("MONGODB_PORT", 27017)))
)

# keep these for now to maintain backwards compatibility
MONGODB_HOST = os.getenv("MONGODB_HOST", "127.0.0.1")
MONGODB_PORT = int(os.getenv("MONGODB_PORT", 27017))

Expand Down
19 changes: 14 additions & 5 deletions CveXplore/common/data_source_connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import os

from CveXplore.api.connection.api_db import ApiDatabaseSource
from CveXplore.database.connection.database_connection import DatabaseConnection
from CveXplore.database.connection.mongo_db import MongoDBConnection
from CveXplore.objects.cvexplore_object import CveXploreObject

Expand All @@ -18,11 +19,19 @@ class DatasourceConnection(CveXploreObject):

# hack for documentation building
if json.loads(os.getenv("DOC_BUILD"))["DOC_BUILD"] != "YES":
__DATA_SOURCE_CONNECTION = (
ApiDatabaseSource(**json.loads(os.getenv("API_CON_DETAILS")))
if os.getenv("API_CON_DETAILS")
else MongoDBConnection(**json.loads(os.getenv("MONGODB_CON_DETAILS")))
)
try:
__DATA_SOURCE_CONNECTION = (
ApiDatabaseSource(**json.loads(os.getenv("API_CON_DETAILS")))
if os.getenv("API_CON_DETAILS")
else MongoDBConnection(**json.loads(os.getenv("MONGODB_CON_DETAILS")))
)
except TypeError:
__DATA_SOURCE_CONNECTION = DatabaseConnection(
database_type=os.getenv("DATASOURCE_TYPE"),
database_init_parameters=json.loads(
os.getenv("DATASOURCE_CON_DETAILS")
),
).database_connection

def to_dict(self, *print_keys: str) -> dict:
"""
Expand Down
7 changes: 2 additions & 5 deletions CveXplore/core/database_indexer/db_indexer.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,9 @@
import json
import os
from collections import namedtuple

from pymongo import TEXT, ASCENDING

from CveXplore.core.database_maintenance.update_base_class import UpdateBaseClass
from CveXplore.core.general.utils import sanitize
from CveXplore.database.connection.mongo_db import MongoDBConnection

MongoUniqueIndex = namedtuple("MongoUniqueIndex", "index name unique")
MongoAddIndex = namedtuple("MongoAddIndex", "index name")
Expand All @@ -17,10 +14,10 @@ class DatabaseIndexer(UpdateBaseClass):
Class processing the Mongodb indexes
"""

def __init__(self):
def __init__(self, datasource):
super().__init__(__name__)

database = MongoDBConnection(**json.loads(os.getenv("MONGODB_CON_DETAILS")))
database = datasource
self.database = database._dbclient

self.indexes = {
Expand Down
10 changes: 8 additions & 2 deletions CveXplore/core/database_maintenance/download_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,9 @@
from CveXplore.common.config import Configuration
from CveXplore.core.general.utils import sanitize
from CveXplore.core.worker_queue.worker_q import WorkerQueue
from CveXplore.database.connection.mongo_db import MongoDBConnection
from ..database_indexer.db_indexer import DatabaseIndexer
from ..logging.logger_class import AppLogger
from ...database.connection.database_connection import DatabaseConnection

thread_local = threading.local()
logging.setLoggerClass(AppLogger)
Expand Down Expand Up @@ -63,10 +64,15 @@ def __init__(self, feed_type: str, logger_name: str, prefix: str = None):

self.do_process = True

database = MongoDBConnection(**json.loads(os.getenv("MONGODB_CON_DETAILS")))
database = DatabaseConnection(
database_type=os.getenv("DATASOURCE_TYPE"),
database_init_parameters=json.loads(os.getenv("DATASOURCE_CON_DETAILS")),
).database_connection

self.database = database._dbclient

self.database_indexer = DatabaseIndexer(datasource=database)

self.config = Configuration()

self.logger = logging.getLogger(logger_name)
Expand Down
24 changes: 10 additions & 14 deletions CveXplore/core/database_maintenance/main_updater.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,8 @@ def __init__(self, datasource):
{"name": "epss", "updater": EPSSDownloads},
]

self.posts = [
{"name": "ensureindex", "updater": DatabaseIndexer},
{"name": "schema", "updater": SchemaChecker},
]

self.schema_checker = SchemaChecker()
self.database_indexer = DatabaseIndexer(datasource=datasource)
self.schema_checker = SchemaChecker(datasource=datasource)

def validate_schema(self):
return self.schema_checker.validate_schema()
Expand Down Expand Up @@ -98,10 +94,10 @@ def update(self, update_source: str | list = None):
)
except UpdateSourceNotFound:
raise
else:
for post in self.posts:
indexer = post["updater"]()
indexer.create_indexes()

self.database_indexer.create_indexes()

self.schema_checker.update()

self.datasource.set_handlers_for_collections()

Expand Down Expand Up @@ -153,10 +149,10 @@ def populate(self, populate_source: str | list = None):
)
except UpdateSourceNotFound:
raise
else:
for post in self.posts:
indexer = post["updater"]()
indexer.create_indexes()

self.database_indexer.create_indexes()

self.schema_checker.update()

self.datasource.set_handlers_for_collections()

Expand Down
11 changes: 6 additions & 5 deletions CveXplore/core/database_maintenance/sources_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
from tqdm import tqdm

from CveXplore.core.database_actions.db_action import DatabaseAction
from CveXplore.core.database_indexer.db_indexer import DatabaseIndexer
from CveXplore.core.database_maintenance.api_handlers import NVDApiHandler
from CveXplore.core.database_maintenance.content_handlers import (
CapecHandler,
Expand Down Expand Up @@ -263,7 +262,7 @@ def update(self, **kwargs):

# if collection is non-existent; assume it's not an update
if self.feed_type.lower() not in self.getTableNames():
DatabaseIndexer().create_indexes(collection=self.feed_type.lower())
self.database_indexer.create_indexes(collection=self.feed_type.lower())
self.is_update = False

self.logger.info("Finished CPE database update")
Expand All @@ -283,7 +282,7 @@ def populate(self, **kwargs):

self.process_downloads()

DatabaseIndexer().create_indexes(collection=self.feed_type.lower())
self.database_indexer.create_indexes(collection=self.feed_type.lower())

self.logger.info("Finished CPE database population")

Expand Down Expand Up @@ -820,7 +819,7 @@ def update(self):

# if collection is non-existent; assume it's not an update
if self.feed_type.lower() not in self.getTableNames():
DatabaseIndexer().create_indexes(collection=self.feed_type.lower())
self.database_indexer.create_indexes(collection=self.feed_type.lower())
self.is_update = False

self.logger.info("Finished CVE database update")
Expand All @@ -844,7 +843,7 @@ def populate(self):

self.process_downloads()

DatabaseIndexer().create_indexes(collection=self.feed_type.lower())
self.database_indexer.create_indexes(collection=self.feed_type.lower())

self.logger.info("Finished CVE database population")

Expand Down Expand Up @@ -1016,6 +1015,8 @@ def file_to_queue(self, file_tuple: Tuple[str, str]):
for cwe in self.ch.cwe:
try:
cwe["related_weaknesses"] = list(set(cwe["related_weaknesses"]))
cwe["description"] = cwe["Description"]
cwe.pop("Description")
except KeyError:
pass
self.process_item(cwe)
Expand Down
13 changes: 5 additions & 8 deletions CveXplore/core/database_schema/db_schema_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,18 @@
import os

from CveXplore.core.database_maintenance.update_base_class import UpdateBaseClass
from CveXplore.database.connection.mongo_db import MongoDBConnection
from CveXplore.errors import DatabaseSchemaError

runPath = os.path.dirname(os.path.realpath(__file__))


class SchemaChecker(UpdateBaseClass):
def __init__(self):
def __init__(self, datasource):
super().__init__(__name__)
with open(os.path.join(runPath, "../../.schema_version")) as f:
self.schema_version = json.loads(f.read())

database = MongoDBConnection(**json.loads(os.getenv("MONGODB_CON_DETAILS")))
database = datasource

self.dbh = database._dbclient["schema"]

Expand All @@ -40,13 +39,9 @@ def validate_schema(self):
"Database schema is not up to date; please re-populate the database!"
)

def create_indexes(self):
# hack for db_updater.py to put this class in the posts variable and run the update method
def update(self):
self.logger.info("Updating schema version")
self.update()
self.logger.info("Update schema version done!")

def update(self):
try:
current_record = list(self.dbh.find({}))

Expand All @@ -72,3 +67,5 @@ def update(self):
"rebuild_needed": self.schema_version["rebuild_needed"],
}
self.dbh.insert_one(current_record)

self.logger.info("Update schema version done!")
6 changes: 5 additions & 1 deletion CveXplore/database/connection/database_connection.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from CveXplore.api.connection.api_db import ApiDatabaseSource
from CveXplore.database.connection.base.db_connection_base import DatabaseConnectionBase
from CveXplore.database.connection.mongo_db import MongoDBConnection

Expand All @@ -7,7 +8,10 @@ def __init__(self, database_type: str, database_init_parameters: dict):
self.database_type = database_type
self.database_init_parameters = database_init_parameters

self._database_connnections = {"mongodb": MongoDBConnection}
self._database_connnections = {
"mongodb": MongoDBConnection,
"api": ApiDatabaseSource,
}

self._database_connection = self._database_connnections[self.database_type](
**self.database_init_parameters
Expand Down
4 changes: 2 additions & 2 deletions CveXplore/database/helpers/generic_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def __init__(self, collection: str):
"typical_severity",
],
"cpe": ["title", "cpeName", "vendor", "product", "stem"],
"cwe": ["name", "status", "Description"],
"cwe": ["name", "status", "description"],
"cves": [
"cvss",
"cvss3",
Expand Down Expand Up @@ -139,7 +139,7 @@ def mapped_fields(self, collection: str) -> list:

def __repr__(self):
"""String representation of object"""
return f"<< GenericDatabaseFactory:{self._collection} >>"
return f"<< {self.__class__.__name__}:{self._collection} >>"


class GenericDatabaseFieldsFunctions(DatasourceConnection):
Expand Down
26 changes: 19 additions & 7 deletions CveXplore/database/helpers/specific_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,10 +94,6 @@ def _field_list(self, doc_id: str) -> list:
)
)

def __repr__(self):
"""String representation of object"""
return f"<< CvesDatabaseFunctions:{self._collection} >>"


class CpeDatabaseFunctions(GenericDatabaseFactory):
"""
Expand Down Expand Up @@ -150,6 +146,22 @@ def find_active_cpes(
else:
return None

def __repr__(self):
"""String representation of object"""
return f"<< CpeDatabaseFunctions:{self._collection} >>"

class CapecDatabaseFunctions(GenericDatabaseFactory):
"""
Collection-specific helper class behind the capec attribute of a CveXplore instance.

It is the place for additional functions that only apply to the 'capec' collection; as written here it
adds no behaviour of its own on top of GenericDatabaseFactory.
"""

def __init__(self, collection: str):
# Delegates entirely to GenericDatabaseFactory; no capec-specific state is set up here.
super().__init__(collection)

class CWEDatabaseFunctions(GenericDatabaseFactory):
"""
Collection-specific helper class behind the cwe attribute of a CveXplore instance.

It is the place for additional functions that only apply to the 'cwe' collection; as written here it
adds no behaviour of its own on top of GenericDatabaseFactory.
"""

def __init__(self, collection: str):
# Delegates entirely to GenericDatabaseFactory; no cwe-specific state is set up here.
super().__init__(collection)
45 changes: 45 additions & 0 deletions CveXplore/database/helpers/specific_db.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from CveXplore.database.helpers.generic_db import GenericDatabaseFieldsFunctions

# Type stub for the 'cves' collection class. It declares the per-field attributes
# that are generated dynamically on the collection object, so that static tooling
# (type checkers, IDE completion) can see them. Stub only: no runtime behaviour.
class CvesDatabaseFunctions:
id: GenericDatabaseFieldsFunctions
cvss: GenericDatabaseFieldsFunctions
cvss3: GenericDatabaseFieldsFunctions
summary: GenericDatabaseFieldsFunctions
vendors: GenericDatabaseFieldsFunctions
products: GenericDatabaseFieldsFunctions
lastModified: GenericDatabaseFieldsFunctions
modified: GenericDatabaseFieldsFunctions
published: GenericDatabaseFieldsFunctions
status: GenericDatabaseFieldsFunctions
assigner: GenericDatabaseFieldsFunctions
cwe: GenericDatabaseFieldsFunctions
epss: GenericDatabaseFieldsFunctions

# Same signature as the constructor of the collection classes in specific_db.py.
def __init__(self, collection: str): ...

# Type stub for the 'cpe' collection class. It declares the dynamically generated
# per-field attributes so that static tooling can see them. Stub only: no runtime
# behaviour.
class CpeDatabaseFunctions:
id: GenericDatabaseFieldsFunctions
title: GenericDatabaseFieldsFunctions
cpeName: GenericDatabaseFieldsFunctions
vendor: GenericDatabaseFieldsFunctions
product: GenericDatabaseFieldsFunctions
stem: GenericDatabaseFieldsFunctions

# Same signature as the constructor of the collection classes in specific_db.py.
def __init__(self, collection: str): ...

# Type stub for the 'capec' collection class. It declares the dynamically generated
# per-field attributes so that static tooling can see them. Stub only: no runtime
# behaviour.
class CapecDatabaseFunctions:
name: GenericDatabaseFieldsFunctions
summary: GenericDatabaseFieldsFunctions
prerequisites: GenericDatabaseFieldsFunctions
solutions: GenericDatabaseFieldsFunctions
loa: GenericDatabaseFieldsFunctions
typical_severity: GenericDatabaseFieldsFunctions

# Same signature as CapecDatabaseFunctions.__init__ in specific_db.py.
def __init__(self, collection: str): ...

# Type stub for the 'cwe' collection class. It declares the dynamically generated
# per-field attributes so that static tooling can see them. Stub only: no runtime
# behaviour.
class CWEDatabaseFunctions:
name: GenericDatabaseFieldsFunctions
status: GenericDatabaseFieldsFunctions
# Lower-case 'description' matches the renamed entry in the 'cwe' field list of generic_db.py.
description: GenericDatabaseFieldsFunctions

# Same signature as CWEDatabaseFunctions.__init__ in specific_db.py.
def __init__(self, collection: str): ...
Loading

0 comments on commit bfaa247

Please sign in to comment.