4
4
import tempfile
5
5
import xml .etree .ElementTree as ET
6
6
import zipfile
7
- from dataclasses import InitVar , dataclass , field
8
7
from pathlib import Path
9
- from typing import Any , ClassVar , Dict , Iterator , List , Set , Tuple , Union , cast
8
+ from typing import Any , ClassVar , Dict , Iterator , List , Optional , Set , Tuple , Union , cast
10
9
11
10
import pandas as pd
12
11
19
18
logger = logging .getLogger (__name__ )
20
19
21
20
22
- @dataclass
23
21
class CPEDataset (ComplexSerializableType ):
24
22
"""
25
23
Dataset of CPE records. Includes look-up dictionaries for fast search.
26
24
"""
27
25
28
- was_enhanced_with_vuln_cpes : bool
29
- json_path : Path
30
- cpes : Dict [str , CPE ]
31
- vendor_to_versions : Dict [str , Set [str ]] = field (
32
- init = False , default_factory = dict
33
- ) # Look-up dict cpe_vendor: list of viable versions
34
- vendor_version_to_cpe : Dict [Tuple [str , str ], Set [CPE ]] = field (
35
- init = False , default_factory = dict
36
- ) # Look-up dict (cpe_vendor, cpe_version): List of viable cpe items
37
- title_to_cpes : Dict [str , Set [CPE ]] = field (
38
- init = False , default_factory = dict
39
- ) # Look-up dict title: List of cert items
40
- vendors : Set [str ] = field (init = False , default_factory = set )
41
-
42
- init_lookup_dicts : InitVar [bool ] = True
43
- cpe_xml_basename : ClassVar [str ] = "official-cpe-dictionary_v2.3.xml"
44
- cpe_url : ClassVar [str ] = "https://nvd.nist.gov/feeds/xml/cpe/dictionary/" + cpe_xml_basename + ".zip"
26
+ CPE_XML_BASENAME : ClassVar [str ] = "official-cpe-dictionary_v2.3.xml"
27
+ CPE_URL : ClassVar [str ] = "https://nvd.nist.gov/feeds/xml/cpe/dictionary/" + CPE_XML_BASENAME + ".zip"
28
+
29
+ def __init__ (
30
+ self ,
31
+ was_enhanced_with_vuln_cpes : bool ,
32
+ cpes : Dict [str , CPE ],
33
+ json_path : Optional [Union [str , Path ]] = None ,
34
+ ):
35
+ self .was_enhanced_with_vuln_cpes = was_enhanced_with_vuln_cpes
36
+ self .cpes = cpes
37
+ self ._json_path = Path (json_path ) if json_path else Path .cwd () / (type (self ).__name__ ).lower ()
38
+
39
+ self .vendor_to_versions : Dict [str , Set [str ]] = dict ()
40
+ self .vendor_version_to_cpe : Dict [Tuple [str , str ], Set [CPE ]] = dict ()
41
+ self .title_to_cpes : Dict [str , Set [CPE ]] = dict ()
42
+ self .vendors : Set [str ] = set ()
43
+
44
+ self .build_lookup_dicts ()
45
+
46
+ @property
47
+ def json_path (self ) -> Path :
48
+ return self ._json_path
49
+
50
+ @json_path .setter
51
+ def json_path (self , new_json_path : Union [str , Path ]) -> None :
52
+ self ._json_path = Path (new_json_path )
53
+ self .to_json ()
45
54
46
55
def __iter__ (self ) -> Iterator [CPE ]:
47
56
yield from self .cpes .values ()
@@ -65,11 +74,7 @@ def __eq__(self, other: object) -> bool:
65
74
66
75
@property
67
76
def serialized_attributes (self ) -> List [str ]:
68
- return ["was_enhanced_with_vuln_cpes" , "json_path" , "cpes" ]
69
-
70
- def __post_init__ (self , init_lookup_dicts : bool ):
71
- if init_lookup_dicts :
72
- self .build_lookup_dicts ()
77
+ return ["was_enhanced_with_vuln_cpes" , "cpes" ]
73
78
74
79
def build_lookup_dicts (self ) -> None :
75
80
"""
@@ -94,28 +99,25 @@ def build_lookup_dicts(self) -> None:
94
99
self .title_to_cpes [cpe .title ].add (cpe )
95
100
96
101
@classmethod
97
- def from_web (cls , json_path : Union [str , Path ], init_lookup_dicts : bool = True ) -> "CPEDataset" :
102
+ def from_web (cls , json_path : Optional [ Union [str , Path ]] = None ) -> "CPEDataset" :
98
103
"""
99
104
Creates CPEDataset from NIST resources published on-line
100
105
101
106
:param Union[str, Path] json_path: Path to store the dataset to
102
- :param bool init_lookup_dicts: If dictionaries for fast matching should be computed, defaults to True
103
107
:return CPEDataset: The resulting dataset
104
108
"""
105
109
with tempfile .TemporaryDirectory () as tmp_dir :
106
- xml_path = Path (tmp_dir ) / cls .cpe_xml_basename
107
- zip_path = Path (tmp_dir ) / (cls .cpe_xml_basename + ".zip" )
108
- helpers .download_file (cls .cpe_url , zip_path )
110
+ xml_path = Path (tmp_dir ) / cls .CPE_XML_BASENAME
111
+ zip_path = Path (tmp_dir ) / (cls .CPE_XML_BASENAME + ".zip" )
112
+ helpers .download_file (cls .CPE_URL , zip_path )
109
113
110
114
with zipfile .ZipFile (zip_path , "r" ) as zip_ref :
111
115
zip_ref .extractall (tmp_dir )
112
116
113
- return cls ._from_xml (xml_path , json_path , init_lookup_dicts )
117
+ return cls ._from_xml (xml_path , json_path )
114
118
115
119
@classmethod
116
- def _from_xml (
117
- cls , xml_path : Union [str , Path ], json_path : Union [str , Path ], init_lookup_dicts : bool = True
118
- ) -> "CPEDataset" :
120
+ def _from_xml (cls , xml_path : Union [str , Path ], json_path : Optional [Union [str , Path ]] = None ) -> "CPEDataset" :
119
121
logger .info ("Loading CPE dataset from XML." )
120
122
root = ET .parse (xml_path ).getroot ()
121
123
dct = {}
@@ -136,7 +138,7 @@ def _from_xml(
136
138
137
139
dct [cpe_uri ] = cached_cpe (cpe_uri , title )
138
140
139
- return cls (False , Path ( json_path ), dct , init_lookup_dicts )
141
+ return cls (False , dct , json_path )
140
142
141
143
@classmethod
142
144
def from_json (cls , input_path : Union [str , Path ]) -> "CPEDataset" :
@@ -147,19 +149,22 @@ def from_json(cls, input_path: Union[str, Path]) -> "CPEDataset":
147
149
:return CPEDataset: the resulting dataset.
148
150
"""
149
151
dset = cast ("CPEDataset" , ComplexSerializableType .from_json (input_path ))
150
- dset .json_path = Path (input_path )
152
+ dset ._json_path = Path (input_path )
151
153
return dset
152
154
153
155
@classmethod
154
- def from_dict (cls , dct : Dict [str , Any ], init_lookup_dicts : bool = True ) -> "CPEDataset" :
156
+ def from_dict (cls , dct : Dict [str , Any ]) -> "CPEDataset" :
155
157
"""
156
158
Loads dataset from dictionary.
157
159
158
160
:param Dict[str, Any] dct: Dictionary that holds the dataset
159
- :param bool init_lookup_dicts: Whether look-up dicts should be computed as a part of initialization, defaults to True
160
161
:return CPEDataset: the resulting dataset.
161
162
"""
162
- return cls (dct ["was_enhanced_with_vuln_cpes" ], Path ("../" ), dct ["cpes" ], init_lookup_dicts )
163
+ return cls (
164
+ dct ["was_enhanced_with_vuln_cpes" ],
165
+ dct ["cpes" ],
166
+ Path ("../" ),
167
+ )
163
168
164
169
def to_pandas (self ) -> pd .DataFrame :
165
170
"""
0 commit comments