-
Notifications
You must be signed in to change notification settings - Fork 61
/
Copy pathgenerate_package_description.py
197 lines (178 loc) · 7.6 KB
/
generate_package_description.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
from collections import defaultdict
import csv
import json
import os
from pathlib import Path
import re
import sys
help_statement ="""
Usage: {script_name} <language> <package_name>
A tool for generating a rule package description, in JSON format.
The rules.csv file at the root of this repository is processed, and rules with a matching
language and package name are processed and used to generate a "rule package description"
file in the 'rule_packages' directory, using the name of the package.
The file will contain a JSON dictionary for each standard where the key is the rule ID and
the value is a dictionary describing the rule metadata. The dictionary will also contain a
list of query dictionaries, with a single auto-generated entry.
The generated file should be reviewed, and updated to:
1. Add additional query entries as required
2. Confirm that the following auto-generated properties are appropriate:")
- 'precision'
- 'short_name'
- 'severity'
3. Add additional 'tags' as required, particularly 'security' or 'correctness'.
If the file already exists, it will not be overwritten.
Arguments:
<language> - the programming language of the package (currently 'cpp' or 'c')
<package_name> - the name of the package to generate a package file for.
"""
if len(sys.argv) == 1 or (len(sys.argv) == 2 and sys.argv[1] == "--help"):
print(help_statement.format(script_name=sys.argv[0]))
sys.exit(0)
language_name = sys.argv[1]
package_name = sys.argv[2]
repo_root = Path(__file__).parent.parent.parent
rules_file_path = repo_root.joinpath('rules.csv')
rule_packages_file_path = repo_root.joinpath('rule_packages', language_name)
if not os.path.isdir(rule_packages_file_path):
os.mkdir(rule_packages_file_path)
rule_package_file_path = rule_packages_file_path.joinpath(package_name + ".json")
def generate_short_name(title):
"""Generate a short, camel-case string from a rule title, suitable for a file name or id."""
# Only consider the first sentence
first_sentence = title.split(". ")[0]
# Replace some common strings with text more suitable for a short name
replacements = [
("C++ ", ""),
("std::", ""),
("_", " "),
("-", " "),
("&&", "and"),
("&&", "or"),
(" shall not be ", " "),
(" shall not ", " "),
(" shall be ", " not "),
(" as a ", " as "),
(" as an ", " as "),
(" in a ", " in "),
(" in an ", " in "),
(" as the ", " as ")
]
for replacement in replacements:
first_sentence = first_sentence.replace(replacement[0], replacement[1])
# Remove any other characters not suitable for a short name
only_safe_chars = re.sub(r"[^a-zA-Z ]*", "", first_sentence)
# Remove anything after an "if", to keep it short
only_first_part = only_safe_chars.split(" if ")[0]
components = only_first_part.split(" ")
if re.match(r"^(an|a|the)",components[0].lower(), re.IGNORECASE):
# Remove "if", "an" and "a" if they occur at the start of the sentence
del components[0]
# Convert to CamelCase and join without spaces
return "".join([x.title() for x in components])
try:
rules_file = open(rules_file_path, "r")
except PermissionError:
print("Error: No permission to read the rules file located at '" + str(rules_file_path) + "'")
sys.exit(1)
else:
package_description = defaultdict(dict)
with rules_file:
rules_reader = csv.reader(rules_file)
# Skip header row
next(rules_reader, None)
for rule in rules_reader:
language = rule[0]
standard = rule[1]
rule_id = rule[2]
supportable = rule[3]
obligation_level = rule[4]
enforcement_level = rule[5]
allocated_targets = rule[6]
rule_title = rule[7]
similar_query = rule[8]
package = rule[9]
difficulty = rule[10]
# Find all rules in the given language and package
if language == language_name and package == package_name:
if not supportable == "Yes":
print("Error: " + standard + " " + rule_id + " is marked as part of package " + package_name + " but is not marked as supportable.")
sys.exit(1)
# Add the AUTOSAR obligation, enforcement and allocated target as query properties.
properties = {}
if obligation_level:
properties["obligation"] = obligation_level.lower()
if enforcement_level:
properties["enforcement"] = enforcement_level.lower()
if allocated_targets:
properties["allocated-target"] = [target.strip(' ').lower() for target in allocated_targets.split("/")]
if difficulty == "Audit":
properties["audit"] = ""
# Default to a severity of error, unless the obligation level is considered "advisory"
severity = "error"
if obligation_level == "advisory":
severity = "warning"
# Default to a precision of very-high, but reduce it if the difficulty is marked as Hard or Very Hard
precision = "very-high"
if difficulty == "Very Hard":
precision = "medium"
elif difficulty == "Hard":
precision = "high"
# Default assumption is that we use the rule title as the query name
query_name = rule_title
description = ""
if ". " in query_name:
# This is probably a multi-sentence title, so use only the first sentence as
# the name, and include the full text in the description
query_name = query_name.split(". ")[0]
description = rule_title
if len(query_name) > 100:
# Long query names are not good, truncate to 100 on a space boundary
# Take one more character, so we can check whether it's a space
query_name_shorter = query_name[0:101]
# Reports the last space, or -1
last_space = query_name_shorter.rfind(" ")
# If last space is -1, then we lose the extra character as required
query_name = query_name_shorter[0:last_space]
description = rule_title
# Query names should never end in a dot
query_name = query_name.rstrip(".")
package_description[standard][rule_id] = {
"title" : rule_title,
"properties" : properties,
"queries" : [
{
"short_name" : generate_short_name(rule_title),
"name" : query_name,
"precision" : precision,
"severity" : severity,
"description" : description,
"kind" : "problem",
"tags" : []
}
]
}
if not package_description:
print("Error: No rules found for package '" + package_name + "'")
sys.exit(1)
if rule_package_file_path.exists():
print("Error: Rule package file already exists at '" + str(rule_package_file_path) + "'.")
sys.exit(1)
try:
rule_package_file = open(rule_package_file_path, "w")
except PermissionError:
print("Error: No permission to write the package rules file located at '" + str(rule_package_file_path) + "'")
sys.exit(1)
else:
with rule_package_file:
json.dump(package_description, rule_package_file, indent=2, sort_keys=True)
print("Rule package file generated at " + str(rule_package_file_path) + ".")
print("")
print("A default query has been generated for each for each rule. Please review each rule in the generated JSON file and:")
print(" (1) Add additional queries as required")
print(" (2) Confirm that the following auto-generated properties are appropriate:")
print(" - 'camel_name'.")
print(" - 'precision'.")
print(" - 'query_name'.")
print(" - 'severity'.")
print(" (3) Add additional 'tags' as required, particularly 'security' or 'correctness'.")