Skip to content

Commit df15e49

Browse files
committed
Split long lines into shorter ones
1 parent 067012f commit df15e49

File tree

6 files changed

+47
-18
lines changed

6 files changed

+47
-18
lines changed

src/analyses.py

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@ def save(self):
3434
class WordFrequencyAnalysis(Analysis):
3535
def __init__(self):
3636
self.words = {}
37-
self.stopwords = load_txt_into_set(f"{Directories.raw_data}/stopwords.txt")
37+
self.stopwords =\
38+
load_txt_into_set(f"{Directories.raw_data}/stopwords.txt")
3839

3940
@property
4041
def name(self):
@@ -54,7 +55,8 @@ def analyze_commit(self, author, repo, lines, message):
5455

5556
def finalize(self):
5657
sorted_keys = sorted(self.words, key=self.words.get, reverse=True)
57-
self.words = [{'word': word, 'count': self.words[word]} for word in sorted_keys]
58+
self.words = [{'word': word, 'count': self.words[word]}
59+
for word in sorted_keys]
5860

5961
@property
6062
def state(self):
@@ -91,7 +93,8 @@ def __init__(self):
9193

9294
self.lists = {}
9395
for form in self.forms:
94-
self.lists[form] = load_txt_into_set(f"{Directories.processed_data}/{form}.txt")
96+
self.lists[form] =\
97+
load_txt_into_set(f"{Directories.processed_data}/{form}.txt")
9598

9699
self.counts = {}
97100
for form in self.forms:
@@ -131,10 +134,16 @@ def count_frequency(self, word, form):
131134

132135
def sort_frequencies(self):
133136
for form in self.forms:
134-
sorted_keys = \
135-
sorted(self.frequencies[form], key=self.frequencies[form].get, reverse=True)
136-
self.frequencies[form] = \
137-
[{'word': word, 'count': self.frequencies[form][word]} for word in sorted_keys]
137+
sorted_keys = sorted(
138+
self.frequencies[form],
139+
key=self.frequencies[form].get,
140+
reverse=True
141+
)
142+
143+
self.frequencies[form] = [
144+
{'word': word, 'count': self.frequencies[form][word]}
145+
for word in sorted_keys
146+
]
138147

139148
def finalize(self):
140149
self.sort_frequencies()

src/analyzer.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,9 @@ def analyze(self):
1818
if author not in self.authors:
1919
self.authors[author] = 0
2020
if self.authors[author] < self.MAX_COMMITS_BY_AUTHOR:
21+
message = message.strip()
2122
for analysis in self.analyses:
22-
analysis.analyze_commit(author, repo, lines, message.strip())
23+
analysis.analyze_commit(author, repo, lines, message)
2324
self.authors[author] += 1
2425
self.analyzed_number += 1
2526
self.total_number += 1
@@ -28,4 +29,5 @@ def analyze(self):
2829
analysis.finalize()
2930
analysis.save()
3031

31-
print(f"Analyzed {self.analyzed_number} commits (out of {self.total_number}).")
32+
print(f"Analyzed {self.analyzed_number} commits"
33+
f" (out of {self.total_number}).")

src/download_and_parse.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,4 +34,5 @@
3434
if len(first_line) > 300:
3535
continue
3636

37-
output_file.write(f"{author}::{repo}::{len(message_lines)}::{first_line}\n")
37+
output_file.write(f"{author}::{repo}::"
38+
f"{len(message_lines)}::{first_line}\n")

src/plotters.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88

99
class Plotter(ABC):
1010
def __init__(self):
11-
self.data = load_json(f"{Directories.json_outputs}/{self.input_file_name}.json")
11+
self.data = \
12+
load_json(f"{Directories.json_outputs}/{self.input_file_name}.json")
1213

1314
@property
1415
@abstractmethod
@@ -119,7 +120,10 @@ def compute_values(self):
119120
total_count = sum(x['count'] for x in self.data)
120121

121122
top_words = self.data[:self.n_top_words]
122-
values = [(x['word'], x['count'] * 100 / total_count) for x in top_words]
123+
values = [
124+
(x['word'], x['count'] * 100 / total_count)
125+
for x in top_words
126+
]
123127

124128
return values
125129

@@ -155,8 +159,10 @@ def compute_values(self):
155159
counts = self.data['total_counts']
156160
total_count = sum(counts.values())
157161

158-
values = [(x.replace("_", " ").capitalize(), counts[x] * 100 / total_count)
159-
for x in counts]
162+
values = [
163+
(x.replace("_", " ").capitalize(), counts[x] * 100 / total_count)
164+
for x in counts
165+
]
160166
values.sort(key=lambda x: x[1], reverse=True)
161167

162168
return values
@@ -185,7 +191,10 @@ def compute_values(self):
185191
total_count = self.data['total_counts'][self.form]
186192

187193
top_words = self.data['frequencies'][self.form][:self.n_top_words]
188-
values = [(x['word'], x['count'] * 100 / total_count) for x in top_words]
194+
values = [
195+
(x['word'], x['count'] * 100 / total_count)
196+
for x in top_words
197+
]
189198

190199
return values
191200

src/process_irregular_verbs.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,11 @@
1313

1414
words = {}
1515
for text in texts:
16-
groups = re.findall("""(?:\w|/|\*|(?: /))+ – (?:\w|/|\*)+ – (?:\w|/|\*)+""", text)
16+
groups = re.findall(
17+
"""(?:\w|/|\*|(?: /))+ – (?:\w|/|\*)+ – (?:\w|/|\*)+""",
18+
text
19+
)
20+
1721
for group in groups:
1822
if group[0] == '*':
1923
continue # archaic

src/process_verbs.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,15 @@
55
from file_utils import open_file_dir_safe
66

77
input_file = open(f"{Directories.raw_data}/verbs.txt")
8-
output_file = open_file_dir_safe(f"{Directories.processed_data}/infinitive.txt", "w")
8+
output_file =\
9+
open_file_dir_safe(f"{Directories.processed_data}/infinitive.txt", "w")
910

1011
with input_file, output_file:
1112
for line in input_file:
12-
match = re.match("""^[0-9]{8}\s[0-9]{2}\s[a-z]\s[0-9]{2}\s([a-zA-Z]*)\s""", line)
13+
match = re.match(
14+
"""^[0-9]{8}\s[0-9]{2}\s[a-z]\s[0-9]{2}\s([a-zA-Z]*)\s""",
15+
line
16+
)
1317
if match:
1418
word = match.group(1)
1519
if not word == "initial":

0 commit comments

Comments (0)