Skip to content

Commit df15e49

Browse files
committed
Split long lines into shorter ones
1 parent 067012f commit df15e49

File tree

6 files changed

+47
-18
lines changed

6 files changed

+47
-18
lines changed

src/analyses.py

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@ def save(self):
3434
class WordFrequencyAnalysis(Analysis):
3535
def __init__(self):
3636
self.words = {}
37-
self.stopwords = load_txt_into_set(f"{Directories.raw_data}/stopwords.txt")
37+
self.stopwords =\
38+
load_txt_into_set(f"{Directories.raw_data}/stopwords.txt")
3839

3940
@property
4041
def name(self):
@@ -54,7 +55,8 @@ def analyze_commit(self, author, repo, lines, message):
5455

5556
def finalize(self):
5657
sorted_keys = sorted(self.words, key=self.words.get, reverse=True)
57-
self.words = [{'word': word, 'count': self.words[word]} for word in sorted_keys]
58+
self.words = [{'word': word, 'count': self.words[word]}
59+
for word in sorted_keys]
5860

5961
@property
6062
def state(self):
@@ -91,7 +93,8 @@ def __init__(self):
9193

9294
self.lists = {}
9395
for form in self.forms:
94-
self.lists[form] = load_txt_into_set(f"{Directories.processed_data}/{form}.txt")
96+
self.lists[form] =\
97+
load_txt_into_set(f"{Directories.processed_data}/{form}.txt")
9598

9699
self.counts = {}
97100
for form in self.forms:
@@ -131,10 +134,16 @@ def count_frequency(self, word, form):
131134

132135
def sort_frequencies(self):
133136
for form in self.forms:
134-
sorted_keys = \
135-
sorted(self.frequencies[form], key=self.frequencies[form].get, reverse=True)
136-
self.frequencies[form] = \
137-
[{'word': word, 'count': self.frequencies[form][word]} for word in sorted_keys]
137+
sorted_keys = sorted(
138+
self.frequencies[form],
139+
key=self.frequencies[form].get,
140+
reverse=True
141+
)
142+
143+
self.frequencies[form] = [
144+
{'word': word, 'count': self.frequencies[form][word]}
145+
for word in sorted_keys
146+
]
138147

139148
def finalize(self):
140149
self.sort_frequencies()

src/analyzer.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,9 @@ def analyze(self):
1818
if author not in self.authors:
1919
self.authors[author] = 0
2020
if self.authors[author] < self.MAX_COMMITS_BY_AUTHOR:
21+
message = message.strip()
2122
for analysis in self.analyses:
22-
analysis.analyze_commit(author, repo, lines, message.strip())
23+
analysis.analyze_commit(author, repo, lines, message)
2324
self.authors[author] += 1
2425
self.analyzed_number += 1
2526
self.total_number += 1
@@ -28,4 +29,5 @@ def analyze(self):
2829
analysis.finalize()
2930
analysis.save()
3031

31-
print(f"Analyzed {self.analyzed_number} commits (out of {self.total_number}).")
32+
print(f"Analyzed {self.analyzed_number} commits"
33+
f" (out of {self.total_number}).")

src/download_and_parse.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,4 +34,5 @@
3434
if len(first_line) > 300:
3535
continue
3636

37-
output_file.write(f"{author}::{repo}::{len(message_lines)}::{first_line}\n")
37+
output_file.write(f"{author}::{repo}::"
38+
f"{len(message_lines)}::{first_line}\n")

src/plotters.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88

99
class Plotter(ABC):
1010
def __init__(self):
11-
self.data = load_json(f"{Directories.json_outputs}/{self.input_file_name}.json")
11+
self.data = \
12+
load_json(f"{Directories.json_outputs}/{self.input_file_name}.json")
1213

1314
@property
1415
@abstractmethod
@@ -119,7 +120,10 @@ def compute_values(self):
119120
total_count = sum(x['count'] for x in self.data)
120121

121122
top_words = self.data[:self.n_top_words]
122-
values = [(x['word'], x['count'] * 100 / total_count) for x in top_words]
123+
values = [
124+
(x['word'], x['count'] * 100 / total_count)
125+
for x in top_words
126+
]
123127

124128
return values
125129

@@ -155,8 +159,10 @@ def compute_values(self):
155159
counts = self.data['total_counts']
156160
total_count = sum(counts.values())
157161

158-
values = [(x.replace("_", " ").capitalize(), counts[x] * 100 / total_count)
159-
for x in counts]
162+
values = [
163+
(x.replace("_", " ").capitalize(), counts[x] * 100 / total_count)
164+
for x in counts
165+
]
160166
values.sort(key=lambda x: x[1], reverse=True)
161167

162168
return values
@@ -185,7 +191,10 @@ def compute_values(self):
185191
total_count = self.data['total_counts'][self.form]
186192

187193
top_words = self.data['frequencies'][self.form][:self.n_top_words]
188-
values = [(x['word'], x['count'] * 100 / total_count) for x in top_words]
194+
values = [
195+
(x['word'], x['count'] * 100 / total_count)
196+
for x in top_words
197+
]
189198

190199
return values
191200

src/process_irregular_verbs.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,11 @@
1313

1414
words = {}
1515
for text in texts:
16-
groups = re.findall("""(?:\w|/|\*|(?: /))+ – (?:\w|/|\*)+ – (?:\w|/|\*)+""", text)
16+
groups = re.findall(
17+
"""(?:\w|/|\*|(?: /))+ – (?:\w|/|\*)+ – (?:\w|/|\*)+""",
18+
text
19+
)
20+
1721
for group in groups:
1822
if group[0] == '*':
1923
continue # archaic

src/process_verbs.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,15 @@
55
from file_utils import open_file_dir_safe
66

77
input_file = open(f"{Directories.raw_data}/verbs.txt")
8-
output_file = open_file_dir_safe(f"{Directories.processed_data}/infinitive.txt", "w")
8+
output_file =\
9+
open_file_dir_safe(f"{Directories.processed_data}/infinitive.txt", "w")
910

1011
with input_file, output_file:
1112
for line in input_file:
12-
match = re.match("""^[0-9]{8}\s[0-9]{2}\s[a-z]\s[0-9]{2}\s([a-zA-Z]*)\s""", line)
13+
match = re.match(
14+
"""^[0-9]{8}\s[0-9]{2}\s[a-z]\s[0-9]{2}\s([a-zA-Z]*)\s""",
15+
line
16+
)
1317
if match:
1418
word = match.group(1)
1519
if not word == "initial":

0 commit comments

Comments (0)