-
Notifications
You must be signed in to change notification settings - Fork 74
/
Copy pathInfoExtractor.py
116 lines (98 loc) · 3.95 KB
/
InfoExtractor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import os
from coalib.parsing.Globbing import glob, fnmatch
from coala_quickstart.info_extraction.Info import Info
class InfoExtractor:
    """
    Base class for extractors that collect ``Info`` instances from the files
    of a project.

    Subclasses are expected to override ``parse_file`` and
    ``find_information`` (both raise ``NotImplementedError`` here) and to
    set the class attributes below.
    """

    # tuple of file globs supported by the extractor.
    supported_file_globs = tuple()

    # Links to the issues/documentations for relevant specs of supported files.
    spec_references = []

    # tuple of ``Info`` classes that can be extracted.
    supported_info_kinds = (Info,)

    def __init__(self,
                 target_globs,
                 project_directory):
        """
        :param target_globs:      list of file globs to extract information
                                  from.
        :param project_directory: Absolute path to project directory in which
                                  the target files will be searched.
        :raises ValueError:       If any matched file does not match
                                  ``supported_file_globs``.
        """
        target_files = self.retrieve_files(target_globs, project_directory)

        # Validate every match before storing any state on the instance.
        for fname in target_files:
            if not fnmatch(fname, self.supported_file_globs):
                raise ValueError('The target file {} does not match the '
                                 'supported file globs {} of {}'.format(
                                     fname,
                                     self.supported_file_globs,
                                     self.__class__.__name__))

        self.target_files = [
            os.path.join(project_directory, f) for f in target_files]
        self.directory = project_directory
        # Maps file name -> {info name -> list of ``Info`` instances}.
        self._information = dict()

    @property
    def information(self):
        """
        Return extracted information (if any)
        """
        return self._information

    def parse_file(self, fname, file_content):
        """
        Parses the given file and returns the parsed file.

        :param fname:        Path of the file being parsed.
        :param file_content: Content of the file as a string.
        """
        raise NotImplementedError

    def _add_info(self, fname, info_to_add):
        """
        Organize and add the supplied information in self.information
        attribute.

        :param fname:       Name of the file from which information is
                            extracted.
        :param info_to_add: list of ``Info`` instances to add.
        :raises ValueError: If an item is not an instance of one of
                            ``supported_info_kinds``.
        """
        for info in info_to_add:
            if not isinstance(info, self.supported_info_kinds):
                # Report the type name rather than ``info.name``: an
                # unsupported object is not guaranteed to have a ``name``
                # attribute, and an AttributeError here would hide the
                # real problem.
                raise ValueError('The class {} is not present in '
                                 'supported information kinds of {}'.format(
                                     type(info).__name__,
                                     self.__class__.__name__))

            if not info.extractor:
                info.extractor = self

            # Group by file name first, then by info name.
            file_entry = self._information.setdefault(fname, {})
            file_entry.setdefault(info.name, []).append(info)

    def extract_information(self):
        """
        Extracts the information, saves in the object and returns it.
        """
        for fpath in self.target_files:
            # Only the read needs the handle; release it before parsing.
            with open(fpath, 'r') as f:
                file_content = f.read()

            pfile = self.parse_file(fpath, file_content)
            # Results are keyed by the path relative to the project directory.
            fname = os.path.relpath(fpath, self.directory)
            file_info = self.find_information(fname, pfile)
            if file_info:
                self._add_info(fname, file_info)

        return self.information

    def find_information(self, fname, parsed_file):
        """
        Returns a list of ``Info`` instances.

        :param fname:       Name of the file, relative to the project
                            directory.
        :param parsed_file: The value returned by ``parse_file`` for the file.
        """
        raise NotImplementedError

    @staticmethod
    def retrieve_files(file_globs, directory):
        """
        Returns the non-directory paths matched by the given file globs,
        evaluated with ``directory`` as the working directory.

        The process working directory is changed temporarily and is always
        restored, even if globbing raises.

        :param file_globs: list of file globs to match.
        :param directory:  Directory in which the globs are evaluated.
        :return:           list of matched paths that are not directories.
        """
        matches = []
        cwd = os.getcwd()
        os.chdir(directory)
        try:
            for g in file_globs:
                matches += glob(g)
            # Filter while still inside ``directory`` so that relative
            # matches resolve against it.
            matched_files = [f for f in matches if not os.path.isdir(f)]
        finally:
            os.chdir(cwd)
        return matched_files