-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathcode_metrics.py
executable file
·151 lines (138 loc) · 5.49 KB
/
code_metrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os, sys, re, stat
from optparse import OptionParser
import datetime
import json
import codecs
# Replace stdout with a UTF-8 stream writer so that unicode text written to
# it (for example non-ASCII NG words) is encoded as UTF-8.
sys.stdout = codecs.getwriter('utf-8')(sys.stdout)
# Newline string shared by the module: used to split source text into lines,
# inside the comment-stripping patterns, and to terminate report lines.
ret_code = '\n'
def auto_decode(s):
    """Decode the byte string *s* with the first candidate encoding that fits.

    The candidates are tried in a fixed order: UTF-8, Shift-JIS, EUC-JP and
    ISO-2022-JP.  The result of the first successful ``s.decode(codec)`` call
    is returned.

    Args:
        s: object exposing ``decode(codec)`` (a byte string in practice).

    Returns:
        The decoded text.

    Raises:
        Exception: with the message "failed to decode" when every candidate
            encoding rejects the input.
    """
    for codec in ['utf-8', 'shift-jis', 'euc-jp', 'iso2022-jp']:
        try:
            return s.decode(codec)
        except UnicodeError:
            # Wrong guess: move on to the next candidate.  Only codec
            # failures are swallowed; anything else (KeyboardInterrupt,
            # AttributeError, ...) is a real problem and must propagate.
            continue
    raise Exception("failed to decode")
class Processor(object):
    """Base class that computes simple text metrics for one language.

    Subclasses configure the behaviour through class attributes:

    * ``language`` -- label describing the language.
    * ``keywords`` -- words counted, as whole words, in comment-free code.
    * ``comment_region_start`` / ``comment_region_end`` -- block-comment
      delimiters; block comments are only stripped when both are set.
    * ``comment_line`` -- marker that starts a to-end-of-line comment.
    """

    # Neutral defaults: a subclass that omits an attribute simply gets no
    # keywords counted / no comments of that kind stripped.
    language = None
    keywords = ()
    comment_region_start = None
    comment_region_end = None
    comment_line = None

    def metric(self, f, ng_words):
        """Measure the source file at path *f*.

        Args:
            f: path of the file to read.
            ng_words: iterable of words to count in the raw text (comments
                included).

        Returns:
            dict with 'loc' (pieces of the raw text split on the newline
            string), 'loc_no_comment' (same, after comment removal), one
            count per entry of ``self.keywords`` and one per NG word.  When
            names collide, NG words overwrite keywords.

        Raises:
            Exception: propagated from ``auto_decode`` when the file content
                matches none of its encodings.
        """
        # Read raw bytes (decoding is done by hand) and make sure the handle
        # is closed even if decoding fails.
        with open(f, 'rb') as source:
            code = auto_decode(source.read())
        code_no_comment = self.remove_comment(code)
        result = {
            'loc': len(code.split(ret_code)),
            'loc_no_comment': len(code_no_comment.split(ret_code)),
        }
        # Words are escaped so that regex metacharacters in a keyword or NG
        # word are matched literally instead of corrupting the pattern.
        for k in self.keywords:
            result[k] = len(re.findall(r'\b%s\b' % re.escape(k), code_no_comment))
        for k in ng_words:
            result[k] = len(re.findall(r'\b%s\b' % re.escape(k), code, flags=re.U))
        return result

    def remove_comment(self, code):
        """Return *code* with the configured comments removed.

        Block comments are removed first, then lines that contain nothing
        but a line comment (together with their newline), and finally
        trailing line comments.  Comment markers are matched textually, so a
        marker inside a string literal is treated as a comment too.
        """
        s = code
        if self.comment_region_start and self.comment_region_end:
            # DOTALL lets '.' span line breaks; the non-greedy quantifier
            # stops at the first closing delimiter.
            s = re.sub(r'%s.*?%s' % (re.escape(self.comment_region_start),
                                      re.escape(self.comment_region_end)),
                       r'', s, flags=re.S)
        if self.comment_line:
            marker = re.escape(self.comment_line)
            # Drop comment-only lines including their newline so they do not
            # count as lines of code.  NOTE: \s also matches newlines, so
            # blank lines directly above such a line are removed with it.
            s = re.sub(r'^\s*%s.*\n' % marker, r'', s, flags=re.M)
            # Trim trailing comments but keep the line itself.
            s = re.sub(r'%s.*' % marker, r'', s)
        return s
class Java(Processor):
    """Processor class for Java.

    Configures ``/* ... */`` block comments and ``//`` line comments, and
    the control-flow keywords that are counted.
    """
    language = 'JAVA'
    keywords = ['if', 'else', 'try', 'catch', 'finally']
    comment_region_start = '/*'
    # A block comment is closed by '*/'.  Using '/*' here made the stripper
    # delete everything between two consecutive comment openers instead.
    comment_region_end = '*/'
    comment_line = '//'
class CPP(Processor):
    """Processor class for C++.

    Configures ``/* ... */`` block comments and ``//`` line comments, and
    the control-flow keywords that are counted.
    """
    language = 'C++'
    keywords = ['if', 'else', 'try', 'catch']
    comment_region_start = '/*'
    # A block comment is closed by '*/'.  Using '/*' here made the stripper
    # delete everything between two consecutive comment openers instead.
    comment_region_end = '*/'
    comment_line = '//'
class C(Processor):
    """Processor class for C.

    Configures ``/* ... */`` block comments only; no line-comment marker is
    set for this language.
    """
    language = 'C'
    keywords = ['if', 'else']
    comment_region_start = '/*'
    # A block comment is closed by '*/'.  Using '/*' here made the stripper
    # delete everything between two consecutive comment openers instead.
    comment_region_end = '*/'
    comment_line = None
class Python(Processor):
    """Metric settings for Python sources ('#' line comments only)."""
    language = 'Python'
    comment_line = '#'
    # No block-comment delimiters are configured for this language.
    comment_region_start = comment_region_end = None
    keywords = ['if', 'else', 'try', 'except']
class Perl(Processor):
    """Metric settings for Perl sources ('#' line comments only)."""
    language = 'Perl'
    comment_line = '#'
    # No block-comment delimiters are configured for this language.
    comment_region_start = comment_region_end = None
    keywords = ['if', 'else']
class Lua(Processor):
    """Processor class for Lua.

    Configures ``--`` line comments and ``--[[ ... ]]`` long comments, and
    the keywords that are counted.
    """
    language = 'Lua'
    keywords = ['if', 'else']
    # Lua long comments span several lines.  Without these delimiters only
    # the opening line was stripped (as a '--' line comment) and the rest of
    # the comment body was counted as code.  Levelled forms such as
    # '--[==[ ... ]==]' are not covered.
    comment_region_start = '--[['
    comment_region_end = ']]'
    comment_line = '--'
def csv_write(out, processors, ng_words, data):
    """Write the collected metrics to *out* as comma-separated text.

    The header row is: path, language, mod_date, mod_time, loc,
    loc_no_comment, the union of every processor's keywords (sorted, so the
    column order is reproducible), then the NG words in the order given.
    One row follows per record; a record lacking a column gets 0.

    Values are joined with plain commas -- no quoting or escaping is
    applied, so a value that itself contains a comma shifts the columns.

    Args:
        out: writable text stream.
        processors: dict whose values expose a ``keywords`` sequence.
        ng_words: sequence of NG words (extra count columns).
        data: iterable of dicts, one per measured file.
    """
    # set().union(*...) also copes with an empty processor table, where a
    # reduce() over an empty list would raise.
    keyword_columns = sorted(set().union(*[p.keywords for p in processors.values()]))
    colnames = ['loc', 'loc_no_comment'] + keyword_columns + list(ng_words)
    keys = ['path', 'language', 'mod_date', 'mod_time'] + colnames
    # write(), not writelines(): the header is a single string.
    out.write(','.join(keys) + ret_code)
    for d in data:
        out.write(','.join([str(d.get(k, 0)) for k in keys]) + ret_code)
def json_write(out, processors, ng_words, data):
    """Write every record in *data* to *out* as an indented JSON object.

    Each record is dumped on its own, one after another with no separator,
    so the output is a concatenation of JSON objects rather than a single
    JSON array.  Keys are sorted, non-ASCII text is written as-is, and date
    and time values are written as their ``str()`` form.

    Args:
        out: writable text stream.
        processors: unused; kept so the signature matches ``csv_write``.
        ng_words: unused; kept so the signature matches ``csv_write``.
        data: iterable of dicts, one per measured file.
    """
    class DatetimeEncoder(json.JSONEncoder):
        """JSON encoder that serialises date/time objects via ``str()``."""

        def default(self, obj):
            if isinstance(obj, (datetime.date, datetime.time)):
                return str(obj)
            return json.JSONEncoder.default(self, obj)

    for d in data:
        # skipkeys is a boolean flag (skip keys that are not basic types);
        # the list passed previously was merely truthy, so True keeps the
        # exact same behaviour.  Likewise indent=True was an indent of 1.
        json.dump(d, out, ensure_ascii=False, skipkeys=True, indent=1,
                  sort_keys=True, cls=DatetimeEncoder)
def main(processors):
    """Command-line entry point: measure every recognised file under a directory.

    Parses ``sys.argv`` (options ``-o/--out``, ``-n/--ng``, ``-f/--format``
    plus one directory argument), walks the directory, runs the processor
    registered for each file's extension and writes the report as CSV or
    JSON to the chosen output (stdout by default).

    Any ``Exception`` raised along the way is reported on stderr and
    swallowed, so the function then returns normally.  A missing directory
    argument is reported through the option parser, which prints the usage
    text and exits.

    Args:
        processors: dict mapping a file extension as returned by
            ``os.path.splitext`` (e.g. ".py") to a Processor instance.
    """
    out = None
    try:
        parser = OptionParser("usage: %prog [options] dir")
        parser.add_option("-o", "--out", dest="out",
                          help="write report to FILE", metavar="FILE")
        parser.add_option("-n", "--ng", dest="ng_file",
                          help="NG words FILE", metavar="FILE")
        parser.add_option("-f", "--format", dest="format", default="csv",
                          help="output format", metavar="FORMAT")
        (options, args) = parser.parse_args()
        if not args:
            # Clear usage error instead of "list index out of range".
            parser.error("target directory is required")
        target_dir = args[0]

        # Reject an unknown format before scanning the tree or truncating
        # the output file.
        writers = {'csv': csv_write, 'json': json_write}
        if options.format not in writers:
            raise Exception("%s is not supported format" % options.format)

        if options.out:
            # Encode as UTF-8 like the stdout wrapper does; a plain open()
            # cannot take the unicode text the writers emit (e.g. the
            # default NG word in the CSV header).
            out = codecs.open(options.out, 'w', 'utf-8')
        else:
            out = sys.stdout

        if options.ng_file:
            with open(options.ng_file, 'rb') as ng_file:
                ng_text = auto_decode(ng_file.read())
            # splitlines() also copes with CRLF files, which would otherwise
            # leave a stray '\r' on every word; empty lines are dropped.
            ng_words = [w for w in ng_text.splitlines() if w]
        else:
            ng_words = ['TODO', u'未完成']

        data = []
        for root, dirs, files in os.walk(target_dir, topdown=False):
            for name in files:
                path = os.path.join(root, name)
                ext = os.path.splitext(path)[1]
                if ext not in processors:
                    continue
                processor = processors[ext]
                d = processor.metric(path, ng_words)
                d['path'] = path
                d['language'] = processor.language
                mod_dt = datetime.datetime.fromtimestamp(os.stat(path).st_mtime)
                d['mod_date'] = mod_dt.date()
                d['mod_time'] = mod_dt.time()
                data.append(d)

        writers[options.format](out, processors, ng_words, data)
    except Exception as e:
        sys.stderr.write(str(e) + ret_code)
    finally:
        # Close only a file this function opened itself; never stdout.
        if out is not None and out is not sys.stdout:
            out.close()
if __name__ == '__main__':
    # Extension -> processor table handed to main(); every entry gets its
    # own processor instance.
    extension_table = [
        (".java", Java()),
        (".cpp", CPP()),
        (".cc", CPP()),
        (".c", C()),
        (".py", Python()),
        (".pl", Perl()),
        (".pm", Perl()),
        (".lua", Lua()),
    ]
    main(dict(extension_table))