|
|
@@ -92,7 +92,54 @@ class DataCollector:
|
|
92
|
92
|
def __init__(self):
    """Create an empty collector with every statistic at its zero value.

    collect() adds to the counters with ``+=`` (total_authors,
    total_commits, total_files), so calling it once per repository
    accumulates totals over all repositories on top of the values set here.
    """
    self.stamp_created = time.time()
    self.cache = {}

    self.total_authors = 0
    self.activity_by_hour_of_day = {}  # hour -> commits
    self.activity_by_day_of_week = {}  # day -> commits
    self.activity_by_month_of_year = {}  # month [1-12] -> commits
    self.activity_by_hour_of_week = {}  # weekday -> hour -> commits
    self.activity_by_hour_of_day_busiest = 0
    self.activity_by_hour_of_week_busiest = 0
    self.activity_by_year_week = {}  # yy_wNN -> commits
    self.activity_by_year_week_peak = 0

    # name -> {commits, first_commit_stamp, last_commit_stamp,
    #          last_active_day, active_days, lines_added, lines_removed}
    self.authors = {}

    self.total_commits = 0
    self.total_files = 0
    # Author names ordered by commit count; refine() replaces this with a
    # list, so start from an empty list (not 0) to keep it iterable even
    # when it is read before refine() has run.
    self.authors_by_commits = []

    # domains
    self.domains = {}  # domain -> commits

    # author of the month
    self.author_of_month = {}  # month -> author -> commits
    self.author_of_year = {}  # year -> author -> commits
    self.commits_by_month = {}  # month -> commits
    self.commits_by_year = {}  # year -> commits
    self.first_commit_stamp = 0
    self.last_commit_stamp = 0
    self.last_active_day = None
    self.active_days = set()

    # lines
    self.total_lines = 0
    self.total_lines_added = 0
    self.total_lines_removed = 0

    # timezone
    self.commits_by_timezone = {}  # timezone -> commits

    # tags
    self.tags = {}

    self.files_by_stamp = {}  # stamp -> files

    # extensions
    self.extensions = {}  # extension -> files, lines

    # line statistics
    self.changes_by_date = {}  # stamp -> { files, ins, del }
|
|
|
142
|
+
|
|
96
|
143
|
##
|
|
97
|
144
|
# This should be the main function to extract data from the repository.
|
|
98
|
145
|
def collect(self, dir):
|
|
|
@@ -181,45 +228,12 @@ class GitDataCollector(DataCollector):
|
|
181
|
228
|
DataCollector.collect(self, dir)
|
|
182
|
229
|
|
|
183
|
230
|
try:
|
|
184
|
|
- self.total_authors = int(getpipeoutput(['git shortlog -s %s' % getcommitrange(), 'wc -l']))
|
|
|
231
|
+ self.total_authors += int(getpipeoutput(['git shortlog -s %s' % getcommitrange(), 'wc -l']))
|
|
185
|
232
|
except:
|
|
186
|
233
|
self.total_authors = 0
|
|
187
|
234
|
#self.total_lines = int(getoutput('git-ls-files -z |xargs -0 cat |wc -l'))
|
|
188
|
235
|
|
|
189
|
|
- self.activity_by_hour_of_day = {} # hour -> commits
|
|
190
|
|
- self.activity_by_day_of_week = {} # day -> commits
|
|
191
|
|
- self.activity_by_month_of_year = {} # month [1-12] -> commits
|
|
192
|
|
- self.activity_by_hour_of_week = {} # weekday -> hour -> commits
|
|
193
|
|
- self.activity_by_hour_of_day_busiest = 0
|
|
194
|
|
- self.activity_by_hour_of_week_busiest = 0
|
|
195
|
|
- self.activity_by_year_week = {} # yy_wNN -> commits
|
|
196
|
|
- self.activity_by_year_week_peak = 0
|
|
197
|
|
-
|
|
198
|
|
- self.authors = {} # name -> {commits, first_commit_stamp, last_commit_stamp, last_active_day, active_days, lines_added, lines_removed}
|
|
199
|
|
-
|
|
200
|
|
- # domains
|
|
201
|
|
- self.domains = {} # domain -> commits
|
|
202
|
|
-
|
|
203
|
|
- # author of the month
|
|
204
|
|
- self.author_of_month = {} # month -> author -> commits
|
|
205
|
|
- self.author_of_year = {} # year -> author -> commits
|
|
206
|
|
- self.commits_by_month = {} # month -> commits
|
|
207
|
|
- self.commits_by_year = {} # year -> commits
|
|
208
|
|
- self.first_commit_stamp = 0
|
|
209
|
|
- self.last_commit_stamp = 0
|
|
210
|
|
- self.last_active_day = None
|
|
211
|
|
- self.active_days = set()
|
|
212
|
|
-
|
|
213
|
|
- # lines
|
|
214
|
|
- self.total_lines = 0
|
|
215
|
|
- self.total_lines_added = 0
|
|
216
|
|
- self.total_lines_removed = 0
|
|
217
|
|
-
|
|
218
|
|
- # timezone
|
|
219
|
|
- self.commits_by_timezone = {} # timezone -> commits
|
|
220
|
|
-
|
|
221
|
236
|
# tags
|
|
222
|
|
- self.tags = {}
|
|
223
|
237
|
lines = getpipeoutput(['git show-ref --tags']).split('\n')
|
|
224
|
238
|
for line in lines:
|
|
225
|
239
|
if len(line) == 0:
|
|
|
@@ -252,7 +266,7 @@ class GitDataCollector(DataCollector):
|
|
252
|
266
|
parts = re.split('\s+', line, 2)
|
|
253
|
267
|
commits = int(parts[1])
|
|
254
|
268
|
author = parts[2]
|
|
255
|
|
- self.tags[tag]['commits'] += commits
|
|
|
269
|
+ self.tags[tag]['commits'] = commits
|
|
256
|
270
|
self.tags[tag]['authors'][author] = commits
|
|
257
|
271
|
|
|
258
|
272
|
# Collect revision statistics
|
|
|
@@ -359,7 +373,6 @@ class GitDataCollector(DataCollector):
|
|
359
|
373
|
|
|
360
|
374
|
# TODO Optimize this, it's the worst bottleneck
|
|
361
|
375
|
# outputs "<stamp> <files>" for each revision
|
|
362
|
|
- self.files_by_stamp = {} # stamp -> files
|
|
363
|
376
|
revlines = getpipeoutput(['git rev-list --pretty=format:"%%at %%T" %s' % getcommitrange('HEAD'), 'grep -v ^commit']).strip().split('\n')
|
|
364
|
377
|
lines = []
|
|
365
|
378
|
for revline in revlines:
|
|
|
@@ -367,7 +380,7 @@ class GitDataCollector(DataCollector):
|
|
367
|
380
|
linecount = self.getFilesInCommit(rev)
|
|
368
|
381
|
lines.append('%d %d' % (int(time), linecount))
|
|
369
|
382
|
|
|
370
|
|
- self.total_commits = len(lines)
|
|
|
383
|
+ self.total_commits += len(lines)
|
|
371
|
384
|
for line in lines:
|
|
372
|
385
|
parts = line.split(' ')
|
|
373
|
386
|
if len(parts) != 2:
|
|
|
@@ -379,9 +392,8 @@ class GitDataCollector(DataCollector):
|
|
379
|
392
|
print 'Warning: failed to parse line "%s"' % line
|
|
380
|
393
|
|
|
381
|
394
|
# extensions
|
|
382
|
|
- self.extensions = {} # extension -> files, lines
|
|
383
|
395
|
lines = getpipeoutput(['git ls-tree -r -z %s' % getcommitrange('HEAD', end_only = True)]).split('\000')
|
|
384
|
|
- self.total_files = len(lines)
|
|
|
396
|
+ self.total_files += len(lines)
|
|
385
|
397
|
for line in lines:
|
|
386
|
398
|
if len(line) == 0:
|
|
387
|
399
|
continue
|
|
|
@@ -498,9 +510,9 @@ class GitDataCollector(DataCollector):
|
|
498
|
510
|
def refine(self):
|
|
499
|
511
|
# authors
|
|
500
|
512
|
# name -> {place_by_commits, commits_frac, date_first, date_last, timedelta}
|
|
501
|
|
- authors_by_commits = getkeyssortedbyvaluekey(self.authors, 'commits')
|
|
502
|
|
- authors_by_commits.reverse() # most first
|
|
503
|
|
- for i, name in enumerate(authors_by_commits):
|
|
|
513
|
+ self.authors_by_commits = getkeyssortedbyvaluekey(self.authors, 'commits')
|
|
|
514
|
+ self.authors_by_commits.reverse() # most first
|
|
|
515
|
+ for i, name in enumerate(self.authors_by_commits):
|
|
504
|
516
|
self.authors[name]['place_by_commits'] = i + 1
|
|
505
|
517
|
|
|
506
|
518
|
for name in self.authors.keys():
|
|
|
@@ -1027,8 +1039,8 @@ class HTMLReportCreator(ReportCreator):
|
|
1027
|
1039
|
tags_sorted_by_date_desc = map(lambda el : el[1], reversed(sorted(map(lambda el : (el[1]['date'], el[0]), data.tags.items()))))
|
|
1028
|
1040
|
for tag in tags_sorted_by_date_desc:
|
|
1029
|
1041
|
authorinfo = []
|
|
1030
|
|
- authors_by_commits = getkeyssortedbyvalues(data.tags[tag]['authors'])
|
|
1031
|
|
- for i in reversed(authors_by_commits):
|
|
|
1042
|
+ self.authors_by_commits = getkeyssortedbyvalues(data.tags[tag]['authors'])
|
|
|
1043
|
+ for i in reversed(self.authors_by_commits):
|
|
1032
|
1044
|
authorinfo.append('%s (%d)' % (i, data.tags[tag]['authors'][i]))
|
|
1033
|
1045
|
f.write('<tr><td>%s</td><td>%s</td><td>%d</td><td>%s</td></tr>' % (tag, data.tags[tag]['date'], data.tags[tag]['commits'], ', '.join(authorinfo)))
|
|
1034
|
1046
|
f.write('</table>')
|
|
|
@@ -1276,7 +1288,7 @@ class GitStats:
|
|
1276
|
1288
|
|
|
1277
|
1289
|
if len(args) < 2:
|
|
1278
|
1290
|
print """
|
|
1279
|
|
-Usage: gitstats [options] <gitpath> <outputpath>
|
|
|
1291
|
+Usage: gitstats [options] <gitpath> [<gitpath>] <outputpath>
|
|
1280
|
1292
|
|
|
1281
|
1293
|
Options:
|
|
1282
|
1294
|
-c key=value Override configuration value
|
|
|
@@ -1286,8 +1298,7 @@ Default config values:
|
|
1286
|
1298
|
""" % conf
|
|
1287
|
1299
|
sys.exit(0)
|
|
1288
|
1300
|
|
|
1289
|
|
- gitpath = args[0]
|
|
1290
|
|
- outputpath = os.path.abspath(args[1])
|
|
|
1301
|
+ outputpath = os.path.abspath(args[-1])
|
|
1291
|
1302
|
rundir = os.getcwd()
|
|
1292
|
1303
|
|
|
1293
|
1304
|
try:
|
|
|
@@ -1298,17 +1309,22 @@ Default config values:
|
|
1298
|
1309
|
print 'FATAL: Output path is not a directory or does not exist'
|
|
1299
|
1310
|
sys.exit(1)
|
|
1300
|
1311
|
|
|
1301
|
|
- print 'Git path: %s' % gitpath
|
|
1302
|
1312
|
print 'Output path: %s' % outputpath
|
|
1303
|
|
-
|
|
1304
|
|
- os.chdir(gitpath)
|
|
1305
|
|
-
|
|
1306
|
1313
|
cachefile = os.path.join(outputpath, 'gitstats.cache')
|
|
1307
|
1314
|
|
|
1308
|
|
- print 'Collecting data...'
|
|
1309
|
1315
|
data = GitDataCollector()
|
|
1310
|
1316
|
data.loadCache(cachefile)
|
|
1311
|
|
- data.collect(gitpath)
|
|
|
1317
|
+
|
|
|
1318
|
+ for path in args[0:-1]:
|
|
|
1319
|
+ gitpath = path
|
|
|
1320
|
+
|
|
|
1321
|
+ print 'Git path: %s' % gitpath
|
|
|
1322
|
+
|
|
|
1323
|
+ os.chdir(gitpath)
|
|
|
1324
|
+
|
|
|
1325
|
+ print 'Collecting data...'
|
|
|
1326
|
+ data.collect(gitpath)
|
|
|
1327
|
+
|
|
1312
|
1328
|
print 'Refining data...'
|
|
1313
|
1329
|
data.saveCache(cachefile)
|
|
1314
|
1330
|
data.refine()
|