@@ -5,8 +5,9 @@ import os
 
 from multiprocessing import Pool
 
-from .datacollector import DataCollector
-from .miscfuncs import getcommitrange, getkeyssortedbyvaluekey, getlogrange, getnumoffilesfromrev, getnumoflinesinblob, \
+from gitstats.data.author import Author
+from gitstats.datacollector import DataCollector
+from gitstats.miscfuncs import getcommitrange, getlogrange, getnumoffilesfromrev, getnumoflinesinblob, \
     getpipeoutput, getstatsummarycounts
 
 
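Note: the new import pulls in an `Author` value object from `gitstats.data.author`, which is not shown in this diff. From the attribute accesses in the hunks below, its assumed shape is roughly the following sketch (names inferred from usage; the defaults are assumptions — falsy defaults are what make the `if not ...` tests in later hunks work):

```python
# Hypothetical sketch of gitstats/data/author.py, inferred from this diff;
# the real class may differ.
from dataclasses import dataclass, field


@dataclass
class Author:
    commits: int = 0
    lines_added: int = 0
    lines_removed: int = 0
    first_commit_stamp: int = 0          # falsy until the first commit is seen
    last_commit_stamp: int = 0
    last_active_day: str = ''
    active_days: set = field(default_factory=set)
    place_by_commits: int = 0
    commits_frac: float = 0.0
    date_first: str = ''
    date_last: str = ''
    timedelta: object = None             # datetime.timedelta once refine() runs
```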
@@ -19,51 +20,179 @@ class GitDataCollector(DataCollector):
 
         self.total_authors += int(getpipeoutput(['git shortlog -s %s' % getlogrange(self.conf), 'wc -l']))
         # self.total_lines = int(getoutput('git-ls-files -z |xargs -0 cat |wc -l'))
+        self.get_tags()
+        self.get_revision_info()
+        self.get_file_info()
+        self.get_loc_info()
+        self.get_author_info()
 
-        # tags
-        lines = getpipeoutput(['git show-ref --tags']).split('\n')
+    def get_author_info(self):
+        # Per-author statistics
+        # defined for stamp, author only if the author committed at this timestamp.
+        self.changes_by_date_by_author = {}  # stamp -> author -> lines_added
+        # Similar to the above, but never use --first-parent
+        # (we need to walk through every commit to know who
+        # committed what, not just through mainline)
+        lines = getpipeoutput(
+            ['git log --shortstat --date-order --pretty=format:"%%at %%aN" %s' % (
+                getlogrange(self.conf, 'HEAD'))]).split('\n')
+        lines.reverse()
+        inserted = 0
+        deleted = 0
+        stamp = 0
         for line in lines:
             if len(line) == 0:
                 continue
-            (line_hash, tag) = line.split(' ')
 
-            tag = tag.replace('refs/tags/', '')
-            output = getpipeoutput(['git log "%s" --pretty=format:"%%at %%aN" -n 1' % line_hash])
-            if len(output) > 0:
-                parts = output.split(' ')
-                try:
-                    stamp = int(parts[0])
-                except ValueError:
-                    stamp = 0
-            self.tags[tag] = {'stamp': stamp, 'hash': line_hash,
-                              'date': datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), 'commits': 0,
-                              'authors': {}}
+            # <stamp> <author>
+            if re.search('files? changed', line) is None:
+                pos = line.find(' ')
+                if pos != -1:
+                    try:
+                        oldstamp = stamp
+                        (stamp, author) = (int(line[:pos]), line[pos + 1:])
+                        if oldstamp > stamp:
+                            # clock skew, keep old timestamp to avoid having ugly graph
+                            stamp = oldstamp
+                        if author not in self.authors:
+                            self.authors[author] = Author()
+                        self.authors[author].commits += 1
+                        self.authors[author].lines_added += inserted
+                        self.authors[author].lines_removed += deleted
+                        if stamp not in self.changes_by_date_by_author:
+                            self.changes_by_date_by_author[stamp] = {}
+                        if author not in self.changes_by_date_by_author[stamp]:
+                            self.changes_by_date_by_author[stamp][author] = Author()
+                        self.changes_by_date_by_author[stamp][author].lines_added = self.authors[author].lines_added
+                        self.changes_by_date_by_author[stamp][author].commits = self.authors[author].commits
+                        files, inserted, deleted = 0, 0, 0
+                    except ValueError:
+                        logging.warning(f'unexpected line "{line}"')
+                else:
+                    logging.warning(f'unexpected line "{line}"')
+            else:
+                numbers = getstatsummarycounts(line)
 
-        # collect info on tags, starting from latest
-        tags_sorted_by_date_asc = [tup[1] for tup in sorted([(el[1]['date'], el[0]) for el in self.tags.items()])]
-        # tags_sorted_by_date_desc = map(lambda el: el[1],
-        #                                reversed(sorted(map(lambda el: (el[1]['date'], el[0]), self.tags.items()))))
-        prev = None
-#        for tag in reversed(tags_sorted_by_date_desc):
-        for tag in tags_sorted_by_date_asc:
-            cmd = 'git shortlog -s "%s"' % tag
-            if prev is not None:
-                cmd += ' "^%s"' % prev
-            output = getpipeoutput([cmd])
-            if len(output) == 0:
+                if len(numbers) == 3:
+                    (files, inserted, deleted) = map(int, numbers)
+                else:
+                    logging.warning(f'Failed to handle line "{line}"')
+                    (files, inserted, deleted) = (0, 0, 0)
+
+    def get_loc_info(self):
+        # line statistics
+        # outputs:
+        # N files changed, N insertions (+), N deletions(-)
+        # <stamp> <author>
+        self.changes_by_date = {}  # stamp -> { files, ins, del }
+        # computation of lines of code by date is better done
+        # on a linear history.
+        extra = ''
+        if self.conf['linear_linestats']:
+            extra = '--first-parent -m'
+        lines = getpipeoutput(
+            ['git log --shortstat %s --pretty=format:"%%at %%aN" %s' % (extra, getlogrange(self.conf, 'HEAD'))]).split(
+            '\n')
+        lines.reverse()
+        files = 0
+        inserted = 0
+        deleted = 0
+        total_lines = 0
+        for line in lines:
+            if len(line) == 0:
                 continue
-            prev = tag
-            for line in output.split('\n'):
-                parts = re.split('\s+', line, 2)
-                commits = int(parts[1])
-                author = parts[2]
-                self.tags[tag]['commits'] += commits
-                self.tags[tag]['authors'][author] = commits
 
+            # <stamp> <author>
+            if re.search('files? changed', line) is None:
+                pos = line.find(' ')
+                if pos != -1:
+                    try:
+                        (stamp, author) = (int(line[:pos]), line[pos + 1:])
+                        self.changes_by_date[stamp] = {'files': files, 'ins': inserted, 'del': deleted,
+                                                       'lines': total_lines}
+
+                        date = datetime.datetime.fromtimestamp(stamp)
+                        yymm = date.strftime('%Y-%m')
+                        self.lines_added_by_month[yymm] = self.lines_added_by_month.get(yymm, 0) + inserted
+                        self.lines_removed_by_month[yymm] = self.lines_removed_by_month.get(yymm, 0) + deleted
+
+                        yy = date.year
+                        self.lines_added_by_year[yy] = self.lines_added_by_year.get(yy, 0) + inserted
+                        self.lines_removed_by_year[yy] = self.lines_removed_by_year.get(yy, 0) + deleted
+
+                        files, inserted, deleted = 0, 0, 0
+                    except ValueError:
+                        logging.warning(f'unexpected line "{line}"')
+                else:
+                    logging.warning(f'unexpected line "{line}"')
+            else:
+                numbers = getstatsummarycounts(line)
+
+                if len(numbers) == 3:
+                    (files, inserted, deleted) = map(int, numbers)
+                    total_lines += inserted
+                    total_lines -= deleted
+                    self.total_lines_added += inserted
+                    self.total_lines_removed += deleted
+
+                else:
+                    logging.warning(f'Failed to handle line "{line}"')
+                    (files, inserted, deleted) = (0, 0, 0)
+            # self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted }
+        self.total_lines += total_lines
+
+    def get_file_info(self):
+        # extensions and size of files
+        lines = getpipeoutput(['git ls-tree -r -l -z %s' % getcommitrange(self.conf, 'HEAD', end_only=True)]).split(
+            '\000')
+        blobs_to_read = []
+        for line in lines:
+            if len(line) == 0:
+                continue
+            parts = re.split(r'\s+', line, 4)
+            if parts[0] == '160000' and parts[3] == '-':
+                # skip submodules
+                continue
+            blob_id = parts[2]
+            size = int(parts[3])
+            fullpath = parts[4]
+
+            self.total_size += size
+            self.total_files += 1
+
+            _, ext = os.path.splitext(fullpath)
+            if len(ext) > self.conf['max_ext_length']:
+                ext = ''
+            if ext not in self.extensions:
+                self.extensions[ext] = {'files': 0, 'lines': 0}
+            self.extensions[ext]['files'] += 1
+            # if the cache is empty, add ext and blob id to the list of new blobs;
+            # otherwise try to read the needed info from the cache
+            if 'lines_in_blob' not in self.cache.keys():
+                blobs_to_read.append((ext, blob_id))
+                continue
+            if blob_id in self.cache['lines_in_blob'].keys():
+                self.extensions[ext]['lines'] += self.cache['lines_in_blob'][blob_id]
+            else:
+                blobs_to_read.append((ext, blob_id))
+        # Get line counts for new blobs that were not found in the cache
+        pool = Pool(processes=self.conf['processes'])
+        ext_blob_linecount = pool.map(getnumoflinesinblob, blobs_to_read)
+        pool.terminate()
+        pool.join()
+        # Update the cache and record the line count of each new blob
+        for (ext, blob_id, linecount) in ext_blob_linecount:
+            if 'lines_in_blob' not in self.cache:
+                self.cache['lines_in_blob'] = {}
+            self.cache['lines_in_blob'][blob_id] = linecount
+            self.extensions[ext]['lines'] += self.cache['lines_in_blob'][blob_id]
+
+    def get_revision_info(self):
         # Collect revision statistics
         # Outputs "<stamp> <date> <time> <timezone> <author> '<' <mail> '>'"
         lines = getpipeoutput(
-            ['git rev-list --pretty=format:"%%at %%ai %%aN <%%aE>" %s' % getlogrange(self.conf, 'HEAD'), 'grep -v ^commit']).split(
+            ['git rev-list --pretty=format:"%%at %%ai %%aN <%%aE>" %s' % getlogrange(self.conf, 'HEAD'),
+             'grep -v ^commit']).split(
             '\n')
         for line in lines:
             parts = line.split(' ', 4)
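Note on the two parsing loops extracted above (`get_author_info` and `get_loc_info`): `git log --shortstat --pretty=format:"%at %aN"` prints one `<stamp> <author>` header per commit followed by a summary such as `2 files changed, 10 insertions(+), 2 deletions(-)`. The `lines.reverse()` calls make each summary arrive *before* its header, so the counters are already filled in when the header line is processed and can simply be reset afterwards. A standalone sketch of that pairing (sample output is illustrative; the real code uses `getstatsummarycounts` rather than the ad-hoc regex here):

```python
import re

# Illustrative sample of `git log --shortstat --pretty=format:"%at %aN"` output.
sample = """1700000100 Alice
 2 files changed, 10 insertions(+), 2 deletions(-)
1700000000 Bob
 1 file changed, 3 deletions(-)"""

inserted = deleted = 0
for line in reversed(sample.split('\n')):
    if re.search('files? changed', line):
        # Summary line: harvest the counters for the header that follows.
        nums = [int(n) for n in re.findall(r'\d+', line)]
        inserted = nums[1] if 'insertion' in line else 0
        deleted = nums[-1] if 'deletion' in line else 0
    else:
        stamp, author = line.split(' ', 1)
        print(author, inserted, deleted)  # counters belong to this commit
        inserted = deleted = 0
```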
@@ -124,16 +253,16 @@ class GitDataCollector(DataCollector):
 
             # author stats
             if author not in self.authors:
-                self.authors[author] = {}
+                self.authors[author] = Author()
             # commits, note again that commits may be in any date order because of cherry-picking and patches
-            if 'last_commit_stamp' not in self.authors[author]:
-                self.authors[author]['last_commit_stamp'] = stamp
-            if stamp > self.authors[author]['last_commit_stamp']:
-                self.authors[author]['last_commit_stamp'] = stamp
-            if 'first_commit_stamp' not in self.authors[author]:
-                self.authors[author]['first_commit_stamp'] = stamp
-            if stamp < self.authors[author]['first_commit_stamp']:
-                self.authors[author]['first_commit_stamp'] = stamp
+            if not self.authors[author].last_commit_stamp:
+                self.authors[author].last_commit_stamp = stamp
+            if stamp > self.authors[author].last_commit_stamp:
+                self.authors[author].last_commit_stamp = stamp
+            if not self.authors[author].first_commit_stamp:
+                self.authors[author].first_commit_stamp = stamp
+            if stamp < self.authors[author].first_commit_stamp:
+                self.authors[author].first_commit_stamp = stamp
 
             # author of the month/year
             yymm = date.strftime('%Y-%m')
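Note: `if not self.authors[author].last_commit_stamp` behaves like the old `'last_commit_stamp' not in ...` test only if the `Author` default is falsy (0 in the sketch above). Under that assumption the four branches collapse to a min/max update — shown here only as an equivalence check, not as a suggested change:

```python
class A:  # stand-in with the falsy stamp defaults the `if not ...` tests imply
    first_commit_stamp = 0
    last_commit_stamp = 0


a = A()
for stamp in (1700000100, 1700000000, 1700000200):  # commits in any order
    a.last_commit_stamp = max(a.last_commit_stamp, stamp)
    a.first_commit_stamp = min(a.first_commit_stamp or stamp, stamp)
assert (a.first_commit_stamp, a.last_commit_stamp) == (1700000000, 1700000200)
```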
@@ -154,12 +283,11 @@ class GitDataCollector(DataCollector):
 
             # authors: active days
             yymmdd = date.strftime('%Y-%m-%d')
-            if 'last_active_day' not in self.authors[author]:
-                self.authors[author]['last_active_day'] = yymmdd
-                self.authors[author]['active_days'] = {yymmdd}
-            elif yymmdd != self.authors[author]['last_active_day']:
-                self.authors[author]['last_active_day'] = yymmdd
-                self.authors[author]['active_days'].add(yymmdd)
+            if not self.authors[author].last_active_day:
+                self.authors[author].last_active_day = yymmdd
+            elif yymmdd != self.authors[author].last_active_day:
+                self.authors[author].last_active_day = yymmdd
+            self.authors[author].active_days.add(yymmdd)
 
             # project: active days
             if yymmdd != self.last_active_day:
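Note: the rewrite above drops the `active_days = {yymmdd}` initialisation because an `Author` is assumed to start with an empty set, and (as reconstructed here) moves the `add` out of the branches; since `set.add` is idempotent, adding the day unconditionally is equivalent to the old two-branch logic:

```python
# Reconstruction of the new per-author active-day logic in isolation.
last_active_day = ''   # falsy Author default assumed
active_days = set()
for day in ('2024-01-01', '2024-01-01', '2024-01-02'):
    if not last_active_day:
        last_active_day = day
    elif day != last_active_day:
        last_active_day = day
    active_days.add(day)  # idempotent: same-day commits collapse to one entry
assert sorted(active_days) == ['2024-01-01', '2024-01-02']
```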
@@ -168,10 +296,10 @@ class GitDataCollector(DataCollector):
 
             # timezone
             self.commits_by_timezone[timezone] = self.commits_by_timezone.get(timezone, 0) + 1
-
         # outputs "<stamp> <files>" for each revision
         revlines = getpipeoutput(
-            ['git rev-list --pretty=format:"%%at %%T" %s' % getlogrange(self.conf, 'HEAD'), 'grep -v ^commit']).strip().split('\n')
+            ['git rev-list --pretty=format:"%%at %%T" %s' % getlogrange(self.conf, 'HEAD'),
+             'grep -v ^commit']).strip().split('\n')
         lines = []
         revs_to_read = []
         # Look up rev in cache and take info from cache if found
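Note: the reflowed `getpipeoutput` call above is only a formatting change; as elsewhere in gitstats, the list elements are commands joined into a shell pipeline. The `grep -v ^commit` stage is needed because `git rev-list --pretty=format:...` still interleaves `commit <hash>` header lines with the formatted output. A rough standalone equivalent (run inside a git checkout; assumes the pipe-joining behaviour of `getpipeoutput`):

```python
import subprocess

# Prints "<stamp> <tree-hash>" per revision, mirroring the pipeline above.
out = subprocess.run(
    'git rev-list --pretty=format:"%at %T" HEAD | grep -v ^commit',
    shell=True, capture_output=True, text=True).stdout.strip()
print(out.split('\n')[:3])
```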
@@ -187,20 +315,17 @@ class GitDataCollector(DataCollector):
                 lines.append('%d %d' % (int(time), self.cache['files_in_tree'][rev]))
             else:
                 revs_to_read.append((time, rev))
-
         # Read revisions from repo
         pool = Pool(processes=self.conf['processes'])
         time_rev_count = pool.map(getnumoffilesfromrev, revs_to_read)
         pool.terminate()
         pool.join()
-
         # Update cache with new revisions and append them to the general list
         for (time, rev, count) in time_rev_count:
             if 'files_in_tree' not in self.cache:
                 self.cache['files_in_tree'] = {}
             self.cache['files_in_tree'][rev] = count
             lines.append('%d %d' % (int(time), count))
-
         self.total_commits += len(lines)
         for line in lines:
             parts = line.split(' ')
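Note: for the `pool.map`/unpacking pair above to work, `getnumoffilesfromrev` has to take a single `(time, rev)` tuple (`Pool.map` passes one argument per item) and return a `(time, rev, count)` triple. A minimal stand-in showing that contract (hypothetical worker, illustrative only):

```python
from multiprocessing import Pool


def count_files(time_rev):
    # Hypothetical stand-in for getnumoffilesfromrev: one (time, rev) tuple
    # in, one (time, rev, count) triple out; the real worker shells out to
    # `git ls-tree -r "<rev>"` and counts the lines.
    time, rev = time_rev
    return time, rev, len(rev)


if __name__ == '__main__':
    with Pool(processes=2) as pool:
        print(pool.map(count_files, [('1700000000', 'abc'), ('1700000100', 'defg')]))
```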
@@ -212,187 +337,64 @@ class GitDataCollector(DataCollector):
             except ValueError:
                 logging.warning(f'Failed to parse line "{line}"')
 
-        # extensions and size of files
-        lines = getpipeoutput(['git ls-tree -r -l -z %s' % getcommitrange(self.conf, 'HEAD', end_only=True)]).split('\000')
-        blobs_to_read = []
-        for line in lines:
-            if len(line) == 0:
-                continue
-            parts = re.split('\s+', line, 4)
-            if parts[0] == '160000' and parts[3] == '-':
-                # skip submodules
-                continue
-            blob_id = parts[2]
-            size = int(parts[3])
-            fullpath = parts[4]
-
-            self.total_size += size
-            self.total_files += 1
-
-            _, ext = os.path.splitext(fullpath)
-            if len(ext) > self.conf['max_ext_length']:
-                ext = ''
-            if ext not in self.extensions:
-                self.extensions[ext] = {'files': 0, 'lines': 0}
-            self.extensions[ext]['files'] += 1
-            # if cache empty then add ext and blob id to list of new blob's
-            # otherwise try to read needed info from cache
-            if 'lines_in_blob' not in self.cache.keys():
-                blobs_to_read.append((ext, blob_id))
-                continue
-            if blob_id in self.cache['lines_in_blob'].keys():
-                self.extensions[ext]['lines'] += self.cache['lines_in_blob'][blob_id]
-            else:
-                blobs_to_read.append((ext, blob_id))
-
-        # Get info abount line count for new blob's that wasn't found in cache
-        pool = Pool(processes=self.conf['processes'])
-        ext_blob_linecount = pool.map(getnumoflinesinblob, blobs_to_read)
-        pool.terminate()
-        pool.join()
-
-        # Update cache and write down info about number of number of lines
-        for (ext, blob_id, linecount) in ext_blob_linecount:
-            if 'lines_in_blob' not in self.cache:
-                self.cache['lines_in_blob'] = {}
-            self.cache['lines_in_blob'][blob_id] = linecount
-            self.extensions[ext]['lines'] += self.cache['lines_in_blob'][blob_id]
-
-        # line statistics
-        # outputs:
-        # N files changed, N insertions (+), N deletions(-)
-        # <stamp> <author>
-        self.changes_by_date = {}  # stamp -> { files, ins, del }
-        # computation of lines of code by date is better done
-        # on a linear history.
-        extra = ''
-        if self.conf['linear_linestats']:
-            extra = '--first-parent -m'
-        lines = getpipeoutput(
-            ['git log --shortstat %s --pretty=format:"%%at %%aN" %s' % (extra, getlogrange(self.conf, 'HEAD'))]).split('\n')
-        lines.reverse()
-        files = 0
-        inserted = 0
-        deleted = 0
-        total_lines = 0
+    def get_tags(self):
+        # tags
+        lines = getpipeoutput(['git show-ref --tags']).split('\n')
         for line in lines:
             if len(line) == 0:
                 continue
+            (line_hash, tag) = line.split(' ')
 
-            # <stamp> <author>
-            if re.search('files? changed', line) is None:
-                pos = line.find(' ')
-                if pos != -1:
-                    try:
-                        (stamp, author) = (int(line[:pos]), line[pos + 1:])
-                        self.changes_by_date[stamp] = {'files': files, 'ins': inserted, 'del': deleted,
-                                                       'lines': total_lines}
-
-                        date = datetime.datetime.fromtimestamp(stamp)
-                        yymm = date.strftime('%Y-%m')
-                        self.lines_added_by_month[yymm] = self.lines_added_by_month.get(yymm, 0) + inserted
-                        self.lines_removed_by_month[yymm] = self.lines_removed_by_month.get(yymm, 0) + deleted
-
-                        yy = date.year
-                        self.lines_added_by_year[yy] = self.lines_added_by_year.get(yy, 0) + inserted
-                        self.lines_removed_by_year[yy] = self.lines_removed_by_year.get(yy, 0) + deleted
-
-                        files, inserted, deleted = 0, 0, 0
-                    except ValueError:
-                        logging.warning(f'unexpected line "{line}')
-                else:
-                    logging.warning(f'unexpected line "{line}')
-            else:
-                numbers = getstatsummarycounts(line)
-
-                if len(numbers) == 3:
-                    (files, inserted, deleted) = map(lambda el: int(el), numbers)
-                    total_lines += inserted
-                    total_lines -= deleted
-                    self.total_lines_added += inserted
-                    self.total_lines_removed += deleted
-
-                else:
-                    logging.warning(f'Failed to handle line "{line}"')
-                    (files, inserted, deleted) = (0, 0, 0)
-            # self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted }
-        self.total_lines += total_lines
-
-        # Per-author statistics
-
-        # defined for stamp, author only if author commited at this timestamp.
-        self.changes_by_date_by_author = {}  # stamp -> author -> lines_added
-
-        # Similar to the above, but never use --first-parent
-        # (we need to walk through every commit to know who
-        # committed what, not just through mainline)
-        lines = getpipeoutput(
-            ['git log --shortstat --date-order --pretty=format:"%%at %%aN" %s' % (getlogrange(self.conf, 'HEAD'))]).split('\n')
-        lines.reverse()
-        inserted = 0
-        deleted = 0
-        stamp = 0
-        for line in lines:
-            if len(line) == 0:
+            tag = tag.replace('refs/tags/', '')
+            output = getpipeoutput(['git log "%s" --pretty=format:"%%at %%aN" -n 1' % line_hash])
+            if len(output) > 0:
+                parts = output.split(' ')
+                try:
+                    stamp = int(parts[0])
+                except ValueError:
+                    stamp = 0
+            self.tags[tag] = {'stamp': stamp,
+                              'hash': line_hash,
+                              'date': datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'),
+                              'commits': 0,
+                              'authors': {}}
+        # collect info on tags, starting from latest
+        tags_sorted_by_date_asc = [tup[1] for tup in sorted([(el[1]['date'], el[0]) for el in self.tags.items()])]
+        # tags_sorted_by_date_desc = map(lambda el: el[1],
+        #                                reversed(sorted(map(lambda el: (el[1]['date'], el[0]), self.tags.items()))))
+        prev = None
+        # for tag in reversed(tags_sorted_by_date_desc):
+        for tag in tags_sorted_by_date_asc:
+            cmd = 'git shortlog -s "%s"' % tag
+            if prev is not None:
+                cmd += ' "^%s"' % prev
+            output = getpipeoutput([cmd])
+            if len(output) == 0:
                 continue
-
-            # <stamp> <author>
-            if re.search('files? changed', line) is None:
-                pos = line.find(' ')
-                if pos != -1:
-                    try:
-                        oldstamp = stamp
-                        (stamp, author) = (int(line[:pos]), line[pos + 1:])
-                        if oldstamp > stamp:
-                            # clock skew, keep old timestamp to avoid having ugly graph
-                            stamp = oldstamp
-                        if author not in self.authors:
-                            self.authors[author] = {'lines_added': 0, 'lines_removed': 0, 'commits': 0}
-                        self.authors[author]['commits'] = self.authors[author].get('commits', 0) + 1
-                        self.authors[author]['lines_added'] = self.authors[author].get('lines_added', 0) + inserted
-                        self.authors[author]['lines_removed'] = self.authors[author].get('lines_removed', 0) + deleted
-                        if stamp not in self.changes_by_date_by_author:
-                            self.changes_by_date_by_author[stamp] = {}
-                        if author not in self.changes_by_date_by_author[stamp]:
-                            self.changes_by_date_by_author[stamp][author] = {}
-                        self.changes_by_date_by_author[stamp][author]['lines_added'] = self.authors[author][
-                            'lines_added']
-                        self.changes_by_date_by_author[stamp][author]['commits'] = self.authors[author]['commits']
-                        files, inserted, deleted = 0, 0, 0
-                    except ValueError:
-                        logging.warning(f'unexpected line "{line}')
-                else:
-                    logging.warning(f'unexpected line "{line}')
-            else:
-                numbers = getstatsummarycounts(line)
-
-                if len(numbers) == 3:
-                    (files, inserted, deleted) = map(lambda el: int(el), numbers)
-                else:
-                    logging.warning(f'Failed to handle line "{line}"')
-                    (files, inserted, deleted) = (0, 0, 0)
+            prev = tag
+            for line in output.split('\n'):
+                parts = re.split(r'\s+', line, 2)
+                commits = int(parts[1])
+                author = parts[2]
+                self.tags[tag]['commits'] += commits
+                self.tags[tag]['authors'][author] = commits
 
     def refine(self):
         # authors
         # name -> {place_by_commits, commits_frac, date_first, date_last, timedelta}
-        self.authors_by_commits = getkeyssortedbyvaluekey(self.authors, 'commits')
-        self.authors_by_commits.reverse()  # most first
+        self.authors_by_commits = self.getAuthors()
        for i, name in enumerate(self.authors_by_commits):
-            self.authors[name]['place_by_commits'] = i + 1
+            self.authors[name].place_by_commits = i + 1
 
         for name in self.authors.keys():
             a = self.authors[name]
-            a['commits_frac'] = (100 * float(a['commits'])) / self.getTotalCommits()
-            date_first = datetime.datetime.fromtimestamp(a['first_commit_stamp'])
-            date_last = datetime.datetime.fromtimestamp(a['last_commit_stamp'])
+            a.commits_frac = (100 * float(a.commits)) / self.getTotalCommits()
+            date_first = datetime.datetime.fromtimestamp(a.first_commit_stamp)
+            date_last = datetime.datetime.fromtimestamp(a.last_commit_stamp)
             delta = date_last - date_first
-            a['date_first'] = date_first.strftime('%Y-%m-%d')
-            a['date_last'] = date_last.strftime('%Y-%m-%d')
-            a['timedelta'] = delta
-            if 'lines_added' not in a:
-                a['lines_added'] = 0
-            if 'lines_removed' not in a:
-                a['lines_removed'] = 0
+            a.date_first = date_first.strftime('%Y-%m-%d')
+            a.date_last = date_last.strftime('%Y-%m-%d')
+            a.timedelta = delta
 
     def getActiveDays(self):
         return self.active_days
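Note on the relocated `get_tags`: the `git shortlog -s "<tag>" "^<prev>"` range counts only the commits that are new in each tag relative to the previously processed one, and each output line is a padded count, a tab, then the author — which is why `re.split(r'\s+', line, 2)` is indexed at `[1]` and `[2]` (index `[0]` is the empty string before the leading blanks). In isolation:

```python
import re

# Illustrative `git shortlog -s "v1.1" "^v1.0"` output: padded count, tab, author.
sample = '    14\tAlice Smith\n     3\tBob'
authors = {}
for line in sample.split('\n'):
    parts = re.split(r'\s+', line, 2)  # e.g. ['', '14', 'Alice Smith']
    authors[parts[2]] = int(parts[1])
assert authors == {'Alice Smith': 14, 'Bob': 3}
```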
@@ -407,8 +409,7 @@ class GitDataCollector(DataCollector):
         return self.authors[author]
 
     def getAuthors(self, limit=None):
-        res = getkeyssortedbyvaluekey(self.authors, 'commits')
-        res.reverse()
+        res = [el[0] for el in sorted(self.authors.items(), key=lambda x: x[1].commits, reverse=True)]
         return res[:limit]
 
     def getCommitDeltaDays(self):
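Note: the rewritten `getAuthors` folds the removed `getkeyssortedbyvaluekey` helper (sort by dict key, then reverse) into a single descending sort over `Author.commits`, which is why `refine` can now reuse it for `authors_by_commits`. Standalone illustration of the new expression:

```python
from types import SimpleNamespace

authors = {'alice': SimpleNamespace(commits=14),
           'bob': SimpleNamespace(commits=30),
           'carol': SimpleNamespace(commits=7)}

# Same expression as in getAuthors: names ordered by commits, most first.
res = [el[0] for el in sorted(authors.items(), key=lambda x: x[1].commits, reverse=True)]
assert res == ['bob', 'alice', 'carol']
assert res[:2] == ['bob', 'alice']  # limit=2
```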