Selaa lähdekoodia

Convert Author into a dataclass

Dan Rapp 7 vuotta sitten
vanhempi
commit
cfe1eb2b33

+ 0
- 0
gitstats/data/__init__.py Näytä tiedosto


+ 19
- 0
gitstats/data/author.py Näytä tiedosto

@@ -0,0 +1,19 @@
1
+from dataclasses import dataclass, field
2
+from datetime import timedelta
3
+from typing import Set
4
+
5
+@dataclass
6
+class Author:
7
+    lines_added: int = 0
8
+    lines_removed: int = 0
9
+    commits: int = 0
10
+    first_commit_stamp: int = 0
11
+    last_commit_stamp: int = 0
12
+    last_active_day: str = ''
13
+    active_days: Set[str] = field(default_factory=set)
14
+    place_by_commits: int = 0
15
+    commits_frac: float = 0.0
16
+    date_first: str = ''
17
+    date_last: str = ''
18
+    timedelta: timedelta = None
19
+

+ 4
- 1
gitstats/datacollector.py Näytä tiedosto

@@ -5,6 +5,9 @@ import pickle
5 5
 import time
6 6
 import zlib
7 7
 
8
+from typing import Dict
9
+from gitstats.data.author import Author
10
+
8 11
 
9 12
 class DataCollector:
10 13
     """Manages data collection from a revision control repository."""
@@ -23,7 +26,7 @@ class DataCollector:
23 26
         self.activity_by_year_week = {}  # yy_wNN -> commits
24 27
         self.activity_by_year_week_peak = 0
25 28
 
26
-        self.authors = {}  # name -> {commits, first_commit_stamp, last_commit_stamp, last_active_day, active_days, lines_added, lines_removed}
29
+        self.authors: Dict[Author] = {}  # name -> Author
27 30
 
28 31
         self.total_commits = 0
29 32
         self.total_files = 0

+ 226
- 225
gitstats/gitdatacollector.py Näytä tiedosto

@@ -5,8 +5,9 @@ import os
5 5
 
6 6
 from multiprocessing import Pool
7 7
 
8
-from .datacollector import DataCollector
9
-from .miscfuncs import getcommitrange, getkeyssortedbyvaluekey, getlogrange, getnumoffilesfromrev, getnumoflinesinblob, \
8
+from gitstats.data.author import Author
9
+from gitstats.datacollector import DataCollector
10
+from gitstats.miscfuncs import getcommitrange, getlogrange, getnumoffilesfromrev, getnumoflinesinblob, \
10 11
     getpipeoutput, getstatsummarycounts
11 12
 
12 13
 
@@ -19,51 +20,179 @@ class GitDataCollector(DataCollector):
19 20
 
20 21
         self.total_authors += int(getpipeoutput(['git shortlog -s %s' % getlogrange(self.conf), 'wc -l']))
21 22
         # self.total_lines = int(getoutput('git-ls-files -z |xargs -0 cat |wc -l'))
23
+        self.get_tags()
24
+        self.get_revision_info()
25
+        self.get_file_info()
26
+        self.get_loc_info()
27
+        self.get_author_info()
22 28
 
23
-        # tags
24
-        lines = getpipeoutput(['git show-ref --tags']).split('\n')
29
+    def get_author_info(self):
30
+        # Per-author statistics
31
+        # defined for stamp, author only if author committed at this timestamp.
32
+        self.changes_by_date_by_author = {}  # stamp -> author -> lines_added
33
+        # Similar to the above, but never use --first-parent
34
+        # (we need to walk through every commit to know who
35
+        # committed what, not just through mainline)
36
+        lines = getpipeoutput(
37
+            ['git log --shortstat --date-order --pretty=format:"%%at %%aN" %s' % (
38
+                getlogrange(self.conf, 'HEAD'))]).split('\n')
39
+        lines.reverse()
40
+        inserted = 0
41
+        deleted = 0
42
+        stamp = 0
25 43
         for line in lines:
26 44
             if len(line) == 0:
27 45
                 continue
28
-            (line_hash, tag) = line.split(' ')
29 46
 
30
-            tag = tag.replace('refs/tags/', '')
31
-            output = getpipeoutput(['git log "%s" --pretty=format:"%%at %%aN" -n 1' % line_hash])
32
-            if len(output) > 0:
33
-                parts = output.split(' ')
34
-                try:
35
-                    stamp = int(parts[0])
36
-                except ValueError:
37
-                    stamp = 0
38
-                self.tags[tag] = {'stamp': stamp, 'hash': line_hash,
39
-                                  'date': datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), 'commits': 0,
40
-                                  'authors': {}}
47
+            # <stamp> <author>
48
+            if re.search('files? changed', line) is None:
49
+                pos = line.find(' ')
50
+                if pos != -1:
51
+                    try:
52
+                        oldstamp = stamp
53
+                        (stamp, author) = (int(line[:pos]), line[pos + 1:])
54
+                        if oldstamp > stamp:
55
+                            # clock skew, keep old timestamp to avoid having ugly graph
56
+                            stamp = oldstamp
57
+                        if author not in self.authors:
58
+                            self.authors[author] = Author()
59
+                        self.authors[author].commits += 1
60
+                        self.authors[author].lines_added += inserted
61
+                        self.authors[author].lines_removed += deleted
62
+                        if stamp not in self.changes_by_date_by_author:
63
+                            self.changes_by_date_by_author[stamp] = {}
64
+                        if author not in self.changes_by_date_by_author[stamp]:
65
+                            self.changes_by_date_by_author[stamp][author] = Author()
66
+                        self.changes_by_date_by_author[stamp][author].lines_added = self.authors[author].lines_added
67
+                        self.changes_by_date_by_author[stamp][author].commits = self.authors[author].commits
68
+                        files, inserted, deleted = 0, 0, 0
69
+                    except ValueError:
70
+                        logging.warning(f'unexpected line "{line}')
71
+                else:
72
+                    logging.warning(f'unexpected line "{line}')
73
+            else:
74
+                numbers = getstatsummarycounts(line)
41 75
 
42
-        # collect info on tags, starting from latest
43
-        tags_sorted_by_date_asc = [tup[1] for tup in sorted([(el[1]['date'], el[0]) for el in self.tags.items()])]
44
-        # tags_sorted_by_date_desc = map(lambda el: el[1],
45
-        #                                reversed(sorted(map(lambda el: (el[1]['date'], el[0]), self.tags.items()))))
46
-        prev = None
47
-#        for tag in reversed(tags_sorted_by_date_desc):
48
-        for tag in tags_sorted_by_date_asc:
49
-            cmd = 'git shortlog -s "%s"' % tag
50
-            if prev is not None:
51
-                cmd += ' "^%s"' % prev
52
-            output = getpipeoutput([cmd])
53
-            if len(output) == 0:
76
+                if len(numbers) == 3:
77
+                    (files, inserted, deleted) = map(lambda el: int(el), numbers)
78
+                else:
79
+                    logging.warning(f'Failed to handle line "{line}"')
80
+                    (files, inserted, deleted) = (0, 0, 0)
81
+
82
+    def get_loc_info(self):
83
+        # line statistics
84
+        # outputs:
85
+        #  N files changed, N insertions (+), N deletions(-)
86
+        # <stamp> <author>
87
+        self.changes_by_date = {}  # stamp -> { files, ins, del }
88
+        # computation of lines of code by date is better done
89
+        # on a linear history.
90
+        extra = ''
91
+        if self.conf['linear_linestats']:
92
+            extra = '--first-parent -m'
93
+        lines = getpipeoutput(
94
+            ['git log --shortstat %s --pretty=format:"%%at %%aN" %s' % (extra, getlogrange(self.conf, 'HEAD'))]).split(
95
+            '\n')
96
+        lines.reverse()
97
+        files = 0
98
+        inserted = 0
99
+        deleted = 0
100
+        total_lines = 0
101
+        for line in lines:
102
+            if len(line) == 0:
54 103
                 continue
55
-            prev = tag
56
-            for line in output.split('\n'):
57
-                parts = re.split('\s+', line, 2)
58
-                commits = int(parts[1])
59
-                author = parts[2]
60
-                self.tags[tag]['commits'] += commits
61
-                self.tags[tag]['authors'][author] = commits
62 104
 
105
+            # <stamp> <author>
106
+            if re.search('files? changed', line) is None:
107
+                pos = line.find(' ')
108
+                if pos != -1:
109
+                    try:
110
+                        (stamp, author) = (int(line[:pos]), line[pos + 1:])
111
+                        self.changes_by_date[stamp] = {'files': files, 'ins': inserted, 'del': deleted,
112
+                                                       'lines': total_lines}
113
+
114
+                        date = datetime.datetime.fromtimestamp(stamp)
115
+                        yymm = date.strftime('%Y-%m')
116
+                        self.lines_added_by_month[yymm] = self.lines_added_by_month.get(yymm, 0) + inserted
117
+                        self.lines_removed_by_month[yymm] = self.lines_removed_by_month.get(yymm, 0) + deleted
118
+
119
+                        yy = date.year
120
+                        self.lines_added_by_year[yy] = self.lines_added_by_year.get(yy, 0) + inserted
121
+                        self.lines_removed_by_year[yy] = self.lines_removed_by_year.get(yy, 0) + deleted
122
+
123
+                        files, inserted, deleted = 0, 0, 0
124
+                    except ValueError:
125
+                        logging.warning(f'unexpected line "{line}')
126
+                else:
127
+                    logging.warning(f'unexpected line "{line}')
128
+            else:
129
+                numbers = getstatsummarycounts(line)
130
+
131
+                if len(numbers) == 3:
132
+                    (files, inserted, deleted) = map(lambda el: int(el), numbers)
133
+                    total_lines += inserted
134
+                    total_lines -= deleted
135
+                    self.total_lines_added += inserted
136
+                    self.total_lines_removed += deleted
137
+
138
+                else:
139
+                    logging.warning(f'Failed to handle line "{line}"')
140
+                    (files, inserted, deleted) = (0, 0, 0)
141
+            # self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted }
142
+        self.total_lines += total_lines
143
+
144
+    def get_file_info(self):
145
+        # extensions and size of files
146
+        lines = getpipeoutput(['git ls-tree -r -l -z %s' % getcommitrange(self.conf, 'HEAD', end_only=True)]).split(
147
+            '\000')
148
+        blobs_to_read = []
149
+        for line in lines:
150
+            if len(line) == 0:
151
+                continue
152
+            parts = re.split('\s+', line, 4)
153
+            if parts[0] == '160000' and parts[3] == '-':
154
+                # skip submodules
155
+                continue
156
+            blob_id = parts[2]
157
+            size = int(parts[3])
158
+            fullpath = parts[4]
159
+
160
+            self.total_size += size
161
+            self.total_files += 1
162
+
163
+            _, ext = os.path.splitext(fullpath)
164
+            if len(ext) > self.conf['max_ext_length']:
165
+                ext = ''
166
+            if ext not in self.extensions:
167
+                self.extensions[ext] = {'files': 0, 'lines': 0}
168
+            self.extensions[ext]['files'] += 1
169
+            # if cache empty then add ext and blob id to list of new blob's
170
+            # otherwise try to read needed info from cache
171
+            if 'lines_in_blob' not in self.cache.keys():
172
+                blobs_to_read.append((ext, blob_id))
173
+                continue
174
+            if blob_id in self.cache['lines_in_blob'].keys():
175
+                self.extensions[ext]['lines'] += self.cache['lines_in_blob'][blob_id]
176
+            else:
177
+                blobs_to_read.append((ext, blob_id))
178
+        # Get info about line counts for new blobs that weren't found in the cache
179
+        pool = Pool(processes=self.conf['processes'])
180
+        ext_blob_linecount = pool.map(getnumoflinesinblob, blobs_to_read)
181
+        pool.terminate()
182
+        pool.join()
183
+        # Update cache and write down info about the number of lines
184
+        for (ext, blob_id, linecount) in ext_blob_linecount:
185
+            if 'lines_in_blob' not in self.cache:
186
+                self.cache['lines_in_blob'] = {}
187
+            self.cache['lines_in_blob'][blob_id] = linecount
188
+            self.extensions[ext]['lines'] += self.cache['lines_in_blob'][blob_id]
189
+
190
+    def get_revision_info(self):
63 191
         # Collect revision statistics
64 192
         # Outputs "<stamp> <date> <time> <timezone> <author> '<' <mail> '>'"
65 193
         lines = getpipeoutput(
66
-            ['git rev-list --pretty=format:"%%at %%ai %%aN <%%aE>" %s' % getlogrange(self.conf, 'HEAD'), 'grep -v ^commit']).split(
194
+            ['git rev-list --pretty=format:"%%at %%ai %%aN <%%aE>" %s' % getlogrange(self.conf, 'HEAD'),
195
+             'grep -v ^commit']).split(
67 196
             '\n')
68 197
         for line in lines:
69 198
             parts = line.split(' ', 4)
@@ -124,16 +253,16 @@ class GitDataCollector(DataCollector):
124 253
 
125 254
             # author stats
126 255
             if author not in self.authors:
127
-                self.authors[author] = {}
256
+                self.authors[author] = Author()
128 257
             # commits, note again that commits may be in any date order because of cherry-picking and patches
129
-            if 'last_commit_stamp' not in self.authors[author]:
130
-                self.authors[author]['last_commit_stamp'] = stamp
131
-            if stamp > self.authors[author]['last_commit_stamp']:
132
-                self.authors[author]['last_commit_stamp'] = stamp
133
-            if 'first_commit_stamp' not in self.authors[author]:
134
-                self.authors[author]['first_commit_stamp'] = stamp
135
-            if stamp < self.authors[author]['first_commit_stamp']:
136
-                self.authors[author]['first_commit_stamp'] = stamp
258
+            if not self.authors[author].last_commit_stamp:
259
+                self.authors[author].last_commit_stamp = stamp
260
+            if stamp > self.authors[author].last_commit_stamp:
261
+                self.authors[author].last_commit_stamp = stamp
262
+            if not self.authors[author].first_commit_stamp:
263
+                self.authors[author].first_commit_stamp = stamp
264
+            if stamp < self.authors[author].first_commit_stamp:
265
+                self.authors[author].first_commit_stamp = stamp
137 266
 
138 267
             # author of the month/year
139 268
             yymm = date.strftime('%Y-%m')
@@ -154,12 +283,11 @@ class GitDataCollector(DataCollector):
154 283
 
155 284
             # authors: active days
156 285
             yymmdd = date.strftime('%Y-%m-%d')
157
-            if 'last_active_day' not in self.authors[author]:
158
-                self.authors[author]['last_active_day'] = yymmdd
159
-                self.authors[author]['active_days'] = {yymmdd}
160
-            elif yymmdd != self.authors[author]['last_active_day']:
161
-                self.authors[author]['last_active_day'] = yymmdd
162
-                self.authors[author]['active_days'].add(yymmdd)
286
+            if not self.authors[author].last_active_day:
287
+                self.authors[author].last_active_day = yymmdd
288
+            elif yymmdd != self.authors[author].last_active_day:
289
+                self.authors[author].last_active_day = yymmdd
290
+            self.authors[author].active_days.add(yymmdd)
163 291
 
164 292
             # project: active days
165 293
             if yymmdd != self.last_active_day:
@@ -168,10 +296,10 @@ class GitDataCollector(DataCollector):
168 296
 
169 297
             # timezone
170 298
             self.commits_by_timezone[timezone] = self.commits_by_timezone.get(timezone, 0) + 1
171
-
172 299
         # outputs "<stamp> <files>" for each revision
173 300
         revlines = getpipeoutput(
174
-            ['git rev-list --pretty=format:"%%at %%T" %s' % getlogrange(self.conf, 'HEAD'), 'grep -v ^commit']).strip().split('\n')
301
+            ['git rev-list --pretty=format:"%%at %%T" %s' % getlogrange(self.conf, 'HEAD'),
302
+             'grep -v ^commit']).strip().split('\n')
175 303
         lines = []
176 304
         revs_to_read = []
177 305
         # Look up rev in cache and take info from cache if found
@@ -187,20 +315,17 @@ class GitDataCollector(DataCollector):
187 315
                 lines.append('%d %d' % (int(time), self.cache['files_in_tree'][rev]))
188 316
             else:
189 317
                 revs_to_read.append((time, rev))
190
-
191 318
         # Read revisions from repo
192 319
         pool = Pool(processes=self.conf['processes'])
193 320
         time_rev_count = pool.map(getnumoffilesfromrev, revs_to_read)
194 321
         pool.terminate()
195 322
         pool.join()
196
-
197 323
         # Update cache with new revisions and append then to general list
198 324
         for (time, rev, count) in time_rev_count:
199 325
             if 'files_in_tree' not in self.cache:
200 326
                 self.cache['files_in_tree'] = {}
201 327
             self.cache['files_in_tree'][rev] = count
202 328
             lines.append('%d %d' % (int(time), count))
203
-
204 329
         self.total_commits += len(lines)
205 330
         for line in lines:
206 331
             parts = line.split(' ')
@@ -212,187 +337,64 @@ class GitDataCollector(DataCollector):
212 337
             except ValueError:
213 338
                 logging.warning(f'Failed to parse line "{line}"')
214 339
 
215
-        # extensions and size of files
216
-        lines = getpipeoutput(['git ls-tree -r -l -z %s' % getcommitrange(self.conf, 'HEAD', end_only=True)]).split('\000')
217
-        blobs_to_read = []
218
-        for line in lines:
219
-            if len(line) == 0:
220
-                continue
221
-            parts = re.split('\s+', line, 4)
222
-            if parts[0] == '160000' and parts[3] == '-':
223
-                # skip submodules
224
-                continue
225
-            blob_id = parts[2]
226
-            size = int(parts[3])
227
-            fullpath = parts[4]
228
-
229
-            self.total_size += size
230
-            self.total_files += 1
231
-
232
-            _, ext = os.path.splitext(fullpath)
233
-            if len(ext) > self.conf['max_ext_length']:
234
-                ext = ''
235
-            if ext not in self.extensions:
236
-                self.extensions[ext] = {'files': 0, 'lines': 0}
237
-            self.extensions[ext]['files'] += 1
238
-            # if cache empty then add ext and blob id to list of new blob's
239
-            # otherwise try to read needed info from cache
240
-            if 'lines_in_blob' not in self.cache.keys():
241
-                blobs_to_read.append((ext, blob_id))
242
-                continue
243
-            if blob_id in self.cache['lines_in_blob'].keys():
244
-                self.extensions[ext]['lines'] += self.cache['lines_in_blob'][blob_id]
245
-            else:
246
-                blobs_to_read.append((ext, blob_id))
247
-
248
-        # Get info about line counts for new blobs that weren't found in the cache
249
-        pool = Pool(processes=self.conf['processes'])
250
-        ext_blob_linecount = pool.map(getnumoflinesinblob, blobs_to_read)
251
-        pool.terminate()
252
-        pool.join()
253
-
254
-        # Update cache and write down info about the number of lines
255
-        for (ext, blob_id, linecount) in ext_blob_linecount:
256
-            if 'lines_in_blob' not in self.cache:
257
-                self.cache['lines_in_blob'] = {}
258
-            self.cache['lines_in_blob'][blob_id] = linecount
259
-            self.extensions[ext]['lines'] += self.cache['lines_in_blob'][blob_id]
260
-
261
-        # line statistics
262
-        # outputs:
263
-        #  N files changed, N insertions (+), N deletions(-)
264
-        # <stamp> <author>
265
-        self.changes_by_date = {}  # stamp -> { files, ins, del }
266
-        # computation of lines of code by date is better done
267
-        # on a linear history.
268
-        extra = ''
269
-        if self.conf['linear_linestats']:
270
-            extra = '--first-parent -m'
271
-        lines = getpipeoutput(
272
-            ['git log --shortstat %s --pretty=format:"%%at %%aN" %s' % (extra, getlogrange(self.conf, 'HEAD'))]).split('\n')
273
-        lines.reverse()
274
-        files = 0
275
-        inserted = 0
276
-        deleted = 0
277
-        total_lines = 0
340
+    def get_tags(self):
341
+        # tags
342
+        lines = getpipeoutput(['git show-ref --tags']).split('\n')
278 343
         for line in lines:
279 344
             if len(line) == 0:
280 345
                 continue
346
+            (line_hash, tag) = line.split(' ')
281 347
 
282
-            # <stamp> <author>
283
-            if re.search('files? changed', line) is None:
284
-                pos = line.find(' ')
285
-                if pos != -1:
286
-                    try:
287
-                        (stamp, author) = (int(line[:pos]), line[pos + 1:])
288
-                        self.changes_by_date[stamp] = {'files': files, 'ins': inserted, 'del': deleted,
289
-                                                       'lines': total_lines}
290
-
291
-                        date = datetime.datetime.fromtimestamp(stamp)
292
-                        yymm = date.strftime('%Y-%m')
293
-                        self.lines_added_by_month[yymm] = self.lines_added_by_month.get(yymm, 0) + inserted
294
-                        self.lines_removed_by_month[yymm] = self.lines_removed_by_month.get(yymm, 0) + deleted
295
-
296
-                        yy = date.year
297
-                        self.lines_added_by_year[yy] = self.lines_added_by_year.get(yy, 0) + inserted
298
-                        self.lines_removed_by_year[yy] = self.lines_removed_by_year.get(yy, 0) + deleted
299
-
300
-                        files, inserted, deleted = 0, 0, 0
301
-                    except ValueError:
302
-                        logging.warning(f'unexpected line "{line}')
303
-                else:
304
-                    logging.warning(f'unexpected line "{line}')
305
-            else:
306
-                numbers = getstatsummarycounts(line)
307
-
308
-                if len(numbers) == 3:
309
-                    (files, inserted, deleted) = map(lambda el: int(el), numbers)
310
-                    total_lines += inserted
311
-                    total_lines -= deleted
312
-                    self.total_lines_added += inserted
313
-                    self.total_lines_removed += deleted
314
-
315
-                else:
316
-                    logging.warning(f'Failed to handle line "{line}"')
317
-                    (files, inserted, deleted) = (0, 0, 0)
318
-            # self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted }
319
-        self.total_lines += total_lines
320
-
321
-        # Per-author statistics
322
-
323
-        # defined for stamp, author only if author committed at this timestamp.
324
-        self.changes_by_date_by_author = {}  # stamp -> author -> lines_added
325
-
326
-        # Similar to the above, but never use --first-parent
327
-        # (we need to walk through every commit to know who
328
-        # committed what, not just through mainline)
329
-        lines = getpipeoutput(
330
-            ['git log --shortstat --date-order --pretty=format:"%%at %%aN" %s' % (getlogrange(self.conf, 'HEAD'))]).split('\n')
331
-        lines.reverse()
332
-        inserted = 0
333
-        deleted = 0
334
-        stamp = 0
335
-        for line in lines:
336
-            if len(line) == 0:
348
+            tag = tag.replace('refs/tags/', '')
349
+            output = getpipeoutput(['git log "%s" --pretty=format:"%%at %%aN" -n 1' % line_hash])
350
+            if len(output) > 0:
351
+                parts = output.split(' ')
352
+                try:
353
+                    stamp = int(parts[0])
354
+                except ValueError:
355
+                    stamp = 0
356
+                self.tags[tag] = {'stamp': stamp,
357
+                                  'hash': line_hash,
358
+                                  'date': datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'),
359
+                                  'commits': 0,
360
+                                  'authors': {}}
361
+        # collect info on tags, starting from latest
362
+        tags_sorted_by_date_asc = [tup[1] for tup in sorted([(el[1]['date'], el[0]) for el in self.tags.items()])]
363
+        # tags_sorted_by_date_desc = map(lambda el: el[1],
364
+        #                                reversed(sorted(map(lambda el: (el[1]['date'], el[0]), self.tags.items()))))
365
+        prev = None
366
+        #        for tag in reversed(tags_sorted_by_date_desc):
367
+        for tag in tags_sorted_by_date_asc:
368
+            cmd = 'git shortlog -s "%s"' % tag
369
+            if prev is not None:
370
+                cmd += ' "^%s"' % prev
371
+            output = getpipeoutput([cmd])
372
+            if len(output) == 0:
337 373
                 continue
338
-
339
-            # <stamp> <author>
340
-            if re.search('files? changed', line) is None:
341
-                pos = line.find(' ')
342
-                if pos != -1:
343
-                    try:
344
-                        oldstamp = stamp
345
-                        (stamp, author) = (int(line[:pos]), line[pos + 1:])
346
-                        if oldstamp > stamp:
347
-                            # clock skew, keep old timestamp to avoid having ugly graph
348
-                            stamp = oldstamp
349
-                        if author not in self.authors:
350
-                            self.authors[author] = {'lines_added': 0, 'lines_removed': 0, 'commits': 0}
351
-                        self.authors[author]['commits'] = self.authors[author].get('commits', 0) + 1
352
-                        self.authors[author]['lines_added'] = self.authors[author].get('lines_added', 0) + inserted
353
-                        self.authors[author]['lines_removed'] = self.authors[author].get('lines_removed', 0) + deleted
354
-                        if stamp not in self.changes_by_date_by_author:
355
-                            self.changes_by_date_by_author[stamp] = {}
356
-                        if author not in self.changes_by_date_by_author[stamp]:
357
-                            self.changes_by_date_by_author[stamp][author] = {}
358
-                        self.changes_by_date_by_author[stamp][author]['lines_added'] = self.authors[author][
359
-                            'lines_added']
360
-                        self.changes_by_date_by_author[stamp][author]['commits'] = self.authors[author]['commits']
361
-                        files, inserted, deleted = 0, 0, 0
362
-                    except ValueError:
363
-                        logging.warning(f'unexpected line "{line}')
364
-                else:
365
-                    logging.warning(f'unexpected line "{line}')
366
-            else:
367
-                numbers = getstatsummarycounts(line)
368
-
369
-                if len(numbers) == 3:
370
-                    (files, inserted, deleted) = map(lambda el: int(el), numbers)
371
-                else:
372
-                    logging.warning(f'Failed to handle line "{line}"')
373
-                    (files, inserted, deleted) = (0, 0, 0)
374
+            prev = tag
375
+            for line in output.split('\n'):
376
+                parts = re.split('\s+', line, 2)
377
+                commits = int(parts[1])
378
+                author = parts[2]
379
+                self.tags[tag]['commits'] += commits
380
+                self.tags[tag]['authors'][author] = commits
374 381
 
375 382
     def refine(self):
376 383
         # authors
377 384
         # name -> {place_by_commits, commits_frac, date_first, date_last, timedelta}
378
-        self.authors_by_commits = getkeyssortedbyvaluekey(self.authors, 'commits')
379
-        self.authors_by_commits.reverse()  # most first
385
+        self.authors_by_commits = self.getAuthors()
380 386
         for i, name in enumerate(self.authors_by_commits):
381
-            self.authors[name]['place_by_commits'] = i + 1
387
+            self.authors[name].place_by_commits = i + 1
382 388
 
383 389
         for name in self.authors.keys():
384 390
             a = self.authors[name]
385
-            a['commits_frac'] = (100 * float(a['commits'])) / self.getTotalCommits()
386
-            date_first = datetime.datetime.fromtimestamp(a['first_commit_stamp'])
387
-            date_last = datetime.datetime.fromtimestamp(a['last_commit_stamp'])
391
+            a.commits_frac = (100 * float(a.commits)) / self.getTotalCommits()
392
+            date_first = datetime.datetime.fromtimestamp(a.first_commit_stamp)
393
+            date_last = datetime.datetime.fromtimestamp(a.last_commit_stamp)
388 394
             delta = date_last - date_first
389
-            a['date_first'] = date_first.strftime('%Y-%m-%d')
390
-            a['date_last'] = date_last.strftime('%Y-%m-%d')
391
-            a['timedelta'] = delta
392
-            if 'lines_added' not in a:
393
-                a['lines_added'] = 0
394
-            if 'lines_removed' not in a:
395
-                a['lines_removed'] = 0
395
+            a.date_first = date_first.strftime('%Y-%m-%d')
396
+            a.date_last = date_last.strftime('%Y-%m-%d')
397
+            a.timedelta = delta
396 398
 
397 399
     def getActiveDays(self):
398 400
         return self.active_days
@@ -407,8 +409,7 @@ class GitDataCollector(DataCollector):
407 409
         return self.authors[author]
408 410
 
409 411
     def getAuthors(self, limit=None):
410
-        res = getkeyssortedbyvaluekey(self.authors, 'commits')
411
-        res.reverse()
412
+        res = [el[0] for el in sorted(self.authors.items(), key=lambda x: x[1].commits, reverse=True)]
412 413
         return res[:limit]
413 414
 
414 415
     def getCommitDeltaDays(self):

+ 2
- 1
gitstats/gitstats.py Näytä tiedosto

@@ -104,8 +104,9 @@ class GitStats:
104 104
 
105 105
             os.chdir(prevdir)
106 106
 
107
-            logging.info('Refining data...')
108 107
         data.saveCache(cachefile)
108
+
109
+        logging.info('Refining data...')
109 110
         data.refine()
110 111
 
111 112
         os.chdir(rundir)

+ 5
- 5
gitstats/htmlreportcreator.py Näytä tiedosto

@@ -282,9 +282,9 @@ class HTMLReportCreator(ReportCreator):
282 282
             info = data.getAuthorInfo(author)
283 283
             f.write(
284 284
                 '<tr><td>%s</td><td>%d (%.2f%%)</td><td>%d</td><td>%d</td><td>%s</td><td>%s</td><td>%s</td><td>%d</td><td>%d</td></tr>' % (
285
-                    author, info['commits'], info['commits_frac'], info['lines_added'], info['lines_removed'],
286
-                    info['date_first'], info['date_last'], info['timedelta'], len(info['active_days']),
287
-                    info['place_by_commits']))
285
+                    author, info.commits, info.commits_frac, info.lines_added, info.lines_removed,
286
+                    info.date_first, info.date_last, info.timedelta, len(info.active_days),
287
+                    info.place_by_commits))
288 288
         f.write('</table>')
289 289
 
290 290
         allauthors = data.getAuthors()
@@ -325,8 +325,8 @@ class HTMLReportCreator(ReportCreator):
325 325
             fgc.write('%d' % stamp)
326 326
             for author in self.authors_to_plot:
327 327
                 if author in data.changes_by_date_by_author[stamp].keys():
328
-                    lines_by_authors[author] = data.changes_by_date_by_author[stamp][author]['lines_added']
329
-                    commits_by_authors[author] = data.changes_by_date_by_author[stamp][author]['commits']
328
+                    lines_by_authors[author] = data.changes_by_date_by_author[stamp][author].lines_added
329
+                    commits_by_authors[author] = data.changes_by_date_by_author[stamp][author].commits
330 330
                 fgl.write(' %d' % lines_by_authors[author])
331 331
                 fgc.write(' %d' % commits_by_authors[author])
332 332
             fgl.write('\n')

+ 1
- 0
setup.py Näytä tiedosto

@@ -37,6 +37,7 @@ setup(
37 37
         'setuptools>=18.0'
38 38
     ],
39 39
     install_requires=[
40
+        'dataclasses',
40 41
         'multiprocessing_logging'
41 42
     ],
42 43