|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+import csv
|
|
|
2
|
+import logging
|
|
|
3
|
+import os
|
|
|
4
|
+
|
|
|
5
|
+from multiprocessing import Pool
|
|
|
6
|
+
|
|
|
7
|
+from gitstats import cli, cd
|
|
|
8
|
+from gitstats.miscfuncs import getlogrange, getpipeoutput, gettimedelta
|
|
|
9
|
+from gitstats.data import PullRequest
|
|
|
10
|
+
|
|
|
11
|
+
|
|
|
12
|
def gen_pr_data(conf, row_processor):
    '''
    Pull pull-request (merge-commit) information from the current git
    repository. For each valid pull request found, call back to
    row_processor passing a populated PullRequest.

    :param conf: configuration (mostly used for date limits; also supplies
        the ``processes`` worker-pool size)
    :param row_processor: function to receive the callback, invoked once
        per valid PullRequest
    :return: None
    '''

    prs = {}  # commit hash -> PullRequest

    # DBG: git log --all --grep="Merge pull request .* to master" --shortstat --pretty=format:"%H %at %aN" --since="2017-10-01" "HEAD"', 'grep -v ^commit'
    lines = getpipeoutput(
        ['git log --all --grep="Merge pull request .* to master" --shortstat '
         '--pretty=format:"%%H %%at %%aN|%%P" %s' % getlogrange(conf, 'HEAD'),
         'grep -v ^"files changed"']).split('\n')
    for line in lines:
        line = line.strip()
        # Skip blank lines and any --shortstat summary lines that survived
        # the grep filter.
        if line and 'files changed' not in line:
            parts = line.split(' ', 2)
            commit_hash = parts[0]  # renamed: don't shadow builtin hash()
            try:
                stamp = int(parts[1])
            except ValueError:
                # Malformed timestamp field; keep the record with epoch 0.
                stamp = 0
            (author, parent_hashes) = parts[2].split('|')
            parent_hashes = parent_hashes.split(' ')
            # Only two-parent commits are genuine merge commits (PRs).
            if len(parent_hashes) == 2:
                prs[commit_hash] = PullRequest(stamp, commit_hash, author,
                                               parent_hashes)

    # Decide which parent is the master-side revision and which is the
    # branch-side revision: a parent that is itself a recorded PR merge is
    # taken to be on master. If both (or neither) parents are recorded PR
    # merges the topology is ambiguous, so the PR is flagged invalid.
    for pr in prs.values():
        if pr.parent_hashes[0] in prs:
            pr.master_rev = pr.parent_hashes[0]
            if pr.parent_hashes[1] in prs:
                logging.warning(f"Unexpected branching: {pr}")
                pr.invalid_pr = True
            else:
                pr.branch_rev = pr.parent_hashes[1]
        else:
            pr.branch_rev = pr.parent_hashes[0]
            if pr.parent_hashes[1] in prs:
                pr.master_rev = pr.parent_hashes[1]
            else:
                logging.warning(f"Unexpected branching: {pr}")
                pr.invalid_pr = True

    prs_to_query = [(pr.hash, pr.stamp, pr.branch_rev)
                    for pr in prs.values() if not pr.invalid_pr]

    # # todo: consider putting in a cache for this. There was one in the original code
    # # DBG: git log -n 1 --format=%at "ceb3165b51ae0680724fd71e16a5ff836a0de41e"
    # Context manager guarantees the worker pool is terminated even if a
    # worker raises inside gettimedelta (the bare terminate()/join() pair
    # previously leaked processes on that path).
    with Pool(processes=conf['processes']) as pool:
        time_deltas = pool.map(gettimedelta, prs_to_query)
    for (commit_hash, timedelta) in time_deltas:
        pr = prs[commit_hash]
        pr.duration = timedelta
        if pr.duration.total_seconds() < 0:
            # Branch timestamp after the merge timestamp: clock skew or
            # rewritten history — the duration is meaningless.
            pr.invalid_pr = True
            logging.warning(f"Unexpected. Negative duration: {pr}")
        else:
            row_processor(pr)
|
|
|
76
|
+
|
|
|
77
|
+
|
|
|
78
|
+
|
|
|
79
|
if __name__ == "__main__":
    conf, paths, outputpath = cli.get_cli()
    # newline='' is required by the csv module so that it controls line
    # endings itself; without it, rows come out separated by blank lines
    # on Windows (see csv module docs).
    with open(outputpath, 'w', encoding='utf8', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['repo', 'hash', 'stamp', 'masterRev', 'branchRev', 'prMergeDuration', 'prMergeDurationHr'])

        for path in paths:
            # Repository name is the final path component.
            repo_name = os.path.basename(path)
            with cd.cd(path):
                def row_processor(row: PullRequest):
                    # Emit one CSV row per valid pull request; duration is
                    # written both in seconds and human-readable form.
                    writer.writerow([repo_name, row.hash, row.stamp,
                                     row.master_rev, row.branch_rev,
                                     row.duration.total_seconds(),
                                     row.duration])
                gen_pr_data(conf, row_processor)
|