@@ -1,17 +1,18 @@
 #! /usr/bin/env python3
 import csv
-import glob
 import logging
 import os
 import sys
 
 import multiprocessing_logging
+
+from collections import defaultdict
+
 from gitstats.cd import cd
 
 from gitstats import cli
-from gitstats.data import AuthorTotals, AuthorRow, File, LocByDate, PullRequest, Revision, Tag
-from gitstats.data_generators import gen_author_data, gen_author_totals_data, gen_tag_data, gen_revision_data, \
-    gen_file_data, gen_loc_data, gen_pr_data
+from gitstats.data import PullRequest, Revision
+from gitstats.data_generators import gen_pr_data, gen_revision_graph, gen_complete_file_info
 
 exectime_internal = 0.0
 exectime_external = 0.0
@@ -19,48 +20,39 @@ exectime_external = 0.0
 
 class _FileHandles:
     def __init__(self, output_dir):
-        self.author_info = open(os.path.join(output_dir, 'authors.csv'), 'w', encoding='utf8')
-        self.author_info_writer = csv.writer(self.author_info)
-        self.author_info_writer.writerow(['Repo', 'CommitHash', 'TimeStamp', 'Author', 'FilesChanged', 'LinesInserted',
-                                          'LinesDeleted'])
-
         self.author_totals_info = open(os.path.join(output_dir, 'author_totals.csv'), 'w', encoding='utf8')
         self.author_totals_info_writer = csv.writer(self.author_totals_info)
         self.author_totals_info_writer.writerow(["Repo", "Author", "Commits"])
 
-        self.tag_info = open(os.path.join(output_dir, 'tags.csv'), 'w', encoding='utf8')
-        self.tag_info_writer = csv.writer(self.tag_info)
-        self.tag_info_writer.writerow(["Repo", "CommitHash", "Timestamp", "TotalCommits", "Author", "AuthorCommits"])
-
         self.revision_info = open(os.path.join(output_dir, 'revs.csv'), 'w', encoding='utf8')
         self.revision_info_writer = csv.writer(self.revision_info)
         self.revision_info_writer.writerow(['Repo', 'CommitHash', 'TimeStamp', 'TimeZone', 'Author', 'AuthorEmail',
-                                            'Domain', 'FilesChanged'])
-
-        self.files_info = open(os.path.join(output_dir, 'files.csv'), 'w', encoding='utf8')
-        self.files_info_writer = csv.writer(self.files_info)
-        self.files_info_writer.writerow(['Repo', 'File', 'Ext', 'Size', 'Lines', 'Resource'])
+                                            'Domain'])
 
         self.loc_info = open(os.path.join(output_dir, 'loc.csv'), 'w', encoding='utf8')
         self.loc_info_writer = csv.writer(self.loc_info)
-        self.loc_info_writer.writerow(['Repo', 'CommitHash', 'TimeStamp', 'FileCount', 'LinesInserted', 'LinesDeleted',
-                                       'TotalLines'])
+        self.loc_info_writer.writerow(['repo', 'hash', 'stamp', 'language', 'files', 'lines', 'code', 'comments',
+                                       'blanks'])
+
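+        # Per-commit LOC deltas, broken down by author and language.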
+        self.loc_delta = open(os.path.join(output_dir, 'loc_delta.csv'), 'w', encoding='utf8')
+        self.loc_delta_writer = csv.writer(self.loc_delta)
+        self.loc_delta_writer.writerow(['repo', 'hash', 'stamp', 'author', 'language', 'files', 'lines', 'code',
+                                        'comments', 'blanks'])
 
         self.repo_info = open(os.path.join(output_dir, 'repo.csv'), 'w', encoding='utf8')
         self.repo_info_writer = csv.writer(self.repo_info)
-        self.repo_info_writer.writerow(['Repo', 'TotalFiles', 'TotalLines'])
+        self.repo_info_writer.writerow(['Repo', 'Language', 'TotalFiles', 'TotalLines', 'TotalCodeLines', 'TotalCommentLines',
+                                        'TotalBlankLines'])
 
         self.prs_info = open(os.path.join(output_dir, 'prs.csv'), 'w', encoding='utf8')
         self.prs_info_writer = csv.writer(self.prs_info)
         self.prs_info_writer.writerow(['Repo', 'CommitHash', 'TimeStamp', 'ParentHashMaster', 'ParentHashBranch', 'PrMergeDuration'])
 
     def close(self):
-        self.author_info.close()
         self.author_totals_info.close()
-        self.tag_info.close()
         self.revision_info.close()
-        self.files_info.close()
         self.loc_info.close()
+        self.loc_delta.close()
         self.repo_info.close()
         self.prs_info.close()
 
@@ -69,8 +61,7 @@ class GitCsvGenerator():
         self.conf = conf
         self.files: _FileHandles = None
         self.output_dir = output_dir
-        self.resource_files = []
-        self.igore_files = ''
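+        # Reporting window; the extract_* methods skip revisions and PRs whose timestamps fall outside [begin, end].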
+        self.begin, self.end = cli.get_begin_end_timestamps(conf)
 
     def __enter__(self):
         self.files = _FileHandles(self.output_dir)
@@ -81,72 +72,109 @@ class GitCsvGenerator():
     def collect(self, dir):
 
         with cd(dir):
-            self.resource_files = [file for file in glob.glob(self.conf['resrouce_file_pattern'], recursive=True) if os.path.isfile(file)]
-
-            if self.resource_files:
-                self.ignore_files = '" "'.join([f":(exclude){file}" for file in self.resource_files])
-                self.ignore_files = f'-- "{self.ignore_files}"'
-
             if len(self.conf['project_name']) == 0:
                 self.projectname = os.path.basename(os.path.abspath(dir))
             else:
                 self.projectname = self.conf['project_name']
 
-            self.get_total_authors()
-            self.get_tags()
-            self.get_revision_info()
-            self.get_file_info()
-            self.get_loc_info()
-            self.get_author_info()
-            self.get_pr_info()
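+            # Build the revision graph once (with complete file info) and derive all of the CSV outputs from it.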
+            graph = gen_revision_graph()
+            gen_complete_file_info(graph)
+
+            self.extract_total_authors(graph)
+            self.extract_pr_info(graph)
+            self.extract_code_info(graph)
+            self.extract_revision_info(graph)
+            # self.get_revision_info(graph)
+            # self.get_tags()
+            # self.get_file_info()
+            # self.get_loc_info()
+            # self.get_author_info()
 
-    def get_total_authors(self):
+    def extract_total_authors(self, graph):
         logging.info(f"Getting author totals for {self.projectname}")
-        def row_processor(row: AuthorTotals):
-            self.files.author_totals_info_writer.writerow([self.projectname, row.author, row.total_commits])
-        gen_author_totals_data(self.conf, row_processor)
-
-    def get_tags(self):
-        logging.info(f"Getting tag info for {self.projectname}")
-        def row_processor(row: Tag):
-            for author, commits in row.authors.items():
-                self.files.tag_info_writer.writerow([self.projectname, row.hash, row.stamp, row.commits, author, commits])
-        gen_tag_data(self.conf, row_processor)
-
-    def get_revision_info(self):
-        logging.info(f"Getting rev info for {self.projectname}")
-        def row_processor(row: Revision):
-            self.files.revision_info_writer.writerow([self.projectname, row.hash, row.stamp, row.timezone, row.author,
-                                                      row.email, row.domain, row.file_count])
-        gen_revision_data(self.conf, row_processor)
-
-    def get_file_info(self):
-        logging.info(f"Getting file info for {self.projectname}")
-        def row_processor(row: File):
-            self.files.files_info_writer.writerow([self.projectname, row.full_path, row.ext, row.size, row.lines, row.full_path in self.resource_files])
-        gen_file_data(self.conf, row_processor)
-
-    def get_loc_info(self):
-        logging.info(f"Getting LOC info for {self.projectname}")
-        def row_processor(row: LocByDate):
-            self.files.loc_info_writer.writerow([self.projectname, row.hash, row.stamp, row.file_count,
-                                                 row.lines_inserted, row.lines_deleted, row.total_lines])
-        total_files, total_lines = gen_loc_data(self.conf, row_processor, self.ignore_files)
-        self.files.repo_info_writer.writerow([self.projectname, total_files, total_lines])
-
-    def get_author_info(self):
-        logging.info(f"Getting author info for {self.projectname}")
-        def row_processor(row: AuthorRow):
-            self.files.author_info_writer.writerow([self.projectname, row.hash, row.stamp, row.author,
-                                                    row.files_modified, row.lines_inserted, row.lines_deleted])
-        gen_author_data(self.conf, row_processor, self.ignore_files)
-
-    def get_pr_info(self):
+
+        authors = defaultdict(int)
+        for rev in graph.revisions.values():
+            # don't include merge to master as a commit in counting total author
+            # commits.
+            if rev.stamp >= self.begin and rev.stamp <= self.end and rev.master_pr == 0:
+                authors[rev.author] += 1
+
+        for author, total_commits in authors.items():
+            self.files.author_totals_info_writer.writerow([self.projectname, author, total_commits])
+
+    def extract_pr_info(self, graph):
         logging.info(f"Getting pull request info for {self.projectname}")
         def row_processor(row: PullRequest):
-            self.files.prs_info_writer.writerow([self.projectname, row.hash, row.stamp, row.master_rev,
-                                                 row.branch_rev, row.duration.total_seconds()])
-        gen_pr_data(self.conf, row_processor)
+            if row.stamp >= self.begin and row.stamp <= self.end:
+                self.files.prs_info_writer.writerow([self.projectname, row.hash, row.stamp, row.master_rev,
+                                                     row.branch_rev, row.duration.total_seconds()])
+        gen_pr_data(row_processor, graph)
+
+    def extract_code_info(self, graph):
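+        # Walks the master branch: writes per-language LOC rows and per-commit deltas for
+        # in-window revisions; repo-wide totals are taken from the newest master revision.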
+        rev_max: Revision = None
+        for rev in graph.master_revs:
+            revision: Revision = graph.revisions[rev]
+            if not rev_max or revision.stamp > rev_max.stamp:
+                rev_max = revision
+            if revision.stamp >= self.begin and revision.stamp <= self.end:
+                for lang, file_info in revision.delta.items():
+                    if file_info.file_count or \
+                            file_info.line_count or \
+                            file_info.code_line_count or \
+                            file_info.comment_line_count or \
+                            file_info.blank_line_count:
+
+                        if revision.branch_parent in graph.revisions:
+                            parent = revision.branch_parent
+                        else:
+                            parent = revision.master_parent
+                        if parent:
+                            self.files.loc_delta_writer.writerow([self.projectname,
+                                                                  revision.hash,
+                                                                  revision.stamp,
+                                                                  graph.revisions[parent].author,
+                                                                  lang,
+                                                                  file_info.file_count,
+                                                                  file_info.line_count,
+                                                                  file_info.code_line_count,
+                                                                  file_info.comment_line_count,
+                                                                  file_info.blank_line_count])
+                for lang, file_info in revision.file_infos.items():
+                    if file_info.file_count or \
+                            file_info.line_count or \
+                            file_info.code_line_count or \
+                            file_info.comment_line_count or \
+                            file_info.blank_line_count:
+                        self.files.loc_info_writer.writerow([self.projectname,
+                                                             revision.hash,
+                                                             revision.stamp,
+                                                             lang,
+                                                             file_info.file_count,
+                                                             file_info.line_count,
+                                                             file_info.code_line_count,
+                                                             file_info.comment_line_count,
+                                                             file_info.blank_line_count])
+
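+        # Repo-wide totals: one row per language, from the most recent master revision.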
+        for file_info in rev_max.file_infos.values():
+            self.files.repo_info_writer.writerow([self.projectname,
+                                                  file_info.language,
+                                                  file_info.file_count,
+                                                  file_info.line_count,
+                                                  file_info.code_line_count,
+                                                  file_info.comment_line_count,
+                                                  file_info.blank_line_count])
+
+    def extract_revision_info(self, graph):
+        for revision in graph.revisions.values():
+            if revision.stamp >= self.begin and revision.stamp <= self.end:
+                self.files.revision_info_writer.writerow([self.projectname,
+                                                          revision.hash,
+                                                          revision.stamp,
+                                                          revision.timezone,
+                                                          revision.author,
+                                                          revision.email,
+                                                          revision.domain])
 
 def gen_csv():
     conf, paths, outputpath = cli.get_cli()
|