Преглед изворни кода

Create csv generator as a parallel to HTML Report Creator

Dan Rapp пре 7 година
родитељ
комит
47dd12abfd

+ 0
- 1
gitstats/cli.py Прегледај датотеку

@@ -25,7 +25,6 @@ Usage: gitstats [options] <gitpath..> <outputpath>
25 25
 
26 26
 Options:
27 27
 -c key=value     Override configuration value
28
--n key=value     Define author name equivalency (key will treated the same as value)
29 28
 
30 29
 Default config values:
31 30
 {conf}

+ 2
- 1
gitstats/data/__init__.py Прегледај датотеку

@@ -4,4 +4,5 @@ from .author_totals import AuthorTotals
4 4
 from .tag import Tag
5 5
 from .revision import Revision
6 6
 from .file import File
7
-from .loc_by_date import LocByDate
7
+from .loc_by_date import LocByDate
8
+from .pr import PullRequest

+ 1
- 1
gitstats/data/author_row.py Прегледај датотеку

@@ -2,7 +2,7 @@ from dataclasses import dataclass
2 2
 
3 3
 @dataclass
4 4
 class AuthorRow:
5
-    sha: str
5
+    hash: str
6 6
     stamp: int
7 7
     author: str
8 8
     files_modified: int

+ 1
- 1
gitstats/data/revision.py Прегледај датотеку

@@ -6,7 +6,7 @@ from typing import Dict
6 6
 
7 7
 @dataclass
8 8
 class Revision:
9
-    sha: str
9
+    hash: str
10 10
     stamp: int
11 11
     timezone: int = 0
12 12
     author: str = ''

+ 2
- 1
gitstats/data_generators/__init__.py Прегледај датотеку

@@ -3,4 +3,5 @@ from .gen_author_totals import gen_author_totals_data
3 3
 from .gen_tag_data import gen_tag_data
4 4
 from .gen_revision_data import gen_revision_data
5 5
 from .gen_file_data import gen_file_data
6
-from .gen_loc_data import gen_loc_data
6
+from .gen_loc_data import gen_loc_data
7
+from .gen_pr_data import gen_pr_data

+ 4
- 5
gitstats/data_generators/gen_author_data.py Прегледај датотеку

@@ -81,8 +81,7 @@ if __name__ == "__main__":
81 81
         for path in paths:
82 82
             repo_name = os.path.split(path)[1]
83 83
             with (cd.cd(path)):
84
-
85
-                gen_author_data(
86
-                    conf,
87
-                    lambda row: writer.writerow([repo_name, row.sha, row.stamp, row.author, row.files_modified,
88
-                                                 row.lines_inserted, row.lines_deleted]))
84
+                def row_processor(row: AuthorRow):
85
+                    writer.writerow([repo_name, row.hash, row.stamp, row.author, row.files_modified,
86
+                                     row.lines_inserted, row.lines_deleted])
87
+                gen_author_data(conf, row_processor)

+ 3
- 3
gitstats/data_generators/gen_author_totals.py Прегледај датотеку

@@ -42,6 +42,6 @@ if __name__ == "__main__":
42 42
         for path in paths:
43 43
             repo_name = os.path.split(path)[1]
44 44
             with (cd.cd(path)):
45
-                gen_author_totals_data(
46
-                    conf,
47
-                    lambda row: writer.writerow([repo_name, row.author, row.total_commits]))
45
+                def row_processor(row: AuthorTotals):
46
+                    writer.writerow([repo_name, row.author, row.total_commits])
47
+                gen_author_totals_data(conf, row_processor)

+ 3
- 4
gitstats/data_generators/gen_file_data.py Прегледај датотеку

@@ -59,7 +59,6 @@ if __name__ == "__main__":
59 59
         for path in paths:
60 60
             repo_name = os.path.split(path)[1]
61 61
             with (cd.cd(path)):
62
-
63
-                gen_file_data(
64
-                    conf,
65
-                    lambda row: writer.writerow([repo_name, row.full_path, row.ext, row.size, row.lines]))
62
+                def row_processor(row: File):
63
+                    writer.writerow([repo_name, row.full_path, row.ext, row.size, row.lines])
64
+                gen_file_data(conf, row_processor)

+ 6
- 4
gitstats/data_generators/gen_loc_data.py Прегледај датотеку

@@ -7,6 +7,8 @@ from gitstats import cli, cd
7 7
 from gitstats.miscfuncs import getlogrange, getpipeoutput, getstatsummarycounts
8 8
 from gitstats.data import LocByDate
9 9
 
10
# TODO: author attribution is wrong here because each commit seen is the merge commit to master
11
# TODO: (one per merged PR); we probably need to step back one commit to find the real author.
10 12
 
11 13
 def gen_loc_data(conf, row_processor):
12 14
     '''
@@ -74,7 +76,7 @@ if __name__ == "__main__":
74 76
         for path in paths:
75 77
             repo_name = os.path.split(path)[1]
76 78
             with (cd.cd(path)):
77
-
78
-                gen_loc_data(
79
-                    conf,
80
-                    lambda row: writer.writerow([repo_name, row.hash, row.stamp, row.file_count, row.lines_inserted, row.lines_deleted, row.total_lines]))
79
+                def row_processor(row: LocByDate):
80
+                    writer.writerow([repo_name, row.hash, row.stamp, row.file_count, row.lines_inserted,
81
+                                     row.lines_deleted, row.total_lines])
82
+                gen_loc_data(conf, row_processor)

+ 4
- 5
gitstats/data_generators/gen_revision_data.py Прегледај датотеку

@@ -68,8 +68,7 @@ if __name__ == "__main__":
68 68
         for path in paths:
69 69
             repo_name = os.path.split(path)[1]
70 70
             with (cd.cd(path)):
71
-
72
-                gen_revision_data(
73
-                    conf,
74
-                    lambda row: writer.writerow([repo_name, row.sha, row.stamp, row.timezone, row.author, row.email,
75
-                                                 row.domain, row.file_count]))
71
+                def row_processor(row: Revision):
72
+                    writer.writerow([repo_name, row.hash, row.stamp, row.timezone, row.author, row.email,
73
+                                     row.domain, row.file_count])
74
+                gen_revision_data(conf, row_processor)

+ 2
- 4
gitstats/data_generators/gen_tag_data.py Прегледај датотеку

@@ -66,9 +66,7 @@ if __name__ == "__main__":
66 66
         for path in paths:
67 67
             repo_name = os.path.split(path)[1]
68 68
             with (cd.cd(path)):
69
-
70
-                def process_row(row):
69
+                def row_processor(row: Tag):
71 70
                     for author, commits in row.authors.items():
72 71
                         writer.writerow([repo_name, row.hash, row.stamp, row.commits, author, commits])
73
-
74
-                gen_tag_data(conf, process_row)
72
+                gen_tag_data(conf, row_processor)

+ 149
- 0
gitstats/git_csv_generator.py Прегледај датотеку

@@ -0,0 +1,149 @@
1
+import csv
2
+import logging
3
+import os
4
+import sys
5
+
6
+import multiprocessing_logging
7
+
8
+from gitstats import cli
9
+from gitstats.data import AuthorTotals, AuthorRow, File, LocByDate, Revision, Tag
10
+from gitstats.data_generators import gen_author_data, gen_author_totals_data, gen_tag_data, gen_revision_data, \
11
+    gen_file_data, gen_loc_data
12
+
13
+exectime_internal = 0.0
14
+exectime_external = 0.0
15
+
16
+
17
class _FileHandles:
    """Open the six CSV report files and pair each with a ``csv.writer``.

    Every file is created (or truncated) inside ``output_dir`` and its header
    row is written immediately.  File objects and writers are exposed as
    ``<name>`` / ``<name>_writer`` attribute pairs:

    * ``author_info``         -> authors.csv
    * ``author_totals_info``  -> author_totals.csv
    * ``tag_info``            -> tags.csv
    * ``revision_info``       -> revs.csv
    * ``files_info``          -> files.csv
    * ``loc_info``            -> loc.csv
    """

    def __init__(self, output_dir):
        """Create all report files under ``output_dir`` and write their headers.

        Raises:
            OSError: if any file cannot be created.  Files opened before the
                failure are closed again before the error propagates.
        """
        # Every successfully opened file is tracked here so close() can
        # release whatever exists, even after a partial construction.
        self._handles = []
        try:
            self.author_info, self.author_info_writer = self._open_csv(
                output_dir, 'authors.csv',
                ['Repo', 'CommitHash', 'TimeStamp', 'Author', 'FilesChanged', 'LinesInserted',
                 'LinesDeleted'])

            self.author_totals_info, self.author_totals_info_writer = self._open_csv(
                output_dir, 'author_totals.csv',
                ['Repo', 'Author', 'Commits'])

            self.tag_info, self.tag_info_writer = self._open_csv(
                output_dir, 'tags.csv',
                ['Repo', 'CommitHash', 'Timestamp', 'TotalCommits', 'Author', 'AuthorCommits'])

            self.revision_info, self.revision_info_writer = self._open_csv(
                output_dir, 'revs.csv',
                ['Repo', 'CommitHash', 'TimeStamp', 'TimeZone', 'Author', 'AuthorEmail',
                 'Domain', 'FilesChanged'])

            self.files_info, self.files_info_writer = self._open_csv(
                output_dir, 'files.csv',
                ['Repo', 'File', 'Ext', 'Size', 'Lines'])

            self.loc_info, self.loc_info_writer = self._open_csv(
                output_dir, 'loc.csv',
                ['Repo', 'CommitHash', 'TimeStamp', 'FileCount', 'LinesInserted', 'LinesDeleted',
                 'TotalLines'])
        except Exception:
            # Do not leak the handles that were opened before the failure.
            self.close()
            raise

    def _open_csv(self, output_dir, filename, header):
        """Open ``filename`` for writing, emit ``header``, return (file, writer)."""
        # newline='' is required by the csv module: the writer emits its own
        # line terminator, and newline translation would otherwise turn it
        # into '\r\r\n' on Windows.
        handle = open(os.path.join(output_dir, filename), 'w', encoding='utf8', newline='')
        self._handles.append(handle)
        writer = csv.writer(handle)
        writer.writerow(header)
        return handle, writer

    def close(self):
        """Close every file that was opened; calling it more than once is harmless."""
        for handle in self._handles:
            handle.close()
53
+
54
+
55
class GitCsvGenerator():
    """Write git statistics for one or more repositories to CSV files.

    Intended to be used as a context manager: entering opens the output files
    in ``output_dir`` and leaving closes them.  ``collect`` is called once per
    repository, with the process's current working directory already set to
    that repository (the data generators are invoked without a path).
    """

    def __init__(self, conf, output_dir):
        """Remember the configuration and output directory; no files are opened yet."""
        self.conf = conf
        self.files: _FileHandles = None
        self.output_dir = output_dir

    def __enter__(self):
        """Open the CSV output files and return this generator."""
        self.files = _FileHandles(self.output_dir)
        # Returning self makes ``with GitCsvGenerator(...) as gen:`` usable.
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the CSV output files; exceptions from the body are not suppressed."""
        # Guard against being called when the files were never opened.
        if self.files is not None:
            self.files.close()

    def collect(self, dir):
        """Run every generator for the repository at ``dir`` and append its rows.

        The project name written to the ``Repo`` column is
        ``conf['project_name']`` when that is set, otherwise the base name of
        ``dir``.
        """
        # Truthiness also covers a project name of None, not just ''.
        if not self.conf['project_name']:
            self.projectname = os.path.basename(os.path.abspath(dir))
        else:
            self.projectname = self.conf['project_name']

        self.get_total_authors()
        self.get_tags()
        self.get_revision_info()
        self.get_file_info()
        self.get_loc_info()
        self.get_author_info()

    def get_total_authors(self):
        """Append one row per ``AuthorTotals`` record to author_totals.csv."""
        logging.info(f"Getting author totals for {self.projectname}")

        def row_processor(row: AuthorTotals):
            self.files.author_totals_info_writer.writerow([self.projectname, row.author, row.total_commits])

        gen_author_totals_data(self.conf, row_processor)

    def get_tags(self):
        """Append one row per (tag, author) pair to tags.csv."""
        logging.info(f"Getting tag info for {self.projectname}")

        def row_processor(row: Tag):
            # A tag carries a mapping of author -> commit count; flatten it.
            for author, commits in row.authors.items():
                self.files.tag_info_writer.writerow([self.projectname, row.hash, row.stamp, row.commits,
                                                     author, commits])

        gen_tag_data(self.conf, row_processor)

    def get_revision_info(self):
        """Append one row per ``Revision`` record to revs.csv."""
        logging.info(f"Getting rev info for {self.projectname}")

        def row_processor(row: Revision):
            self.files.revision_info_writer.writerow([self.projectname, row.hash, row.stamp, row.timezone,
                                                      row.author, row.email, row.domain, row.file_count])

        gen_revision_data(self.conf, row_processor)

    def get_file_info(self):
        """Append one row per ``File`` record to files.csv."""
        logging.info(f"Getting file info for {self.projectname}")

        def row_processor(row: File):
            self.files.files_info_writer.writerow([self.projectname, row.full_path, row.ext, row.size,
                                                   row.lines])

        gen_file_data(self.conf, row_processor)

    def get_loc_info(self):
        """Append one row per ``LocByDate`` record to loc.csv."""
        logging.info(f"Getting LOC info for {self.projectname}")

        def row_processor(row: LocByDate):
            self.files.loc_info_writer.writerow([self.projectname, row.hash, row.stamp, row.file_count,
                                                 row.lines_inserted, row.lines_deleted, row.total_lines])

        gen_loc_data(self.conf, row_processor)

    def get_author_info(self):
        """Append one row per ``AuthorRow`` record to authors.csv."""
        logging.info(f"Getting author info for {self.projectname}")

        def row_processor(row: AuthorRow):
            self.files.author_info_writer.writerow([self.projectname, row.hash, row.stamp, row.author,
                                                    row.files_modified, row.lines_inserted,
                                                    row.lines_deleted])

        gen_author_data(self.conf, row_processor)
119
+
120
def gen_csv():
    """Command-line entry point: write CSV statistics for each git path.

    Reads the configuration, the repository paths and the output path from
    ``cli.get_cli()``, makes sure the output directory exists, then collects
    every repository into the shared CSV files.  Exits with status 1 when the
    output path is not a usable directory.
    """
    conf, paths, outputpath = cli.get_cli()

    logging.basicConfig(level=conf['logging'], format='%(message)s')
    multiprocessing_logging.install_mp_handler()
    try:
        os.makedirs(outputpath, exist_ok=True)
    except OSError:
        # Not fatal by itself: the isdir() check below reports the problem.
        pass
    if not os.path.isdir(outputpath):
        logging.fatal('Output path is not a directory or does not exist')
        sys.exit(1)

    logging.info(f'Output path: {outputpath}')

    data = GitCsvGenerator(conf, outputpath)
    with data:
        for gitpath in paths:
            logging.info(f'Git path: {gitpath}')

            # Resolve before changing directory: a relative path such as '..'
            # would otherwise be re-interpreted from inside the repository
            # when the project name is derived from it.
            repo_dir = os.path.abspath(gitpath)
            prevdir = os.getcwd()
            os.chdir(repo_dir)
            try:
                logging.info('Collecting data...')
                data.collect(repo_dir)
            finally:
                # Restore the working directory even if collection fails, so
                # later relative paths still resolve from the starting point.
                os.chdir(prevdir)
147
+
148
+if __name__ == '__main__':
149
+    gen_csv()

+ 0
- 1
gitstats/gitdatacollector.py Прегледај датотеку

@@ -15,7 +15,6 @@ class GitDataCollector(DataCollector):
15 15
         
16 16
     def collect(self, directory):
17 17
         super(GitDataCollector, self).collect(directory)
18
-
19 18
         self.total_authors += self.get_total_authors()
20 19
         self.get_tags()
21 20
         self.get_revision_info()