瀏覽代碼

Initial changes for multi-repo awareness.

Initial attempt to make gitstats create cumulative statistics for multiple
repos (i.e. the case of a single project consisting of more than one git
repository).

Signed-off-by: Heikki Hokkanen <hoxu@users.sf.net>
Wulf C. Krueger 15 年之前
父節點
當前提交
beaf16168a
共有 1 個檔案被更改,包括 70 行新增、54 行刪除
  1. 70
    54
      gitstats

+ 70
- 54
gitstats 查看文件

@@ -92,7 +92,54 @@ class DataCollector:
92 92
 	def __init__(self):
93 93
 		self.stamp_created = time.time()
94 94
 		self.cache = {}
95
-	
95
+		self.total_authors = 0
96
+		self.activity_by_hour_of_day = {} # hour -> commits
97
+		self.activity_by_day_of_week = {} # day -> commits
98
+		self.activity_by_month_of_year = {} # month [1-12] -> commits
99
+		self.activity_by_hour_of_week = {} # weekday -> hour -> commits
100
+		self.activity_by_hour_of_day_busiest = 0
101
+		self.activity_by_hour_of_week_busiest = 0
102
+		self.activity_by_year_week = {} # yy_wNN -> commits
103
+		self.activity_by_year_week_peak = 0
104
+
105
+		self.authors = {} # name -> {commits, first_commit_stamp, last_commit_stamp, last_active_day, active_days, lines_added, lines_removed}
106
+
107
+		self.total_commits = 0
108
+		self.total_files = 0
109
+		self.authors_by_commits = 0
110
+
111
+		# domains
112
+		self.domains = {} # domain -> commits
113
+
114
+		# author of the month
115
+		self.author_of_month = {} # month -> author -> commits
116
+		self.author_of_year = {} # year -> author -> commits
117
+		self.commits_by_month = {} # month -> commits
118
+		self.commits_by_year = {} # year -> commits
119
+		self.first_commit_stamp = 0
120
+		self.last_commit_stamp = 0
121
+		self.last_active_day = None
122
+		self.active_days = set()
123
+
124
+		# lines
125
+		self.total_lines = 0
126
+		self.total_lines_added = 0
127
+		self.total_lines_removed = 0
128
+
129
+		# timezone
130
+		self.commits_by_timezone = {} # timezone -> commits
131
+
132
+		# tags
133
+		self.tags = {}
134
+
135
+		self.files_by_stamp = {} # stamp -> files
136
+
137
+		# extensions
138
+		self.extensions = {} # extension -> files, lines
139
+
140
+		# line statistics
141
+		self.changes_by_date = {} # stamp -> { files, ins, del }
142
+
96 143
 	##
97 144
 	# This should be the main function to extract data from the repository.
98 145
 	def collect(self, dir):
@@ -181,45 +228,12 @@ class GitDataCollector(DataCollector):
181 228
 		DataCollector.collect(self, dir)
182 229
 
183 230
 		try:
184
-			self.total_authors = int(getpipeoutput(['git shortlog -s %s' % getcommitrange(), 'wc -l']))
231
+			self.total_authors += int(getpipeoutput(['git shortlog -s %s' % getcommitrange(), 'wc -l']))
185 232
 		except:
186 233
 			self.total_authors = 0
187 234
 		#self.total_lines = int(getoutput('git-ls-files -z |xargs -0 cat |wc -l'))
188 235
 
189
-		self.activity_by_hour_of_day = {} # hour -> commits
190
-		self.activity_by_day_of_week = {} # day -> commits
191
-		self.activity_by_month_of_year = {} # month [1-12] -> commits
192
-		self.activity_by_hour_of_week = {} # weekday -> hour -> commits
193
-		self.activity_by_hour_of_day_busiest = 0
194
-		self.activity_by_hour_of_week_busiest = 0
195
-		self.activity_by_year_week = {} # yy_wNN -> commits
196
-		self.activity_by_year_week_peak = 0
197
-
198
-		self.authors = {} # name -> {commits, first_commit_stamp, last_commit_stamp, last_active_day, active_days, lines_added, lines_removed}
199
-
200
-		# domains
201
-		self.domains = {} # domain -> commits
202
-
203
-		# author of the month
204
-		self.author_of_month = {} # month -> author -> commits
205
-		self.author_of_year = {} # year -> author -> commits
206
-		self.commits_by_month = {} # month -> commits
207
-		self.commits_by_year = {} # year -> commits
208
-		self.first_commit_stamp = 0
209
-		self.last_commit_stamp = 0
210
-		self.last_active_day = None
211
-		self.active_days = set()
212
-
213
-		# lines
214
-		self.total_lines = 0
215
-		self.total_lines_added = 0
216
-		self.total_lines_removed = 0
217
-
218
-		# timezone
219
-		self.commits_by_timezone = {} # timezone -> commits
220
-
221 236
 		# tags
222
-		self.tags = {}
223 237
 		lines = getpipeoutput(['git show-ref --tags']).split('\n')
224 238
 		for line in lines:
225 239
 			if len(line) == 0:
@@ -252,7 +266,7 @@ class GitDataCollector(DataCollector):
252 266
 				parts = re.split('\s+', line, 2)
253 267
 				commits = int(parts[1])
254 268
 				author = parts[2]
255
-				self.tags[tag]['commits'] += commits
269
+				self.tags[tag]['commits'] = commits
256 270
 				self.tags[tag]['authors'][author] = commits
257 271
 
258 272
 		# Collect revision statistics
@@ -359,7 +373,6 @@ class GitDataCollector(DataCollector):
359 373
 
360 374
 		# TODO Optimize this, it's the worst bottleneck
361 375
 		# outputs "<stamp> <files>" for each revision
362
-		self.files_by_stamp = {} # stamp -> files
363 376
 		revlines = getpipeoutput(['git rev-list --pretty=format:"%%at %%T" %s' % getcommitrange('HEAD'), 'grep -v ^commit']).strip().split('\n')
364 377
 		lines = []
365 378
 		for revline in revlines:
@@ -367,7 +380,7 @@ class GitDataCollector(DataCollector):
367 380
 			linecount = self.getFilesInCommit(rev)
368 381
 			lines.append('%d %d' % (int(time), linecount))
369 382
 
370
-		self.total_commits = len(lines)
383
+		self.total_commits += len(lines)
371 384
 		for line in lines:
372 385
 			parts = line.split(' ')
373 386
 			if len(parts) != 2:
@@ -379,9 +392,8 @@ class GitDataCollector(DataCollector):
379 392
 				print 'Warning: failed to parse line "%s"' % line
380 393
 
381 394
 		# extensions
382
-		self.extensions = {} # extension -> files, lines
383 395
 		lines = getpipeoutput(['git ls-tree -r -z %s' % getcommitrange('HEAD', end_only = True)]).split('\000')
384
-		self.total_files = len(lines)
396
+		self.total_files += len(lines)
385 397
 		for line in lines:
386 398
 			if len(line) == 0:
387 399
 				continue
@@ -498,9 +510,9 @@ class GitDataCollector(DataCollector):
498 510
 	def refine(self):
499 511
 		# authors
500 512
 		# name -> {place_by_commits, commits_frac, date_first, date_last, timedelta}
501
-		authors_by_commits = getkeyssortedbyvaluekey(self.authors, 'commits')
502
-		authors_by_commits.reverse() # most first
503
-		for i, name in enumerate(authors_by_commits):
513
+		self.authors_by_commits = getkeyssortedbyvaluekey(self.authors, 'commits')
514
+		self.authors_by_commits.reverse() # most first
515
+		for i, name in enumerate(self.authors_by_commits):
504 516
 			self.authors[name]['place_by_commits'] = i + 1
505 517
 
506 518
 		for name in self.authors.keys():
@@ -1027,8 +1039,8 @@ class HTMLReportCreator(ReportCreator):
1027 1039
 		tags_sorted_by_date_desc = map(lambda el : el[1], reversed(sorted(map(lambda el : (el[1]['date'], el[0]), data.tags.items()))))
1028 1040
 		for tag in tags_sorted_by_date_desc:
1029 1041
 			authorinfo = []
1030
-			authors_by_commits = getkeyssortedbyvalues(data.tags[tag]['authors'])
1031
-			for i in reversed(authors_by_commits):
1042
+			self.authors_by_commits = getkeyssortedbyvalues(data.tags[tag]['authors'])
1043
+			for i in reversed(self.authors_by_commits):
1032 1044
 				authorinfo.append('%s (%d)' % (i, data.tags[tag]['authors'][i]))
1033 1045
 			f.write('<tr><td>%s</td><td>%s</td><td>%d</td><td>%s</td></tr>' % (tag, data.tags[tag]['date'], data.tags[tag]['commits'], ', '.join(authorinfo)))
1034 1046
 		f.write('</table>')
@@ -1276,7 +1288,7 @@ class GitStats:
1276 1288
 
1277 1289
 		if len(args) < 2:
1278 1290
 			print """
1279
-Usage: gitstats [options] <gitpath> <outputpath>
1291
+Usage: gitstats [options] <gitpath> [<gitpath>] <outputpath>
1280 1292
 
1281 1293
 Options:
1282 1294
 -c key=value     Override configuration value
@@ -1286,8 +1298,7 @@ Default config values:
1286 1298
 """ % conf
1287 1299
 			sys.exit(0)
1288 1300
 
1289
-		gitpath = args[0]
1290
-		outputpath = os.path.abspath(args[1])
1301
+		outputpath = os.path.abspath(args[-1])
1291 1302
 		rundir = os.getcwd()
1292 1303
 
1293 1304
 		try:
@@ -1298,17 +1309,22 @@ Default config values:
1298 1309
 			print 'FATAL: Output path is not a directory or does not exist'
1299 1310
 			sys.exit(1)
1300 1311
 
1301
-		print 'Git path: %s' % gitpath
1302 1312
 		print 'Output path: %s' % outputpath
1303
-
1304
-		os.chdir(gitpath)
1305
-
1306 1313
 		cachefile = os.path.join(outputpath, 'gitstats.cache')
1307 1314
 
1308
-		print 'Collecting data...'
1309 1315
 		data = GitDataCollector()
1310 1316
 		data.loadCache(cachefile)
1311
-		data.collect(gitpath)
1317
+
1318
+		for path in args[0:-1]:
1319
+			gitpath = path
1320
+
1321
+			print 'Git path: %s' % gitpath
1322
+
1323
+			os.chdir(gitpath)
1324
+
1325
+			print 'Collecting data...'
1326
+			data.collect(gitpath)
1327
+
1312 1328
 		print 'Refining data...'
1313 1329
 		data.saveCache(cachefile)
1314 1330
 		data.refine()