瀏覽代碼

Attempt to optimize file counting.

The two executions of 'git-rev-list' were merged, and Python is now used instead
of the sh snippet. From initial testing, this seems to _slow_ things down.
Heikki Hokkanen 18 年之前
父節點
當前提交
c257429c56
共有 2 個文件被更改,包括 16 次插入和 16 次刪除
  1. 0
    2
      doc/TODO.txt
  2. 16
    14
      gitstats

+ 0
- 2
doc/TODO.txt 查看文件

@@ -1,6 +1,4 @@
1 1
 []
2
-- optimization: merge the two git-rev-list commands and use python instead of shell while loop
3
-	- %at %an, remove %H & grep -v
4 2
 - BUG linux-2.6 repository
5 3
 	- git-log --pretty=format:"%at %an" |grep -C3 unknown
6 4
 	- git-rev-list (for number of files in each revision) says "Warning: failed to parse line "<unknown> 17741"

+ 16
- 14
gitstats 查看文件

@@ -128,9 +128,17 @@ class GitDataCollector(DataCollector):
128 128
 			pass
129 129
 
130 130
 		# Collect revision statistics
131
-		# Outputs "<stamp> <author>"
132
-		lines = getoutput('git-rev-list --pretty=format:"%at %an" HEAD |grep -v ^commit').split('\n')
131
+		# "commit <hash>"
132
+		# "<stamp> <author>"
133
+		self.files_by_stamp = {} # stamp -> files
134
+		lines = getoutput('git-rev-list --pretty=format:"%at %an" HEAD').split('\n')
135
+		self.total_commits = len(lines) / 2
136
+		commit = '0'
133 137
 		for line in lines:
138
+			if line[:6] == 'commit':
139
+				commit = line[7:]
140
+				continue
141
+			
134 142
 			# linux-2.6 says "<unknown>" for one line O_o
135 143
 			parts = line.split(' ')
136 144
 			author = ''
@@ -218,20 +226,14 @@ class GitDataCollector(DataCollector):
218 226
 			else:
219 227
 				self.commits_by_year[yy] = 1
220 228
 
221
-		# TODO Optimize this, it's the worst bottleneck
222
-		# outputs "<stamp> <files>" for each revision
223
-		self.files_by_stamp = {} # stamp -> files
224
-		lines = getoutput('git-rev-list --pretty=format:"%at %H" HEAD |grep -v ^commit |while read line; do set $line; echo "$1 $(git-ls-tree -r "$2" |wc -l)"; done').split('\n')
225
-		self.total_commits = len(lines)
226
-		for line in lines:
227
-			parts = line.split(' ')
228
-			if len(parts) != 2:
229
-				continue
230
-			(stamp, files) = parts[0:2]
229
+			# file statistics
230
+			# "<stamp> <files>"
231 231
 			try:
232
-				self.files_by_stamp[int(stamp)] = int(files)
232
+				files = int(getoutput('git-ls-tree -r "%s" |wc -l' % commit, quiet = True))
233 233
 			except ValueError:
234
-				print 'Warning: failed to parse line "%s"' % line
234
+				files = 0
235
+				print 'Warning: failed to collect file statistics for commit "%s"' % commit
236
+			self.files_by_stamp[stamp] = files
235 237
 
236 238
 		# extensions
237 239
 		self.extensions = {} # extension -> files, lines