Bladeren bron

Count lines using git objects instead of the filesystem.

This seemed to be roughly three times slower than running "wc -l" directly on
the working tree, but this way we are no longer limited to full and clean
working trees and can also handle bare repositories.
Heikki Hokkanen 16 jaren geleden
bovenliggende
commit
5ef1ff8721
1 gewijzigd bestand met 10 toevoegingen en 7 verwijderingen
  1. 10
    7
      gitstats

+ 10
- 7
gitstats Bestand weergeven

@@ -323,15 +323,19 @@ class GitDataCollector(DataCollector):
323 323
 
324 324
 		# extensions
325 325
 		self.extensions = {} # extension -> files, lines
326
-		lines = getpipeoutput(['git ls-files']).split('\n')
326
+		lines = getpipeoutput(['git ls-tree -r -z HEAD']).split('\000')
327 327
 		self.total_files = len(lines)
328 328
 		for line in lines:
329
-			base = os.path.basename(line)
330
-			# Ignore extensionless (including .hidden files)
331
-			if base.find('.') == -1 or base.rfind('.') == 0:
329
+			if len(line) == 0:
330
+				continue
331
+			parts = re.split('\s+', line, 4)
332
+			sha1 = parts[2]
333
+			filename = parts[3]
334
+
335
+			if filename.find('.') == -1 or filename.rfind('.') == 0:
332 336
 				ext = ''
333 337
 			else:
334
-				ext = base[(base.rfind('.') + 1):]
338
+				ext = filename[(filename.rfind('.') + 1):]
335 339
 			if len(ext) > MAX_EXT_LENGTH:
336 340
 				ext = ''
337 341
 
@@ -340,8 +344,7 @@ class GitDataCollector(DataCollector):
340 344
 
341 345
 			self.extensions[ext]['files'] += 1
342 346
 			try:
343
-				# Escaping could probably be improved here
344
-				self.extensions[ext]['lines'] += int(getpipeoutput(['wc -l "%s"' % line]).split()[0])
347
+				self.extensions[ext]['lines'] += int(getpipeoutput(['git cat-file blob %s' % sha1, 'wc -l']).split()[0])
345 348
 			except:
346 349
 				print 'Warning: Could not count lines for file "%s"' % line
347 350