| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448 |
- #!/usr/bin/python
- # Copyright (c) 2007 Heikki Hokkanen <hoxu@users.sf.net>
- # GPLv2
- import commands
- import datetime
- import os
- import re
- import sys
-
def getoutput(cmd):
    """Run *cmd* through the shell and return everything it printed.

    Drop-in replacement for ``commands.getoutput`` (that module is deprecated
    since Python 2.6 and absent from Python 3): stderr is folded into the
    returned text and exactly one trailing newline is removed.

    cmd -- shell command line; it is echoed to stdout before it runs.
    Returns the combined stdout/stderr of the command as a string.
    """
    import subprocess  # local import keeps this function self-contained

    print('>> %s' % cmd)
    proc = subprocess.Popen(cmd, shell=True,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT)
    output = proc.communicate()[0]
    # Python 3 hands back bytes; Python 2 already gives a str.
    if not isinstance(output, str):
        output = output.decode('utf-8', 'replace')
    # Strip a single newline only, so intentional blank lines survive.
    if output.endswith('\n'):
        output = output[:-1]
    return output
-
def getkeyssortedbyvalues(dict):
    """Return the keys of *dict* ordered by ascending value.

    Keys that share a value are ordered among themselves, because the sort
    runs over (value, key) pairs.
    """
    ranked = sorted([(value, key) for key, value in dict.items()])
    return [key for (value, key) in ranked]
-
- # TODO getdictkeyssortedbyvaluekey(dict, key) - eg. dict['author'] = { 'commits' : 512 } - ...key(dict, 'commits')
-
class DataCollector:
    """Interface for repository statistics collectors.

    Every getter here hands back a neutral placeholder (empty container,
    -1, or the current time); a concrete collector overrides them with the
    values it gathered in collect().
    """

    def __init__(self):
        """Nothing is set up until collect() is called."""

    def collect(self, dir):
        """Main entry point for extracting data; records the directory given."""
        self.dir = dir

    def getAuthorInfo(self, author):
        """Return a dictionary describing *author* (None in this base class)."""
        return None

    def getActivityByDayOfWeek(self):
        """Return commit activity per weekday (empty in this base class)."""
        return {}

    def getActivityByHourOfDay(self):
        """Return commit activity per hour of day (empty in this base class)."""
        return {}

    def getAuthors(self):
        """Return a list of authors (empty in this base class)."""
        return []

    def getFirstCommitDate(self):
        """Return the first commit date; the base class answers with now()."""
        return datetime.datetime.now()

    def getLastCommitDate(self):
        """Return the last commit date; the base class answers with now()."""
        return datetime.datetime.now()

    def getTags(self):
        """Return a list of tags (empty in this base class)."""
        return []

    def getTotalAuthors(self):
        """Return the number of authors (-1 in this base class)."""
        return -1

    def getTotalCommits(self):
        """Return the number of commits (-1 in this base class)."""
        return -1

    def getTotalFiles(self):
        """Return the number of files (-1 in this base class)."""
        return -1

    def getTotalLOC(self):
        """Return the number of lines of code (-1 in this base class)."""
        return -1
-
class GitDataCollector(DataCollector):
    """Collect commit, author and tag statistics by shelling out to git.

    Every command runs in the current working directory, so the caller has
    to chdir into the repository before calling collect().

    Git is invoked as "git <cmd>": the dashed "git-<cmd>" programs are not
    installed on PATH by Git 1.6.0 and later.
    """

    def collect(self, dir):
        """Run git and fill in every statistic this collector exposes.

        dir -- only recorded (through DataCollector.collect); the git
               commands themselves use the current working directory.
        Raises ValueError when one of the counting commands does not print a
        plain integer (for example when git reports an error).
        """
        DataCollector.collect(self, dir)

        self.total_authors = int(getoutput('git log |git shortlog -s |wc -l'))
        self.total_commits = int(getoutput('git rev-list HEAD |wc -l'))
        self.total_files = int(getoutput('git ls-files |wc -l'))
        # NUL-separated names keep paths containing whitespace in one piece.
        self.total_lines = int(getoutput('git ls-files -z |xargs -0 cat |wc -l'))

        self.activity_by_hour_of_day = {}  # hour -> commits
        self.activity_by_day_of_week = {}  # day -> commits

        self.authors = {}  # name -> {commits, first_commit_stamp, last_commit_stamp}

        self.author_of_month = {}  # month -> author -> commits
        self.author_of_year = {}  # year -> author -> commits
        self.commits_by_month = {}  # month -> commits
        self.commits_by_year = {}  # year -> commits
        self.first_commit_stamp = 0  # 0 means "not seen yet"
        self.last_commit_stamp = 0

        self._collectTags()
        # TODO also collect statistics for "last 30 days"/"last 12 months"
        self._collectCommits()

    def _listTagRefs(self):
        """Return (hash, tag name) pairs parsed from `git show-ref --tags`."""
        prefix = 'refs/tags/'
        refs = []
        for line in getoutput('git show-ref --tags').split('\n'):
            fields = line.split(' ', 1)
            # A repository without tags yields a single empty line; skip it
            # (and anything else that is not "<hash> refs/tags/<name>").
            if len(fields) == 2 and fields[1].startswith(prefix):
                refs.append((fields[0], fields[1][len(prefix):]))
        return refs

    def _collectTags(self):
        """Fill self.tags: tag name -> {'stamp', 'hash', 'date'}."""
        self.tags = {}
        for sha, tag in self._listTagRefs():
            output = getoutput('git log "%s" --pretty=format:"%%at %%an" -n 1' % sha)
            if not output:
                continue
            try:
                stamp = int(output.split(' ')[0])
            except ValueError:
                stamp = 0
            self.tags[tag] = {
                'stamp': stamp,
                'hash': sha,
                'date': datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'),
            }

    def _collectCommits(self):
        """Walk every commit reachable from HEAD and update all counters."""
        lines = getoutput('git rev-list --pretty=format:"%at %an" HEAD |grep -v ^commit').split('\n')
        for line in lines:
            if not line.strip():
                # No output at all (or a stray blank line) is not a commit.
                continue

            # linux-2.6 says "<unknown>" for one line O_o
            fields = line.split(' ')
            try:
                stamp = int(fields[0])
            except ValueError:
                stamp = 0
            author = ' '.join(fields[1:])
            date = datetime.datetime.fromtimestamp(float(stamp))

            # First and last commit stamp.  %at is the *author* date, which
            # is not guaranteed to arrive in order, so track min/max instead
            # of trusting line order.  Unparsable stamps (0) are left out so
            # they cannot drag the range back to 1970.
            if stamp:
                if not self.last_commit_stamp or stamp > self.last_commit_stamp:
                    self.last_commit_stamp = stamp
                if not self.first_commit_stamp or stamp < self.first_commit_stamp:
                    self.first_commit_stamp = stamp

            # activity by hour of day / day of week
            hour = date.hour
            self.activity_by_hour_of_day[hour] = self.activity_by_hour_of_day.get(hour, 0) + 1
            day = date.weekday()
            self.activity_by_day_of_week[day] = self.activity_by_day_of_week.get(day, 0) + 1

            # author stats
            info = self.authors.setdefault(
                author,
                {'commits': 0, 'first_commit_stamp': 0, 'last_commit_stamp': 0})
            info['commits'] += 1
            if stamp:
                if not info['last_commit_stamp'] or stamp > info['last_commit_stamp']:
                    info['last_commit_stamp'] = stamp
                if not info['first_commit_stamp'] or stamp < info['first_commit_stamp']:
                    info['first_commit_stamp'] = stamp

            # author of the month/year
            yymm = date.strftime('%Y-%m')
            month_authors = self.author_of_month.setdefault(yymm, {})
            month_authors[author] = month_authors.get(author, 0) + 1
            self.commits_by_month[yymm] = self.commits_by_month.get(yymm, 0) + 1

            yy = date.year
            year_authors = self.author_of_year.setdefault(yy, {})
            year_authors[author] = year_authors.get(author, 0) + 1
            self.commits_by_year[yy] = self.commits_by_year.get(yy, 0) + 1

    def getActivityByDayOfWeek(self):
        """Return {weekday (0 = Monday): commits}."""
        return self.activity_by_day_of_week

    def getActivityByHourOfDay(self):
        """Return {hour (0-23): commits}."""
        return self.activity_by_hour_of_day

    def getAuthorInfo(self, author):
        """Return commit count, share of all commits and date range of *author*.

        The result has the keys 'commits', 'commits_frac' (a percentage),
        'date_first' and 'date_last' (both formatted YYYY-MM-DD).
        Raises KeyError for an author that collect() did not see.
        """
        a = self.authors[author]

        commits = a['commits']
        commits_frac = (100 * float(commits)) / self.getTotalCommits()
        date_first = datetime.datetime.fromtimestamp(a['first_commit_stamp']).strftime('%Y-%m-%d')
        date_last = datetime.datetime.fromtimestamp(a['last_commit_stamp']).strftime('%Y-%m-%d')

        return {
            'commits': commits,
            'commits_frac': commits_frac,
            'date_first': date_first,
            'date_last': date_last,
        }

    def getAuthors(self):
        """Return the list of author names seen by collect()."""
        return list(self.authors)

    def getFirstCommitDate(self):
        """Return the earliest author date as a datetime."""
        return datetime.datetime.fromtimestamp(self.first_commit_stamp)

    def getLastCommitDate(self):
        """Return the latest author date as a datetime."""
        return datetime.datetime.fromtimestamp(self.last_commit_stamp)

    def getTags(self):
        """Return all tag names; an empty list when the repository has none.

        Names are taken whole after the "refs/tags/" prefix, so tags that
        contain a slash are not cut short.
        """
        return [tag for (sha, tag) in self._listTagRefs()]

    def getTagDate(self, tag):
        """Return the YYYY-MM-DD date of the commit *tag* points at."""
        return self.revToDate('tags/' + tag)

    def getTotalAuthors(self):
        """Return the number of distinct authors counted by git shortlog."""
        return self.total_authors

    def getTotalCommits(self):
        """Return the number of commits reachable from HEAD."""
        return self.total_commits

    def getTotalFiles(self):
        """Return the number of files tracked by git."""
        return self.total_files

    def getTotalLOC(self):
        """Return the total line count over all tracked files."""
        return self.total_lines

    def revToDate(self, rev):
        """Return the author date of revision *rev* formatted YYYY-MM-DD.

        Raises ValueError when git does not print a timestamp for *rev*.
        """
        stamp = int(getoutput('git log --pretty=format:%%at "%s" -n 1' % rev))
        return datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d')
-
class ReportCreator:
    """Base class for report writers; it only remembers what it was given."""

    def __init__(self):
        """Nothing is stored until create() is called."""

    def create(self, data, path):
        """Record the data source and the output path on the instance."""
        self.data, self.path = data, path
-
class HTMLReportCreator(ReportCreator):
    """Write the collected statistics as static HTML pages.

    create() produces index.html, activity.html, authors.html and tags.html
    plus the data files hour_of_day.dat and day_of_week.dat, all inside the
    output directory.  Files are opened with ``with`` (Python 2.6+) so they
    are closed even when writing fails half-way.
    """

    def create(self, data, path):
        """Generate every page of the report.

        data -- collector object providing the get*() accessors as well as
                the commits_by_month, commits_by_year, author_of_month,
                author_of_year and tags dictionaries.
        path -- existing directory that receives the generated files.
        """
        ReportCreator.create(self, data, path)

        self._writeIndex(data, path)
        self._writeActivity(data, path)
        self._writeAuthors(data, path)
        self._writeTags(data, path)

    def _esc(self, text):
        """Escape &, < and > so text taken from the repository stays text."""
        from xml.sax.saxutils import escape  # stdlib on Python 2 and 3
        return escape(text)

    def _percent(self, part, total):
        """Return part as a float percentage of total (0.0 if total is 0)."""
        if not total:
            return 0.0
        return (100.0 * part) / total

    def _writeIndex(self, data, path):
        """Write index.html: the general summary."""
        # %M is minutes; '%H:%m:%S' would print the month number instead.
        stamp_format = '%Y-%m-%d %H:%M:%S'
        with open(os.path.join(path, 'index.html'), 'w') as f:
            self.printHeader(f)

            f.write('<h1>StatGit</h1>')

            self.printNav(f)

            f.write('<dl>')
            f.write('<dt>Generated</dt><dd>%s</dd>' % datetime.datetime.now().strftime(stamp_format))
            f.write('<dt>Report Period</dt><dd>%s to %s</dd>' % (data.getFirstCommitDate().strftime(stamp_format), data.getLastCommitDate().strftime(stamp_format)))
            f.write('<dt>Total Files</dt><dd>%s</dd>' % data.getTotalFiles())
            f.write('<dt>Total Lines of Code</dt><dd>%s</dd>' % data.getTotalLOC())
            f.write('<dt>Total Commits</dt><dd>%s</dd>' % data.getTotalCommits())
            f.write('<dt>Authors</dt><dd>%s</dd>' % data.getTotalAuthors())
            f.write('</dl>')

            f.write('</body>\n</html>')

    def _writeActivity(self, data, path):
        """Write activity.html plus hour_of_day.dat and day_of_week.dat."""
        hour_of_day = data.getActivityByHourOfDay()
        day_of_week = data.getActivityByDayOfWeek()
        totalcommits = data.getTotalCommits()

        with open(os.path.join(path, 'activity.html'), 'w') as f:
            self.printHeader(f)
            f.write('<h1>Activity</h1>')
            self.printNav(f)

            f.write('<h2>Last 30 days</h2>')

            f.write('<h2>Last 12 months</h2>')

            # Hour of Day
            f.write('\n<h2>Hour of Day</h2>\n\n')
            f.write('<table><tr><th>Hour</th>')
            # Header labels use the same 0..23 numbering as the data rows.
            for hour in range(0, 24):
                f.write('<th>%d</th>' % hour)
            f.write('</tr>\n<tr><th>Commits</th>')
            with open(os.path.join(path, 'hour_of_day.dat'), 'w') as fp:
                for hour in range(0, 24):
                    count = hour_of_day.get(hour, 0)
                    f.write('<td>%d</td>' % count)
                    fp.write('%d %d\n' % (hour, count))
            f.write('</tr>\n<tr><th>%</th>')
            for hour in range(0, 24):
                f.write('<td>%.2f</td>' % self._percent(hour_of_day.get(hour, 0), totalcommits))
            f.write('</tr></table>')

            # Day of Week
            # TODO show also by hour of weekday?
            f.write('\n<h2>Day of Week</h2>\n\n')
            f.write('<table>')
            f.write('<tr><th>Day</th><th>Total (%)</th></tr>')
            with open(os.path.join(path, 'day_of_week.dat'), 'w') as fp:
                for d in range(0, 7):
                    # A weekday without any commit is simply absent from the
                    # dictionary, so never index it directly.
                    count = day_of_week.get(d, 0)
                    fp.write('%d %d\n' % (d + 1, count))
                    f.write('<tr>')
                    f.write('<th>%d</th>' % (d + 1))
                    if d in day_of_week:
                        f.write('<td>%d (%.2f%%)</td>' % (count, self._percent(count, totalcommits)))
                    else:
                        f.write('<td>0</td>')
                    f.write('</tr>')
            f.write('</table>')

            # Commits by year/month
            f.write('<h2>Commits by year/month</h2>')
            f.write('<table><tr><th>Month</th><th>Commits</th></tr>')
            for yymm in sorted(data.commits_by_month, reverse=True):
                f.write('<tr><td>%s</td><td>%d</td></tr>' % (yymm, data.commits_by_month[yymm]))
            f.write('</table>')

            # Commits by year
            f.write('<h2>Commits by year</h2>')
            f.write('<table><tr><th>Year</th><th>Commits</th></tr>')
            for yy in sorted(data.commits_by_year, reverse=True):
                f.write('<tr><td>%s</td><td>%d</td></tr>' % (yy, data.commits_by_year[yy]))
            f.write('</table>')

            # Close the document like the other pages do.
            f.write('</body></html>')

    def _writeTopAuthorRows(self, f, author_of_period, commits_by_period):
        """Write one table row per period naming its most active author."""
        for period in sorted(author_of_period, reverse=True):
            # Highest commit count wins; ties go to the greater name, which
            # is what sorting (count, name) pairs and taking the last gives.
            commits, top = max((count, name) for name, count in author_of_period[period].items())
            total = commits_by_period[period]
            f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td></tr>' % (period, self._esc(top), commits, self._percent(commits, total), total))

    def _writeAuthors(self, data, path):
        """Write authors.html: author list, author of month, author of year."""
        with open(os.path.join(path, 'authors.html'), 'w') as f:
            self.printHeader(f)

            f.write('<h1>Authors</h1>')
            self.printNav(f)

            f.write('\n<h2>List of authors</h2>\n\n')

            f.write('<table class="authors">')
            f.write('<tr><th>Author</th><th>Commits (%)</th><th>First commit</th><th>Last commit</th></tr>')
            for author in sorted(data.getAuthors()):
                info = data.getAuthorInfo(author)
                f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%s</td><td>%s</td></tr>' % (self._esc(author), info['commits'], info['commits_frac'], info['date_first'], info['date_last']))
            f.write('</table>')

            f.write('\n<h2>Author of Month</h2>\n\n')
            f.write('<table>')
            f.write('<tr><th>Month</th><th>Author</th><th>Commits (%)</th></tr>')
            self._writeTopAuthorRows(f, data.author_of_month, data.commits_by_month)
            f.write('</table>')

            f.write('\n<h2>Author of Year</h2>\n\n')
            f.write('<table><tr><th>Year</th><th>Author</th><th>Commits (%)</th></tr>')
            self._writeTopAuthorRows(f, data.author_of_year, data.commits_by_year)
            f.write('</table>')

            f.write('</body></html>')

    def _writeTags(self, data, path):
        """Write tags.html: tag count, commits per tag and tags by date."""
        total_tags = len(data.tags)
        # A repository without tags must not divide by zero; float division
        # keeps the fractional part that %.2f is meant to show.
        if total_tags:
            commits_per_tag = float(data.getTotalCommits()) / total_tags
        else:
            commits_per_tag = 0.0

        with open(os.path.join(path, 'tags.html'), 'w') as f:
            self.printHeader(f)
            f.write('<h1>Tags</h1>')
            self.printNav(f)

            f.write('<dl>')
            f.write('<dt>Total tags</dt><dd>%d</dd>' % total_tags)
            f.write('<dt>Average commits per tag</dt><dd>%.2f</dd>' % commits_per_tag)
            f.write('</dl>')

            f.write('<table>')
            f.write('<tr><th>Name</th><th>Date</th></tr>')
            # sort the tags by date desc
            dated = sorted(((info['date'], tag) for tag, info in data.tags.items()), reverse=True)
            for date, tag in dated:
                f.write('<tr><td>%s</td><td>%s</td></tr>' % (self._esc(tag), date))
            f.write('</table>')

            f.write('</body></html>')

    def printHeader(self, f):
        """Write the HTML prologue up to and including the opening <body>."""
        f.write("""<html>
<head>
<title>StatGit</title>
<link rel="stylesheet" href="statgit.css" type="text/css" />
</head>
<body>
""")

    def printNav(self, f):
        """Write the navigation list shared by every page."""
        # NOTE(review): files.html and lines.html are linked here but are not
        # generated by create().
        f.write("""
<div class="nav">
<ul>
<li><a href="index.html">General</a></li>
<li><a href="activity.html">Activity</a></li>
<li><a href="authors.html">Authors</a></li>
<li><a href="files.html">Files</a></li>
<li><a href="lines.html">Lines</a></li>
<li><a href="tags.html">Tags</a></li>
</ul>
</div>
""")
-
-
# Help text shown when the two mandatory paths are not both supplied.
usage = """
Usage: statgit [options] <gitpath> <outputpath>

Options:
-o html
"""

# Guard clause: without both positional arguments there is nothing to do.
if len(sys.argv) < 3:
    print(usage)
    sys.exit(0)

gitpath = sys.argv[1]
# Resolve the output directory now, because the working directory changes
# below and a relative path would otherwise point somewhere else.
outputpath = os.path.abspath(sys.argv[2])

print('Git path: %s' % gitpath)
print('Output path: %s' % outputpath)

# The collector runs git in the current directory, so move into the repository.
os.chdir(gitpath)

print('Collecting data...')
data = GitDataCollector()
data.collect(gitpath)

print('Generating report...')
report = HTMLReportCreator()
report.create(data, outputpath)
-
|