#!/usr/bin/python
# Copyright (c) 2007 Heikki Hokkanen
# GPLv2
"""StatGit: collect statistics from a git repository and write an HTML report.

The module runs git through the shell, aggregates commit activity per hour,
weekday, month and year, and writes ``index.html``, ``activity.html`` and
``authors.html`` into an output directory.
"""
import datetime
import os
import re
import subprocess
import sys

try:  # Python 3
    from html import escape as _html_escape
    from shlex import quote as _shell_quote
except ImportError:  # Python 2
    from cgi import escape as _html_escape
    from pipes import quote as _shell_quote


def getoutput(cmd):
    """Run *cmd* through the shell and return its combined stdout/stderr.

    The command is echoed to stdout before it runs. A single trailing newline
    is removed from the result, matching the behaviour of the Python 2
    ``commands.getoutput`` helper this function used to wrap.
    """
    print('>> %s' % cmd)
    process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT)
    output = process.communicate()[0]
    if not isinstance(output, str):
        # Python 3 hands back bytes; Python 2 already returns str.
        output = output.decode('utf-8', 'replace')
    if output.endswith('\n'):
        output = output[:-1]
    return output


def getkeyssortedbyvalues(dict):
    """Return the keys of *dict* as a list, ordered by ascending value.

    Ties between equal values are broken by the keys themselves. The
    parameter keeps its historical name (shadowing the builtin) so that
    keyword callers keep working.
    """
    pairs = sorted((value, key) for (key, value) in dict.items())
    return [key for (value, key) in pairs]


def _nonempty_lines(output):
    """Split command output into lines, dropping empty ones."""
    return [line for line in output.split('\n') if line]


def _increment(counters, key):
    """Add one to ``counters[key]``, starting from zero when absent."""
    counters[key] = counters.get(key, 0) + 1


def _percent(part, total):
    """Return *part* as a float percentage of *total* (0.0 when total is 0)."""
    if not total:
        return 0.0
    return (100.0 * part) / total


class DataCollector:
    """Base interface for repository statistics; every getter is a stub."""

    def __init__(self):
        pass

    ##
    # This should be the main function to extract data from the repository.
    def collect(self, dir):
        self.dir = dir

    ##
    # Get a dictionary describing one author.
    def getAuthorInfo(self, author):
        return None

    def getActivityByDayOfWeek(self):
        return {}

    def getActivityByHourOfDay(self):
        return {}

    ##
    # Get a list of authors
    def getAuthors(self):
        return []

    def getFirstCommitDate(self):
        return datetime.datetime.now()

    def getLastCommitDate(self):
        return datetime.datetime.now()

    def getTags(self):
        return []

    def getTotalAuthors(self):
        return -1

    def getTotalCommits(self):
        return -1

    def getTotalFiles(self):
        return -1

    def getTotalLOC(self):
        return -1


class GitDataCollector(DataCollector):
    """Collects statistics by running git in the current working directory."""

    def collect(self, dir):
        """Gather totals and per-period activity for the history of HEAD."""
        DataCollector.collect(self, dir)

        self.total_authors = int(getoutput('git log | git shortlog -s | wc -l'))
        self.total_commits = int(getoutput('git rev-list HEAD | wc -l'))
        self.total_files = int(getoutput('git ls-files | wc -l'))
        # NUL-separated names keep paths containing whitespace intact.
        self.total_lines = int(getoutput('git ls-files -z | xargs -0 cat | wc -l'))

        self.activity_by_hour_of_day = {}  # hour -> commits
        self.activity_by_day_of_week = {}  # day -> commits
        self.author_of_month = {}  # month -> author -> commits
        self.author_of_year = {}  # year -> author -> commits
        self.commits_by_month = {}  # month -> commits
        self.first_commit_stamp = 0
        self.last_commit_stamp = 0

        # One "<unix timestamp> <author name>" line per commit, in the order
        # git prints them.
        history = getoutput('git log --pretty=format:"%at %an" HEAD')
        for line in _nonempty_lines(history):
            stamp_text, _, author = line.partition(' ')
            stamp = int(stamp_text)
            date = datetime.datetime.fromtimestamp(stamp)

            # The first line seen becomes the last commit; the final line
            # seen ends up as the first commit.
            if self.last_commit_stamp == 0:
                self.last_commit_stamp = stamp
            self.first_commit_stamp = stamp

            _increment(self.activity_by_hour_of_day, date.hour)
            _increment(self.activity_by_day_of_week, date.weekday())

            yymm = date.strftime('%Y-%m')
            _increment(self.author_of_month.setdefault(yymm, {}), author)
            _increment(self.commits_by_month, yymm)
            _increment(self.author_of_year.setdefault(date.year, {}), author)

    def getActivityByDayOfWeek(self):
        return self.activity_by_day_of_week

    def getActivityByHourOfDay(self):
        return self.activity_by_hour_of_day

    def getAuthorInfo(self, author):
        """Return commit count, share and first/last commit dates of *author*.

        The result is a dict with the keys ``commits``, ``commits_frac``,
        ``date_first`` and ``date_last``. The dates stay ``'0000-00-00'``
        when git reports no revision for the author.
        """
        # The name comes from commit metadata, so quote it for the shell.
        quoted = _shell_quote(author)
        commits = int(getoutput('git rev-list HEAD --author=%s | wc -l' % quoted))
        commits_frac = _percent(commits, self.getTotalCommits())

        date_first = '0000-00-00'
        date_last = '0000-00-00'
        rev_last = getoutput('git rev-list --all --author=%s -n 1' % quoted)
        rev_first = getoutput('git rev-list --all --author=%s | tail -n 1' % quoted)
        if rev_first:
            date_first = self.revToDate(rev_first)
        if rev_last:
            date_last = self.revToDate(rev_last)

        return {
            'commits': commits,
            'commits_frac': commits_frac,
            'date_first': date_first,
            'date_last': date_last,
        }

    def getAuthors(self):
        """Return the sorted, de-duplicated author names across all refs."""
        names = getoutput('git log --all --pretty=format:%an')
        return sorted(set(_nonempty_lines(names)))

    def getFirstCommitDate(self):
        return datetime.datetime.fromtimestamp(self.first_commit_stamp)

    def getLastCommitDate(self):
        return datetime.datetime.fromtimestamp(self.last_commit_stamp)

    def getTags(self):
        """Return the tag names, i.e. everything after ``refs/tags/``."""
        # "-f3-" keeps tag names that themselves contain a slash.
        return _nonempty_lines(getoutput('git show-ref --tags | cut -d/ -f3-'))

    def getTagDate(self, tag):
        return self.revToDate('tags/' + tag)

    def getTotalAuthors(self):
        return self.total_authors

    def getTotalCommits(self):
        return self.total_commits

    def getTotalFiles(self):
        return self.total_files

    def getTotalLOC(self):
        return self.total_lines

    def revToDate(self, rev):
        """Return the author date of *rev* formatted as ``YYYY-MM-DD``."""
        stamp = int(getoutput('git log --pretty=format:%%at -n 1 %s' % _shell_quote(rev)))
        return datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d')


class ReportCreator:
    """Base class for report writers; remembers the data source and path."""

    def __init__(self):
        pass

    def create(self, data, path):
        self.data = data
        self.path = path


class HTMLReportCreator(ReportCreator):
    """Writes index.html, activity.html and authors.html into a directory."""

    def create(self, data, path):
        """Generate all report pages for *data* inside directory *path*."""
        ReportCreator.create(self, data, path)
        self._write_index(data, path)
        self._write_activity(data, path)
        self._write_authors(data, path)

    def _write_index(self, data, path):
        """Write index.html: general totals and the tag table."""
        # %M is minutes; %m (month) would repeat the month in the time part.
        stamp_format = '%Y-%m-%d %H:%M:%S'
        with open(os.path.join(path, 'index.html'), 'w') as f:
            self.printHeader(f)
            f.write('<h1>StatGit</h1>')

            f.write('<dl>')
            f.write('<dt>Generated</dt><dd>%s</dd>'
                    % datetime.datetime.now().strftime(stamp_format))
            f.write('<dt>Report Period</dt><dd>%s to %s</dd>'
                    % (data.getFirstCommitDate().strftime(stamp_format),
                       data.getLastCommitDate().strftime(stamp_format)))
            f.write('<dt>Total Files</dt><dd>%s</dd>' % data.getTotalFiles())
            f.write('<dt>Total Lines of Code</dt><dd>%s</dd>' % data.getTotalLOC())
            f.write('<dt>Total Commits</dt><dd>%s</dd>' % data.getTotalCommits())
            f.write('<dt>Authors</dt><dd>%s</dd>' % data.getTotalAuthors())
            f.write('</dl>')

            f.write('<h1>Tags</h1>')
            f.write('<table>')
            f.write('<tr><th>Name</th><th>Date</th><th>Developers</th></tr>')
            for tag in data.getTags():
                # Only the name is filled in; the other columns stay empty.
                f.write('<tr><td>%s</td><td></td><td></td></tr>' % _html_escape(tag))
            f.write('</table>')
            self._print_footer(f)

    def _write_activity(self, data, path):
        """Write activity.html: commits per hour of day and day of week."""
        totalcommits = data.getTotalCommits()
        with open(os.path.join(path, 'activity.html'), 'w') as f:
            self.printHeader(f)
            f.write('<h1>Activity</h1>')
            f.write('<h2>Last 30 days</h2>')
            f.write('<h2>Last 12 months</h2>')

            f.write('\n<h2>Hour of Day</h2>\n\n')
            hour_of_day = data.getActivityByHourOfDay()
            f.write('<table><tr><th>Hour</th>')
            # Label columns 0..23 so they line up with the data rows below.
            for hour in range(0, 24):
                f.write('<th>%d</th>' % hour)
            f.write('</tr>\n<tr><th>Commits</th>')
            for hour in range(0, 24):
                f.write('<td>%d</td>' % hour_of_day.get(hour, 0))
            f.write('</tr>\n<tr><th>%</th>')
            for hour in range(0, 24):
                f.write('<td>%.2f</td>' % _percent(hour_of_day.get(hour, 0), totalcommits))
            f.write('</tr></table>')

            ### Day of Week
            # TODO show also by hour of weekday?
            f.write('\n<h2>Day of Week</h2>\n\n')
            day_of_week = data.getActivityByDayOfWeek()
            f.write('<table>')
            f.write('<tr><th>Day</th><th>Total (%)</th></tr>')
            for day in range(0, 7):
                f.write('<tr>')
                f.write('<th>%d</th>' % (day + 1))
                if day in day_of_week:
                    f.write('<td>%d (%.2f%%)</td>'
                            % (day_of_week[day], _percent(day_of_week[day], totalcommits)))
                else:
                    f.write('<td>0</td>')
                f.write('</tr>')
            f.write('</table>')
            self._print_footer(f)

    def _write_authors(self, data, path):
        """Write authors.html: author list plus author of month and year."""
        with open(os.path.join(path, 'authors.html'), 'w') as f:
            self.printHeader(f)
            f.write('<h1>Authors</h1>')

            f.write('\n<h2>List of authors</h2>\n\n')
            f.write('<table>')
            f.write('<tr><th>Author</th><th>Commits (%)</th>'
                    '<th>First commit</th><th>Last commit</th></tr>')
            for author in data.getAuthors():
                info = data.getAuthorInfo(author)
                f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%s</td><td>%s</td></tr>'
                        % (_html_escape(author), info['commits'], info['commits_frac'],
                           info['date_first'], info['date_last']))
            f.write('</table>')

            f.write('\n<h2>Author of Month</h2>\n\n')
            f.write('<table>')
            f.write('<tr><th>Month</th><th>Author</th><th>Commits (%)</th></tr>')
            for yymm in sorted(data.author_of_month.keys(), reverse=True):
                authordict = data.author_of_month[yymm]
                # The last key in ascending order is the busiest author.
                top = getkeyssortedbyvalues(authordict)[-1]
                commits = authordict[top]
                total = data.commits_by_month[yymm]
                f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td></tr>'
                        % (yymm, _html_escape(top), commits,
                           _percent(commits, total), total))
            f.write('</table>')

            f.write('\n<h2>Author of Year</h2>\n\n')
            f.write('<table>')
            f.write('<tr><th>Year</th><th>Author</th><th>Commits (%)</th></tr>')
            for yy in sorted(data.author_of_year.keys(), reverse=True):
                authordict = data.author_of_year[yy]
                top = getkeyssortedbyvalues(authordict)[-1]
                commits = authordict[top]
                # Commits of the year are the sum over that year's authors.
                total = sum(authordict.values())
                f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td></tr>'
                        % (yy, _html_escape(top), commits,
                           _percent(commits, total), total))
            f.write('</table>')
            self._print_footer(f)

    def printHeader(self, f):
        """Write the opening HTML boilerplate shared by every page to *f*."""
        f.write("""<html>
<head>
<title>StatGit</title>
</head>
<body>
""")

    def _print_footer(self, f):
        """Write the closing tags matching printHeader to *f*."""
        f.write('</body>\n</html>')


usage = """
Usage: statgit [options] <gitpath> <outputpath>

Options:
  -o html
"""


def main(argv=None):
    """Command-line entry point: ``statgit <gitpath> <outputpath>``."""
    if argv is None:
        argv = sys.argv

    if len(argv) < 3:
        print(usage)
        sys.exit(0)

    gitpath = argv[1]
    # Resolve before chdir so a relative output path is not re-interpreted
    # relative to the repository directory.
    outputpath = os.path.abspath(argv[2])

    print('Git path: %s' % gitpath)
    print('Output path: %s' % outputpath)

    os.chdir(gitpath)

    print('Collecting data...')
    data = GitDataCollector()
    data.collect(gitpath)

    print('Generating report...')
    report = HTMLReportCreator()
    report.create(data, outputpath)


if __name__ == '__main__':
    main()