#!/usr/bin/python
# Copyright (c) 2007 Heikki Hokkanen
# GPLv2
#
# Collects statistics from a git repository and renders them as static HTML
# pages plus gnuplot graphs.
#
# NOTE(review): the HTML tags inside the report strings (and the <gitpath> /
# <outputpath> placeholders in the usage text) were rebuilt from the plain
# text that remained in the reviewed copy of this file.  printHeader() and
# printNav() in particular are minimal stand-ins -- confirm all markup
# against the canonical template before release.
import datetime
import glob
import os
import re
import sys
import time

try:
    import commands
except ImportError:
    # Python 3 dropped the commands module; subprocess offers the same
    # getoutput() helper.
    import subprocess as commands

GNUPLOT_COMMON = 'set terminal png transparent\nset size 0.5,0.5\n'


def getoutput(cmd):
    """Echo *cmd* to stdout, run it through the shell and return its output."""
    print('>> %s' % cmd)
    return commands.getoutput(cmd)


def getkeyssortedbyvalues(dict):
    """Return the keys of *dict* ordered by ascending value (ties by key)."""
    pairs = sorted((value, key) for (key, value) in dict.items())
    return [key for (value, key) in pairs]

# TODO getdictkeyssortedbyvaluekey(dict, key) - eg. dict['author'] = { 'commits' : 512 } - ...key(dict, 'commits')


def _bump(counter, key):
    """Increment counter[key], treating a missing key as zero."""
    counter[key] = counter.get(key, 0) + 1


def _percent(part, total):
    """Return *part* as a percentage of *total*, or 0.0 when *total* is zero."""
    if not total:
        return 0.0
    return (100.0 * part) / total


def _escape_html(text):
    """Escape the characters that are special in HTML text."""
    return (str(text).replace('&', '&amp;').replace('<', '&lt;')
            .replace('>', '&gt;').replace('"', '&quot;'))


def _write_file(filename, text):
    """Write *text* to *filename*, closing the file even if the write fails."""
    f = open(filename, 'w')
    try:
        f.write(text)
    finally:
        f.close()


class DataCollector:
    """Base class of repository data collectors; every getter returns a stub."""

    def __init__(self):
        # Remembered so the report can show how long the run took.
        self.stamp_created = time.time()

    def collect(self, dir):
        """Main entry point for extracting data from the repository in *dir*."""
        self.dir = dir

    def getAuthorInfo(self, author):
        """Return a dictionary describing *author* (None in the base class)."""
        return None

    def getActivityByDayOfWeek(self):
        return {}

    def getActivityByHourOfDay(self):
        return {}

    def getAuthors(self):
        """Return a list of authors."""
        return []

    def getFirstCommitDate(self):
        return datetime.datetime.now()

    def getLastCommitDate(self):
        return datetime.datetime.now()

    def getStampCreated(self):
        return self.stamp_created

    def getTags(self):
        return []

    def getTotalAuthors(self):
        return -1

    def getTotalCommits(self):
        return -1

    def getTotalFiles(self):
        return -1

    def getTotalLOC(self):
        return -1


class GitDataCollector(DataCollector):
    """Collects statistics by running git in the current working directory."""

    def collect(self, dir):
        """Run the git queries and populate every statistics attribute."""
        DataCollector.collect(self, dir)

        # "git <cmd>" is used rather than the dashed "git-<cmd>" spelling,
        # which current Git releases no longer install on PATH.
        self.total_authors = int(getoutput('git log |git shortlog -s |wc -l'))
        self.total_commits = int(getoutput('git rev-list HEAD |wc -l'))
        self.total_files = int(getoutput('git ls-files |wc -l'))
        self.total_lines = int(getoutput('git ls-files -z |xargs -0 cat |wc -l'))

        self.activity_by_hour_of_day = {}  # hour -> commits
        self.activity_by_day_of_week = {}  # day -> commits
        self.activity_by_month_of_year = {}  # month [1-12] -> commits

        self.authors = {}  # name -> {commits, first_commit_stamp, last_commit_stamp}

        # author of the month
        self.author_of_month = {}  # month -> author -> commits
        self.author_of_year = {}  # year -> author -> commits
        self.commits_by_month = {}  # month -> commits
        self.commits_by_year = {}  # year -> commits

        self.first_commit_stamp = 0
        self.last_commit_stamp = 0

        self.tags = {}  # tag -> {stamp, hash, date}
        self.files_by_stamp = {}  # stamp -> files

        self._collect_tags()
        self._collect_commits()
        self._collect_file_counts()

    def _collect_tags(self):
        """Fill self.tags with the stamp, hash and date of every tag."""
        for line in getoutput('git show-ref --tags').split('\n'):
            if not line:
                continue
            (sha, tag) = line.split(' ', 1)
            tag = tag.replace('refs/tags/', '')
            output = getoutput('git log "%s" --pretty=format:"%%at %%an" -n 1' % sha)
            if not output:
                continue
            try:
                stamp = int(output.split(' ')[0])
            except ValueError:
                stamp = 0
            self.tags[tag] = {
                'stamp': stamp,
                'hash': sha,
                'date': datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'),
            }

    def _collect_commits(self):
        """Walk the history once and update all per-commit counters."""
        # TODO also collect statistics for "last 30 days"/"last 12 months"
        lines = getoutput('git rev-list --pretty=format:"%at %an" HEAD |grep -v ^commit').split('\n')
        for line in lines:
            # linux-2.6 says "" for one line O_o -- such a line is still
            # counted, with stamp 0 and an empty author name.
            parts = line.split(' ')
            try:
                stamp = int(parts[0])
            except ValueError:
                stamp = 0
            author = ' '.join(parts[1:])
            date = datetime.datetime.fromtimestamp(float(stamp))

            # The first line processed sets the last-commit stamp; every line
            # overwrites the first-commit stamp, so the final line wins.
            if self.last_commit_stamp == 0:
                self.last_commit_stamp = stamp
            self.first_commit_stamp = stamp

            # activity
            _bump(self.activity_by_hour_of_day, date.hour)
            _bump(self.activity_by_day_of_week, date.weekday())
            _bump(self.activity_by_month_of_year, date.month)

            # author stats (same first/last scheme as above)
            info = self.authors.setdefault(author, {})
            if 'last_commit_stamp' not in info:
                info['last_commit_stamp'] = stamp
            info['first_commit_stamp'] = stamp
            _bump(info, 'commits')

            # author of the month/year
            yymm = date.strftime('%Y-%m')
            _bump(self.author_of_month.setdefault(yymm, {}), author)
            _bump(self.commits_by_month, yymm)

            yy = date.year
            _bump(self.author_of_year.setdefault(yy, {}), author)
            _bump(self.commits_by_year, yy)

    def _collect_file_counts(self):
        """Fill self.files_by_stamp with the tree size of every revision."""
        # outputs "<stamp> <files>" for each revision
        lines = getoutput('git rev-list --pretty=format:"%at %H" HEAD |grep -v ^commit |while read line; do set $line; echo "$1 $(git ls-tree -r "$2" |wc -l)"; done').split('\n')
        for line in lines:
            # split() without an argument tolerates a wc that pads its output.
            parts = line.split()
            if len(parts) != 2:
                continue
            (stamp, files) = parts
            self.files_by_stamp[int(stamp)] = int(files)

    def getActivityByDayOfWeek(self):
        return self.activity_by_day_of_week

    def getActivityByHourOfDay(self):
        return self.activity_by_hour_of_day

    def getAuthorInfo(self, author):
        """Return commits, commits_frac, date_first and date_last of *author*.

        Raises KeyError when *author* was not seen by collect().
        """
        a = self.authors[author]
        commits = a['commits']
        first = datetime.datetime.fromtimestamp(a['first_commit_stamp'])
        last = datetime.datetime.fromtimestamp(a['last_commit_stamp'])
        return {
            'commits': commits,
            'commits_frac': _percent(commits, self.getTotalCommits()),
            'date_first': first.strftime('%Y-%m-%d'),
            'date_last': last.strftime('%Y-%m-%d'),
        }

    def getAuthors(self):
        return list(self.authors.keys())

    def getFirstCommitDate(self):
        return datetime.datetime.fromtimestamp(self.first_commit_stamp)

    def getLastCommitDate(self):
        return datetime.datetime.fromtimestamp(self.last_commit_stamp)

    def getTags(self):
        """Return the tag names; an empty list when the repository has none."""
        # -f3- keeps tag names that themselves contain a slash intact.
        lines = getoutput('git show-ref --tags |cut -d/ -f3-')
        return [name for name in lines.split('\n') if name]

    def getTagDate(self, tag):
        return self.revToDate('tags/' + tag)

    def getTotalAuthors(self):
        return self.total_authors

    def getTotalCommits(self):
        return self.total_commits

    def getTotalFiles(self):
        return self.total_files

    def getTotalLOC(self):
        return self.total_lines

    def revToDate(self, rev):
        """Return the date of revision *rev* formatted as YYYY-MM-DD."""
        stamp = int(getoutput('git log --pretty=format:%%at "%s" -n 1' % rev))
        return datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d')


class ReportCreator:
    """Base class of report writers; create() only stores its arguments."""

    def __init__(self):
        pass

    def create(self, data, path):
        self.data = data
        self.path = path


class HTMLReportCreator(ReportCreator):
    """Writes the report as HTML pages, gnuplot data files and graphs."""

    def create(self, data, path):
        """Write all pages for *data* into directory *path*, then plot graphs."""
        ReportCreator.create(self, data, path)
        self._create_index(data, path)
        self._create_activity(data, path)
        self._create_authors(data, path)
        self._create_files(data, path)
        self._create_tags(data, path)
        self.createGraphs(path)

    def _create_index(self, data, path):
        """Write index.html, the general summary page."""
        # %M is minutes; the previous '%H:%m:%S' printed the month there.
        stamp_format = '%Y-%m-%d %H:%M:%S'
        f = open(path + '/index.html', 'w')
        try:
            self.printHeader(f)
            f.write('<h1>StatGit</h1>')
            self.printNav(f)
            f.write('<dl>')
            f.write('<dt>Generated</dt><dd>%s (in %d seconds)</dd>' % (
                datetime.datetime.now().strftime(stamp_format),
                time.time() - data.getStampCreated()))
            f.write('<dt>Report Period</dt><dd>%s to %s</dd>' % (
                data.getFirstCommitDate().strftime(stamp_format),
                data.getLastCommitDate().strftime(stamp_format)))
            f.write('<dt>Total Files</dt><dd>%s</dd>' % data.getTotalFiles())
            f.write('<dt>Total Lines of Code</dt><dd>%s</dd>' % data.getTotalLOC())
            f.write('<dt>Total Commits</dt><dd>%s</dd>' % data.getTotalCommits())
            f.write('<dt>Authors</dt><dd>%s</dd>' % data.getTotalAuthors())
            f.write('</dl>')
            f.write('</body>\n</html>')
        finally:
            f.close()

    def _create_activity(self, data, path):
        """Write activity.html and the .dat files behind its graphs."""
        totalcommits = data.getTotalCommits()
        f = open(path + '/activity.html', 'w')
        try:
            self.printHeader(f)
            f.write('<h1>Activity</h1>')
            self.printNav(f)

            f.write('<h2>Last 30 days</h2>')
            f.write('<h2>Last 12 months</h2>')

            # Hour of Day
            f.write('\n<h2>Hour of Day</h2>\n\n')
            hour_of_day = data.getActivityByHourOfDay()
            f.write('<table><tr><th>Hour</th>')
            for hour in range(1, 25):
                f.write('<th>%d</th>' % hour)
            f.write('</tr>\n<tr><th>Commits</th>')
            for hour in range(24):
                f.write('<td>%d</td>' % hour_of_day.get(hour, 0))
            f.write('</tr>\n<tr><th>%</th>')
            for hour in range(24):
                f.write('<td>%.2f</td>' % _percent(hour_of_day.get(hour, 0), totalcommits))
            f.write('</tr></table>')
            f.write('<img src="hour_of_day.png" />')
            # The plot's x range is [0.5:24.5], hence the 1-based hour column.
            _write_file(path + '/hour_of_day.dat', ''.join(
                '%d %d\n' % (hour + 1, hour_of_day.get(hour, 0)) for hour in range(24)))

            # Day of Week
            # TODO show also by hour of weekday?
            f.write('\n<h2>Day of Week</h2>\n\n')
            day_of_week = data.getActivityByDayOfWeek()
            f.write('<div class="vtable"><table>')
            f.write('<tr><th>Day</th><th>Total (%)</th></tr>')
            for d in range(7):
                f.write('<tr>')
                f.write('<th>%d</th>' % (d + 1))
                if d in day_of_week:
                    f.write('<td>%d (%.2f%%)</td>' % (
                        day_of_week[d], _percent(day_of_week[d], totalcommits)))
                else:
                    f.write('<td>0</td>')
                f.write('</tr>')
            f.write('</table></div>')
            f.write('<img src="day_of_week.png" />')
            # .get() keeps weekdays without any commit from raising KeyError.
            _write_file(path + '/day_of_week.dat', ''.join(
                '%d %d\n' % (d + 1, day_of_week.get(d, 0)) for d in range(7)))

            # Month of Year
            f.write('\n<h2>Month of Year</h2>\n\n')
            f.write('<div class="vtable"><table>')
            f.write('<tr><th>Month</th><th>Commits (%)</th></tr>')
            month_rows = []
            for mm in range(1, 13):
                commits = data.activity_by_month_of_year.get(mm, 0)
                f.write('<tr><td>%d</td><td>%d (%.2f %%)</td></tr>' % (
                    mm, commits, _percent(commits, totalcommits)))
                month_rows.append('%d %d\n' % (mm, commits))
            f.write('</table></div>')
            f.write('<img src="month_of_year.png" />')
            _write_file(path + '/month_of_year.dat', ''.join(month_rows))

            # Commits by year/month
            f.write('<h2>Commits by year/month</h2>')
            f.write('<div class="vtable"><table><tr><th>Month</th><th>Commits</th></tr>')
            for yymm in reversed(sorted(data.commits_by_month.keys())):
                f.write('<tr><td>%s</td><td>%d</td></tr>' % (yymm, data.commits_by_month[yymm]))
            f.write('</table></div>')
            f.write('<img src="commits_by_year_month.png" />')
            _write_file(path + '/commits_by_year_month.dat', ''.join(
                '%s %s\n' % (yymm, data.commits_by_month[yymm])
                for yymm in sorted(data.commits_by_month.keys())))

            # Commits by year
            f.write('<h2>Commits by year</h2>')
            f.write('<div class="vtable"><table><tr><th>Year</th><th>Commits (% of all)</th></tr>')
            for yy in reversed(sorted(data.commits_by_year.keys())):
                f.write('<tr><td>%s</td><td>%d (%.2f%%)</td></tr>' % (
                    yy, data.commits_by_year[yy],
                    _percent(data.commits_by_year[yy], totalcommits)))
            f.write('</table></div>')
            f.write('<img src="commits_by_year.png" />')
            _write_file(path + '/commits_by_year.dat', ''.join(
                '%d %d\n' % (yy, data.commits_by_year[yy])
                for yy in sorted(data.commits_by_year.keys())))

            f.write('</body></html>')
        finally:
            f.close()

    def _create_authors(self, data, path):
        """Write authors.html: author list plus author of the month/year."""
        f = open(path + '/authors.html', 'w')
        try:
            self.printHeader(f)
            f.write('<h1>Authors</h1>')
            self.printNav(f)

            f.write('\n<h2>List of authors</h2>\n\n')
            f.write('<table class="authors">')
            f.write('<tr><th>Author</th><th>Commits (%)</th><th>First commit</th><th>Last commit</th></tr>')
            for author in sorted(data.getAuthors()):
                info = data.getAuthorInfo(author)
                f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%s</td><td>%s</td></tr>' % (
                    _escape_html(author), info['commits'], info['commits_frac'],
                    info['date_first'], info['date_last']))
            f.write('</table>')

            f.write('\n<h2>Author of Month</h2>\n\n')
            f.write('<table>')
            f.write('<tr><th>Month</th><th>Author</th><th>Commits (%)</th></tr>')
            for yymm in reversed(sorted(data.author_of_month.keys())):
                authordict = data.author_of_month[yymm]
                # Highest commit count wins; ties go to the greater name.
                top = getkeyssortedbyvalues(authordict)[-1]
                commits = authordict[top]
                month_total = data.commits_by_month[yymm]
                f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td></tr>' % (
                    yymm, _escape_html(top), commits,
                    _percent(commits, month_total), month_total))
            f.write('</table>')

            f.write('\n<h2>Author of Year</h2>\n\n')
            f.write('<table><tr><th>Year</th><th>Author</th><th>Commits (%)</th></tr>')
            for yy in reversed(sorted(data.author_of_year.keys())):
                authordict = data.author_of_year[yy]
                top = getkeyssortedbyvalues(authordict)[-1]
                commits = authordict[top]
                year_total = data.commits_by_year[yy]
                f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td></tr>' % (
                    yy, _escape_html(top), commits,
                    _percent(commits, year_total), year_total))
            f.write('</table>')
            f.write('</body></html>')
        finally:
            f.close()

    def _create_files(self, data, path):
        """Write files.html and files_by_date.dat."""
        total_files = data.getTotalFiles()
        f = open(path + '/files.html', 'w')
        try:
            self.printHeader(f)
            f.write('<h1>Files</h1>')
            self.printNav(f)

            f.write('<dl>\n')
            f.write('<dt>Total files</dt><dd>%d</dd>' % total_files)
            f.write('<dt>Total lines</dt><dd>%d</dd>' % data.getTotalLOC())
            # NOTE(review): this figure is 100 * lines / files yet is labelled
            # "bytes"; formula and label are kept as found (only the division
            # by zero is guarded) -- confirm the intended metric.
            f.write('<dt>Average file size</dt><dd>%.2f bytes</dd>' % _percent(
                data.getTotalLOC(), total_files))
            f.write('</dl>\n')

            f.write('<h2>File count by date</h2>')
            _write_file(path + '/files_by_date.dat', ''.join(
                '%s %d\n' % (
                    datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'),
                    data.files_by_stamp[stamp])
                for stamp in sorted(data.files_by_stamp.keys())))
            f.write('<img src="files_by_date.png" />')

            f.write('<h2>Average file size by date</h2>')
            f.write('</body></html>')
        finally:
            f.close()

    def _create_tags(self, data, path):
        """Write tags.html: tag totals and the tags, newest first."""
        f = open(path + '/tags.html', 'w')
        try:
            self.printHeader(f)
            f.write('<h1>Tags</h1>')
            self.printNav(f)

            f.write('<dl>')
            f.write('<dt>Total tags</dt><dd>%d</dd>' % len(data.tags))
            if len(data.tags) > 0:
                # float() keeps the fraction that %.2f is meant to display.
                f.write('<dt>Average commits per tag</dt><dd>%.2f</dd>' % (
                    float(data.getTotalCommits()) / len(data.tags)))
            f.write('</dl>')

            f.write('<table>')
            f.write('<tr><th>Name</th><th>Date</th></tr>')
            # sort the tags by date desc
            tags_sorted_by_date_desc = sorted(
                data.tags.keys(),
                key=lambda name: (data.tags[name]['date'], name),
                reverse=True)
            for tag in tags_sorted_by_date_desc:
                f.write('<tr><td>%s</td><td>%s</td></tr>' % (
                    _escape_html(tag), data.tags[tag]['date']))
            f.write('</table>')
            f.write('</body></html>')
        finally:
            f.close()

    def createGraphs(self, path):
        """Write the gnuplot scripts into *path* and run gnuplot on each.

        Changes the process working directory to *path*, because the scripts
        refer to their data and image files by bare file name.
        """
        print('Generating graphs...')
        # Resolve before chdir() so a relative path still globs correctly.
        path = os.path.abspath(path)

        # hour of day
        _write_file(path + '/hour_of_day.plot', GNUPLOT_COMMON + """
set output 'hour_of_day.png'
unset key
set xrange [0.5:24.5]
set xtics 4
set ylabel "Commits"
plot 'hour_of_day.dat' using 1:2:(0.5) w boxes fs solid
""")

        # day of week
        _write_file(path + '/day_of_week.plot', GNUPLOT_COMMON + """
set output 'day_of_week.png'
unset key
set xrange [0.5:7.5]
set xtics 1
set ylabel "Commits"
plot 'day_of_week.dat' using 1:2:(0.5) w boxes fs solid
""")

        # Month of Year
        _write_file(path + '/month_of_year.plot', GNUPLOT_COMMON + """
set output 'month_of_year.png'
unset key
set xrange [0.5:12.5]
set xtics 1
set ylabel "Commits"
plot 'month_of_year.dat' using 1:2:(0.5) w boxes fs solid
""")

        # commits_by_year_month
        # TODO rotate xtic labels by 90 degrees
        _write_file(path + '/commits_by_year_month.plot', GNUPLOT_COMMON + """
set output 'commits_by_year_month.png'
unset key
set xdata time
set timefmt "%Y-%m"
set format x "%Y-%m"
set xtics 15768000
set ylabel "Commits"
plot 'commits_by_year_month.dat' using 1:2:(0.5) w boxes fs solid
""")

        # commits_by_year
        _write_file(path + '/commits_by_year.plot', GNUPLOT_COMMON + """
set output 'commits_by_year.png'
unset key
set xtics 1
set ylabel "Commits"
plot 'commits_by_year.dat' using 1:2:(0.5) w boxes fs solid
""")

        # Files by date
        _write_file(path + '/files_by_date.plot', GNUPLOT_COMMON + """
set output 'files_by_date.png'
unset key
set xdata time
set timefmt "%Y-%m-%d"
set format x "%Y-%m-%d"
set ylabel "Files"
set xtics rotate by 90
plot 'files_by_date.dat' using 1:2 smooth csplines
""")

        os.chdir(path)
        for plot_file in glob.glob(path + '/*.plot'):
            print('>> gnuplot %s' % os.path.basename(plot_file))
            # Quoted so that an output path containing spaces still works.
            os.system('gnuplot "%s"' % plot_file)

    def printHeader(self, f):
        """Write the opening markup shared by every page to file object *f*."""
        # NOTE(review): only the page title could be recovered; restore any
        # further <head> content (e.g. a stylesheet link) from the template.
        f.write(
"""<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
	<title>StatGit</title>
</head>
<body>
""")

    def printNav(self, f):
        """Write the navigation links between the report pages to *f*."""
        # NOTE(review): link list rebuilt from the pages create() writes;
        # confirm labels and markup against the template.
        f.write("""
<div class="nav">
<ul>
<li><a href="index.html">General</a></li>
<li><a href="activity.html">Activity</a></li>
<li><a href="authors.html">Authors</a></li>
<li><a href="files.html">Files</a></li>
<li><a href="tags.html">Tags</a></li>
</ul>
</div>
""")


usage = """
Usage: statgit [options] <gitpath> <outputpath>

Options:
  -o html
"""


def main(argv):
    """Collect statistics from argv[1] and write the HTML report to argv[2]."""
    if len(argv) < 3:
        print(usage)
        sys.exit(0)

    gitpath = argv[1]
    # Made absolute first: the process changes directory to gitpath below.
    outputpath = os.path.abspath(argv[2])

    print('Git path: %s' % gitpath)
    print('Output path: %s' % outputpath)

    os.chdir(gitpath)

    print('Collecting data...')
    data = GitDataCollector()
    data.collect(gitpath)

    print('Generating report...')
    report = HTMLReportCreator()
    report.create(data, outputpath)


# Guarded so that importing the module does not start a run.
if __name__ == '__main__':
    main(sys.argv)