#!/usr/bin/python
# Copyright (c) 2007 Heikki Hokkanen <hoxu@users.sf.net>
# GPLv2
import commands
import datetime
import os
import re
import subprocess
import sys
import time
from xml.sax.saxutils import escape
  10. GNUPLOT_COMMON = 'set terminal png transparent\nset size 0.5,0.5\n'
  11. def getoutput(cmd):
  12. print '>> %s' % cmd
  13. output = commands.getoutput(cmd)
  14. return output
  15. def getkeyssortedbyvalues(dict):
  16. return map(lambda el : el[1], sorted(map(lambda el : (el[1], el[0]), dict.items())))
# TODO: add getdictkeyssortedbyvaluekey(dict, key) to sort keys by a nested
# value, e.g. with dict['author'] = {'commits': 512}, call
# getdictkeyssortedbyvaluekey(dict, 'commits').
  18. class DataCollector:
  19. def __init__(self):
  20. self.stamp_created = time.time()
  21. pass
  22. ##
  23. # This should be the main function to extract data from the repository.
  24. def collect(self, dir):
  25. self.dir = dir
  26. ##
  27. # : get a dictionary of author
  28. def getAuthorInfo(self, author):
  29. return None
  30. def getActivityByDayOfWeek(self):
  31. return {}
  32. def getActivityByHourOfDay(self):
  33. return {}
  34. ##
  35. # Get a list of authors
  36. def getAuthors(self):
  37. return []
  38. def getFirstCommitDate(self):
  39. return datetime.datetime.now()
  40. def getLastCommitDate(self):
  41. return datetime.datetime.now()
  42. def getStampCreated(self):
  43. return self.stamp_created
  44. def getTags(self):
  45. return []
  46. def getTotalAuthors(self):
  47. return -1
  48. def getTotalCommits(self):
  49. return -1
  50. def getTotalFiles(self):
  51. return -1
  52. def getTotalLOC(self):
  53. return -1
  54. class GitDataCollector(DataCollector):
  55. def collect(self, dir):
  56. DataCollector.collect(self, dir)
  57. self.total_authors = int(getoutput('git-log |git-shortlog -s |wc -l'))
  58. self.total_commits = int(getoutput('git-rev-list HEAD |wc -l'))
  59. self.total_files = int(getoutput('git-ls-files |wc -l'))
  60. self.total_lines = int(getoutput('git-ls-files |xargs cat |wc -l'))
  61. self.activity_by_hour_of_day = {} # hour -> commits
  62. self.activity_by_day_of_week = {} # day -> commits
  63. self.authors = {} # name -> {commits, first_commit_stamp, last_commit_stamp}
  64. # author of the month
  65. self.author_of_month = {} # month -> author -> commits
  66. self.author_of_year = {} # year -> author -> commits
  67. self.commits_by_month = {} # month -> commits
  68. self.commits_by_year = {} # year -> commits
  69. self.first_commit_stamp = 0
  70. self.last_commit_stamp = 0
  71. # tags
  72. self.tags = {}
  73. lines = getoutput('git-show-ref --tags').split('\n')
  74. for line in lines:
  75. if len(line) == 0:
  76. continue
  77. (hash, tag) = line.split(' ')
  78. tag = tag.replace('refs/tags/', '')
  79. output = getoutput('git-log "%s" --pretty=format:"%%at %%an" -n 1' % hash)
  80. if len(output) > 0:
  81. parts = output.split(' ')
  82. stamp = 0
  83. try:
  84. stamp = int(parts[0])
  85. except ValueError:
  86. stamp = 0
  87. self.tags[tag] = { 'stamp': stamp, 'hash' : hash, 'date' : datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d') }
  88. pass
  89. # TODO also collect statistics for "last 30 days"/"last 12 months"
  90. lines = getoutput('git-rev-list --pretty=format:"%at %an" HEAD |grep -v ^commit').split('\n')
  91. for line in lines:
  92. # linux-2.6 says "<unknown>" for one line O_o
  93. parts = line.split(' ')
  94. author = ''
  95. try:
  96. stamp = int(parts[0])
  97. except ValueError:
  98. stamp = 0
  99. if len(parts) > 1:
  100. author = ' '.join(parts[1:])
  101. date = datetime.datetime.fromtimestamp(float(stamp))
  102. # First and last commit stamp
  103. if self.last_commit_stamp == 0:
  104. self.last_commit_stamp = stamp
  105. self.first_commit_stamp = stamp
  106. # activity
  107. # hour
  108. hour = date.hour
  109. if hour in self.activity_by_hour_of_day:
  110. self.activity_by_hour_of_day[hour] += 1
  111. else:
  112. self.activity_by_hour_of_day[hour] = 1
  113. # day
  114. day = date.weekday()
  115. if day in self.activity_by_day_of_week:
  116. self.activity_by_day_of_week[day] += 1
  117. else:
  118. self.activity_by_day_of_week[day] = 1
  119. # author stats
  120. if author not in self.authors:
  121. self.authors[author] = {}
  122. # TODO commits
  123. if 'last_commit_stamp' not in self.authors[author]:
  124. self.authors[author]['last_commit_stamp'] = stamp
  125. self.authors[author]['first_commit_stamp'] = stamp
  126. if 'commits' in self.authors[author]:
  127. self.authors[author]['commits'] += 1
  128. else:
  129. self.authors[author]['commits'] = 1
  130. # author of the month/year
  131. yymm = datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m')
  132. if yymm in self.author_of_month:
  133. if author in self.author_of_month[yymm]:
  134. self.author_of_month[yymm][author] += 1
  135. else:
  136. self.author_of_month[yymm][author] = 1
  137. else:
  138. self.author_of_month[yymm] = {}
  139. self.author_of_month[yymm][author] = 1
  140. if yymm in self.commits_by_month:
  141. self.commits_by_month[yymm] += 1
  142. else:
  143. self.commits_by_month[yymm] = 1
  144. yy = datetime.datetime.fromtimestamp(stamp).year
  145. if yy in self.author_of_year:
  146. if author in self.author_of_year[yy]:
  147. self.author_of_year[yy][author] += 1
  148. else:
  149. self.author_of_year[yy][author] = 1
  150. else:
  151. self.author_of_year[yy] = {}
  152. self.author_of_year[yy][author] = 1
  153. if yy in self.commits_by_year:
  154. self.commits_by_year[yy] += 1
  155. else:
  156. self.commits_by_year[yy] = 1
  157. def getActivityByDayOfWeek(self):
  158. return self.activity_by_day_of_week
  159. def getActivityByHourOfDay(self):
  160. return self.activity_by_hour_of_day
  161. def getAuthorInfo(self, author):
  162. a = self.authors[author]
  163. commits = a['commits']
  164. commits_frac = (100 * float(commits)) / self.getTotalCommits()
  165. date_first = datetime.datetime.fromtimestamp(a['first_commit_stamp']).strftime('%Y-%m-%d')
  166. date_last = datetime.datetime.fromtimestamp(a['last_commit_stamp']).strftime('%Y-%m-%d')
  167. res = { 'commits': commits, 'commits_frac': commits_frac, 'date_first': date_first, 'date_last': date_last }
  168. return res
  169. def getAuthors(self):
  170. return self.authors.keys()
  171. def getFirstCommitDate(self):
  172. return datetime.datetime.fromtimestamp(self.first_commit_stamp)
  173. def getLastCommitDate(self):
  174. return datetime.datetime.fromtimestamp(self.last_commit_stamp)
  175. def getTags(self):
  176. lines = getoutput('git-show-ref --tags |cut -d/ -f3')
  177. return lines.split('\n')
  178. def getTagDate(self, tag):
  179. return self.revToDate('tags/' + tag)
  180. def getTotalAuthors(self):
  181. return self.total_authors
  182. def getTotalCommits(self):
  183. return self.total_commits
  184. def getTotalFiles(self):
  185. return self.total_files
  186. def getTotalLOC(self):
  187. return self.total_lines
  188. def revToDate(self, rev):
  189. stamp = int(getoutput('git-log --pretty=format:%%at "%s" -n 1' % rev))
  190. return datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d')
  191. class ReportCreator:
  192. def __init__(self):
  193. pass
  194. def create(self, data, path):
  195. self.data = data
  196. self.path = path
  197. class HTMLReportCreator(ReportCreator):
  198. def create(self, data, path):
  199. ReportCreator.create(self, data, path)
  200. f = open(path + "/index.html", 'w')
  201. format = '%Y-%m-%d %H:%m:%S'
  202. self.printHeader(f)
  203. f.write('<h1>StatGit</h1>')
  204. self.printNav(f)
  205. f.write('<dl>');
  206. f.write('<dt>Generated</dt><dd>%s (in %d seconds)</dd>' % (datetime.datetime.now().strftime(format), time.time() - data.getStampCreated()));
  207. f.write('<dt>Report Period</dt><dd>%s to %s</dd>' % (data.getFirstCommitDate().strftime(format), data.getLastCommitDate().strftime(format)))
  208. f.write('<dt>Total Files</dt><dd>%s</dd>' % data.getTotalFiles())
  209. f.write('<dt>Total Lines of Code</dt><dd>%s</dd>' % data.getTotalLOC())
  210. f.write('<dt>Total Commits</dt><dd>%s</dd>' % data.getTotalCommits())
  211. f.write('<dt>Authors</dt><dd>%s</dd>' % data.getTotalAuthors())
  212. f.write('</dl>');
  213. f.write('</body>\n</html>');
  214. f.close()
  215. ###
  216. # Activity
  217. f = open(path + '/activity.html', 'w')
  218. self.printHeader(f)
  219. f.write('<h1>Activity</h1>')
  220. self.printNav(f)
  221. f.write('<h2>Last 30 days</h2>')
  222. f.write('<h2>Last 12 months</h2>')
  223. # Hour of Day
  224. f.write('\n<h2>Hour of Day</h2>\n\n')
  225. hour_of_day = data.getActivityByHourOfDay()
  226. f.write('<table><tr><th>Hour</th>')
  227. for i in range(1, 25):
  228. f.write('<th>%d</th>' % i)
  229. f.write('</tr>\n<tr><th>Commits</th>')
  230. fp = open(path + '/hour_of_day.dat', 'w')
  231. for i in range(0, 24):
  232. if i in hour_of_day:
  233. f.write('<td>%d</td>' % hour_of_day[i])
  234. fp.write('%d %d\n' % (i, hour_of_day[i]))
  235. else:
  236. f.write('<td>0</td>')
  237. fp.write('%d 0\n' % i)
  238. fp.close()
  239. f.write('</tr>\n<tr><th>%</th>')
  240. totalcommits = data.getTotalCommits()
  241. for i in range(0, 24):
  242. if i in hour_of_day:
  243. f.write('<td>%.2f</td>' % ((100.0 * hour_of_day[i]) / totalcommits))
  244. else:
  245. f.write('<td>0.00</td>')
  246. f.write('</tr></table>')
  247. f.write('<img src="hour_of_day.png" />')
  248. fg = open(path + '/hour_of_day.dat', 'w')
  249. for i in range(0, 24):
  250. if i in hour_of_day:
  251. fg.write('%d %d\n' % (i + 1, hour_of_day[i]))
  252. else:
  253. fg.write('%d 0\n' % (i + 1))
  254. fg.close()
  255. # Day of Week
  256. # TODO show also by hour of weekday?
  257. f.write('\n<h2>Day of Week</h2>\n\n')
  258. day_of_week = data.getActivityByDayOfWeek()
  259. f.write('<div class="vtable"><table>')
  260. f.write('<tr><th>Day</th><th>Total (%)</th></tr>')
  261. fp = open(path + '/day_of_week.dat', 'w')
  262. for d in range(0, 7):
  263. fp.write('%d %d\n' % (d + 1, day_of_week[d]))
  264. f.write('<tr>')
  265. f.write('<th>%d</th>' % (d + 1))
  266. if d in day_of_week:
  267. f.write('<td>%d (%.2f%%)</td>' % (day_of_week[d], (100.0 * day_of_week[d]) / totalcommits))
  268. else:
  269. f.write('<td>0</td>')
  270. f.write('</tr>')
  271. f.write('</table></div>')
  272. f.write('<img src="day_of_week.png" />')
  273. fp.close()
  274. # Commits by year/month
  275. f.write('<h2>Commits by year/month</h2>')
  276. f.write('<div class="vtable"><table><tr><th>Month</th><th>Commits</th></tr>')
  277. for yymm in reversed(sorted(data.commits_by_month.keys())):
  278. f.write('<tr><td>%s</td><td>%d</td></tr>' % (yymm, data.commits_by_month[yymm]))
  279. f.write('</table></div>')
  280. f.write('<img src="commits_by_year_month.png" />')
  281. fg = open(path + '/commits_by_year_month.dat', 'w')
  282. for yymm in sorted(data.commits_by_month.keys()):
  283. fg.write('%s %s\n' % (yymm, data.commits_by_month[yymm]))
  284. fg.close()
  285. # Commits by year
  286. f.write('<h2>Commits by year</h2>')
  287. f.write('<div class="vtable"><table><tr><th>Year</th><th>Commits (% of all)</th></tr>')
  288. for yy in reversed(sorted(data.commits_by_year.keys())):
  289. f.write('<tr><td>%s</td><td>%d (%.2f%%)</td></tr>' % (yy, data.commits_by_year[yy], (100.0 * data.commits_by_year[yy]) / data.getTotalCommits()))
  290. f.write('</table></div>')
  291. f.write('<img src="commits_by_year.png" />')
  292. fg = open(path + '/commits_by_year.dat', 'w')
  293. for yy in sorted(data.commits_by_year.keys()):
  294. fg.write('%d %d\n' % (yy, data.commits_by_year[yy]))
  295. fg.close()
  296. f.write('</body></html>')
  297. f.close()
  298. ###
  299. # Authors
  300. f = open(path + '/authors.html', 'w')
  301. self.printHeader(f)
  302. f.write('<h1>Authors</h1>')
  303. self.printNav(f)
  304. f.write('\n<h2>List of authors</h2>\n\n')
  305. f.write('<table class="authors">')
  306. f.write('<tr><th>Author</th><th>Commits (%)</th><th>First commit</th><th>Last commit</th></tr>')
  307. for author in sorted(data.getAuthors()):
  308. info = data.getAuthorInfo(author)
  309. f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%s</td><td>%s</td></tr>' % (author, info['commits'], info['commits_frac'], info['date_first'], info['date_last']))
  310. f.write('</table>')
  311. f.write('\n<h2>Author of Month</h2>\n\n')
  312. f.write('<table>')
  313. f.write('<tr><th>Month</th><th>Author</th><th>Commits (%)</th></tr>')
  314. for yymm in reversed(sorted(data.author_of_month.keys())):
  315. authordict = data.author_of_month[yymm]
  316. authors = getkeyssortedbyvalues(authordict)
  317. authors.reverse()
  318. commits = data.author_of_month[yymm][authors[0]]
  319. f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td></tr>' % (yymm, authors[0], commits, (100 * commits) / data.commits_by_month[yymm], data.commits_by_month[yymm]))
  320. f.write('</table>')
  321. f.write('\n<h2>Author of Year</h2>\n\n')
  322. f.write('<table><tr><th>Year</th><th>Author</th><th>Commits (%)</th></tr>')
  323. for yy in reversed(sorted(data.author_of_year.keys())):
  324. authordict = data.author_of_year[yy]
  325. authors = getkeyssortedbyvalues(authordict)
  326. authors.reverse()
  327. commits = data.author_of_year[yy][authors[0]]
  328. f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td></tr>' % (yy, authors[0], commits, (100 * commits) / data.commits_by_year[yy], data.commits_by_year[yy]))
  329. f.write('</table>')
  330. f.write('</body></html>')
  331. f.close()
  332. ###
  333. # Files
  334. f = open(path + '/files.html', 'w')
  335. self.printHeader(f)
  336. f.write('<h1>Files</h1>')
  337. self.printNav(f)
  338. f.write('<dl>\n')
  339. f.write('<dt>Total files</dt><dd>%d</dd>' % data.getTotalFiles())
  340. f.write('<dt>Total lines</dt><dd>%d</dd>' % data.getTotalLOC())
  341. f.write('<dt>Average file size</dt><dd>%.2f bytes</dd>' % ((100.0 * data.getTotalLOC()) / data.getTotalFiles()))
  342. f.write('</dl>\n')
  343. f.write('<h2>File count by date</h2>')
  344. f.write('<h2>Average file size by date</h2>')
  345. f.write('</body></html>')
  346. f.close()
  347. ###
  348. # tags.html
  349. f = open(path + '/tags.html', 'w')
  350. self.printHeader(f)
  351. f.write('<h1>Tags</h1>')
  352. self.printNav(f)
  353. f.write('<dl>')
  354. f.write('<dt>Total tags</dt><dd>%d</dd>' % len(data.tags))
  355. if len(data.tags) > 0:
  356. f.write('<dt>Average commits per tag</dt><dd>%.2f</dd>' % (data.getTotalCommits() / len(data.tags)))
  357. f.write('</dl>')
  358. f.write('<table>')
  359. f.write('<tr><th>Name</th><th>Date</th></tr>')
  360. # sort the tags by date desc
  361. tags_sorted_by_date_desc = map(lambda el : el[1], reversed(sorted(map(lambda el : (el[1]['date'], el[0]), data.tags.items()))))
  362. for tag in tags_sorted_by_date_desc:
  363. f.write('<tr><td>%s</td><td>%s</td></tr>' % (tag, data.tags[tag]['date']))
  364. f.write('</table>')
  365. f.write('</body></html>')
  366. f.close()
  367. self.createGraphs(path)
  368. pass
  369. def createGraphs(self, path):
  370. print 'Generating graphs...'
  371. # hour of day
  372. f = open(path + '/hour_of_day.plot', 'w')
  373. f.write(GNUPLOT_COMMON)
  374. f.write(
  375. """
  376. set output 'hour_of_day.png'
  377. unset key
  378. set xrange [0.5:24.5]
  379. set xtics 4
  380. set ylabel "Commits"
  381. plot 'hour_of_day.dat' using 1:2:(0.5) w boxes fs solid
  382. """)
  383. f.close()
  384. # day of week
  385. f = open(path + '/day_of_week.plot', 'w')
  386. f.write(GNUPLOT_COMMON)
  387. f.write(
  388. """
  389. set output 'day_of_week.png'
  390. unset key
  391. set xrange [0.5:7.5]
  392. set xtics 1
  393. set ylabel "Commits"
  394. plot 'day_of_week.dat' using 1:2:(0.5) w boxes fs solid
  395. """)
  396. f.close()
  397. # commits_by_year_month
  398. f = open(path + '/commits_by_year_month.plot', 'w')
  399. f.write(GNUPLOT_COMMON)
  400. f.write(
  401. # TODO rotate xtic labels by 90 degrees
  402. """
  403. set output 'commits_by_year_month.png'
  404. unset key
  405. set xdata time
  406. set timefmt "%Y-%m"
  407. set format x "%Y-%m"
  408. set xtics 15768000
  409. set ylabel "Commits"
  410. plot 'commits_by_year_month.dat' using 1:2:(0.5) w boxes fs solid
  411. """)
  412. f.close()
  413. # commits_by_year
  414. f = open(path + '/commits_by_year.plot', 'w')
  415. f.write(GNUPLOT_COMMON)
  416. f.write(
  417. """
  418. set output 'commits_by_year.png'
  419. unset key
  420. set xtics 1
  421. set ylabel "Commits"
  422. plot 'commits_by_year.dat' using 1:2:(0.5) w boxes fs solid
  423. """)
  424. f.close()
  425. os.chdir(path)
  426. for i in ('hour_of_day', 'day_of_week', 'commits_by_year_month', 'commits_by_year'):
  427. os.system('gnuplot %s.plot' % i)
  428. pass
  429. def printHeader(self, f):
  430. f.write("""<html>
  431. <head>
  432. <title>StatGit</title>
  433. <link rel="stylesheet" href="statgit.css" type="text/css" />
  434. </head>
  435. <body>
  436. """)
  437. def printNav(self, f):
  438. f.write("""
  439. <div class="nav">
  440. <li><a href="index.html">General</a></li>
  441. <li><a href="activity.html">Activity</a></li>
  442. <li><a href="authors.html">Authors</a></li>
  443. <li><a href="files.html">Files</a></li>
  444. <li><a href="lines.html">Lines</a></li>
  445. <li><a href="tags.html">Tags</a></li>
  446. </ul>
  447. </div>
  448. """)
  449. usage = """
  450. Usage: statgit [options] <gitpath> <outputpath>
  451. Options:
  452. -o html
  453. """
  454. if len(sys.argv) < 3:
  455. print usage
  456. sys.exit(0)
  457. gitpath = sys.argv[1]
  458. outputpath = os.path.abspath(sys.argv[2])
  459. print 'Git path: %s' % gitpath
  460. print 'Output path: %s' % outputpath
  461. os.chdir(gitpath)
  462. print 'Collecting data...'
  463. data = GitDataCollector()
  464. data.collect(gitpath)
  465. print 'Generating report...'
  466. report = HTMLReportCreator()
  467. report.create(data, outputpath)