gitstats 27KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916
  1. #!/usr/bin/env python
  2. # Copyright (c) 2007-2008 Heikki Hokkanen <hoxu@users.sf.net>
  3. # GPLv2 / GPLv3
  4. import subprocess
  5. import datetime
  6. import glob
  7. import os
  8. import pickle
  9. import re
  10. import shutil
  11. import sys
  12. import time
  13. import zlib
  14. GNUPLOT_COMMON = 'set terminal png transparent\nset size 0.5,0.5\n'
  15. exectime_internal = 0.0
  16. exectime_external = 0.0
  17. time_start = time.time()
  18. # By default, gnuplot is searched from path, but can be overridden with the
  19. # environment variable "GNUPLOT"
  20. gnuplot_cmd = 'gnuplot'
  21. if 'GNUPLOT' in os.environ:
  22. gnuplot_cmd = os.environ['GNUPLOT']
  23. def getpipeoutput(cmds, quiet = False):
  24. global exectime_external
  25. start = time.time()
  26. if not quiet:
  27. print '>> ' + ' | '.join(cmds),
  28. sys.stdout.flush()
  29. p0 = subprocess.Popen(cmds[0], stdout = subprocess.PIPE, shell = True)
  30. p = p0
  31. for x in cmds[1:]:
  32. p = subprocess.Popen(x, stdin = p0.stdout, stdout = subprocess.PIPE, shell = True)
  33. p0 = p
  34. output = p.communicate()[0]
  35. end = time.time()
  36. if not quiet:
  37. print '\r[%.5f] >> %s' % (end - start, ' | '.join(cmds))
  38. exectime_external += (end - start)
  39. return output.rstrip('\n')
  40. def getkeyssortedbyvalues(dict):
  41. return map(lambda el : el[1], sorted(map(lambda el : (el[1], el[0]), dict.items())))
  42. # dict['author'] = { 'commits': 512 } - ...key(dict, 'commits')
  43. def getkeyssortedbyvaluekey(d, key):
  44. return map(lambda el : el[1], sorted(map(lambda el : (d[el][key], el), d.keys())))
  45. class DataCollector:
  46. """Manages data collection from a revision control repository."""
  47. def __init__(self):
  48. self.stamp_created = time.time()
  49. self.cache = {}
  50. ##
  51. # This should be the main function to extract data from the repository.
  52. def collect(self, dir):
  53. self.dir = dir
  54. self.projectname = os.path.basename(os.path.abspath(dir))
  55. ##
  56. # Load cacheable data
  57. def loadCache(self, dir):
  58. cachefile = os.path.join(dir, '.git', 'gitstats.cache')
  59. if not os.path.exists(cachefile):
  60. return
  61. print 'Loading cache...'
  62. f = open(cachefile)
  63. try:
  64. self.cache = pickle.loads(zlib.decompress(f.read()))
  65. except:
  66. # temporary hack to upgrade non-compressed caches
  67. f.seek(0)
  68. self.cache = pickle.load(f)
  69. f.close()
  70. ##
  71. # Produce any additional statistics from the extracted data.
  72. def refine(self):
  73. pass
  74. ##
  75. # : get a dictionary of author
  76. def getAuthorInfo(self, author):
  77. return None
  78. def getActivityByDayOfWeek(self):
  79. return {}
  80. def getActivityByHourOfDay(self):
  81. return {}
  82. ##
  83. # Get a list of authors
  84. def getAuthors(self):
  85. return []
  86. def getFirstCommitDate(self):
  87. return datetime.datetime.now()
  88. def getLastCommitDate(self):
  89. return datetime.datetime.now()
  90. def getStampCreated(self):
  91. return self.stamp_created
  92. def getTags(self):
  93. return []
  94. def getTotalAuthors(self):
  95. return -1
  96. def getTotalCommits(self):
  97. return -1
  98. def getTotalFiles(self):
  99. return -1
  100. def getTotalLOC(self):
  101. return -1
  102. ##
  103. # Save cacheable data
  104. def saveCache(self, dir):
  105. print 'Saving cache...'
  106. f = open(os.path.join(dir, '.git', 'gitstats.cache'), 'w')
  107. #pickle.dump(self.cache, f)
  108. data = zlib.compress(pickle.dumps(self.cache))
  109. f.write(data)
  110. f.close()
  111. class GitDataCollector(DataCollector):
  112. def collect(self, dir):
  113. DataCollector.collect(self, dir)
  114. try:
  115. self.total_authors = int(getpipeoutput(['git-log', 'git-shortlog -s', 'wc -l']))
  116. except:
  117. self.total_authors = 0
  118. #self.total_lines = int(getoutput('git-ls-files -z |xargs -0 cat |wc -l'))
  119. self.activity_by_hour_of_day = {} # hour -> commits
  120. self.activity_by_day_of_week = {} # day -> commits
  121. self.activity_by_month_of_year = {} # month [1-12] -> commits
  122. self.activity_by_hour_of_week = {} # weekday -> hour -> commits
  123. self.authors = {} # name -> {commits, first_commit_stamp, last_commit_stamp}
  124. # author of the month
  125. self.author_of_month = {} # month -> author -> commits
  126. self.author_of_year = {} # year -> author -> commits
  127. self.commits_by_month = {} # month -> commits
  128. self.commits_by_year = {} # year -> commits
  129. self.first_commit_stamp = 0
  130. self.last_commit_stamp = 0
  131. # tags
  132. self.tags = {}
  133. lines = getpipeoutput(['git-show-ref --tags']).split('\n')
  134. for line in lines:
  135. if len(line) == 0:
  136. continue
  137. (hash, tag) = line.split(' ')
  138. tag = tag.replace('refs/tags/', '')
  139. output = getpipeoutput(['git-log "%s" --pretty=format:"%%at %%an" -n 1' % hash])
  140. if len(output) > 0:
  141. parts = output.split(' ')
  142. stamp = 0
  143. try:
  144. stamp = int(parts[0])
  145. except ValueError:
  146. stamp = 0
  147. self.tags[tag] = { 'stamp': stamp, 'hash' : hash, 'date' : datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d') }
  148. # Collect revision statistics
  149. # Outputs "<stamp> <author>"
  150. lines = getpipeoutput(['git-rev-list --pretty=format:"%at %an" HEAD', 'grep -v ^commit']).split('\n')
  151. for line in lines:
  152. # linux-2.6 says "<unknown>" for one line O_o
  153. parts = line.split(' ')
  154. author = ''
  155. try:
  156. stamp = int(parts[0])
  157. except ValueError:
  158. stamp = 0
  159. if len(parts) > 1:
  160. author = ' '.join(parts[1:])
  161. date = datetime.datetime.fromtimestamp(float(stamp))
  162. # First and last commit stamp
  163. if self.last_commit_stamp == 0:
  164. self.last_commit_stamp = stamp
  165. self.first_commit_stamp = stamp
  166. # activity
  167. # hour
  168. hour = date.hour
  169. if hour in self.activity_by_hour_of_day:
  170. self.activity_by_hour_of_day[hour] += 1
  171. else:
  172. self.activity_by_hour_of_day[hour] = 1
  173. # day of week
  174. day = date.weekday()
  175. if day in self.activity_by_day_of_week:
  176. self.activity_by_day_of_week[day] += 1
  177. else:
  178. self.activity_by_day_of_week[day] = 1
  179. # hour of week
  180. if day not in self.activity_by_hour_of_week:
  181. self.activity_by_hour_of_week[day] = {}
  182. if hour not in self.activity_by_hour_of_week[day]:
  183. self.activity_by_hour_of_week[day][hour] = 1
  184. else:
  185. self.activity_by_hour_of_week[day][hour] += 1
  186. # month of year
  187. month = date.month
  188. if month in self.activity_by_month_of_year:
  189. self.activity_by_month_of_year[month] += 1
  190. else:
  191. self.activity_by_month_of_year[month] = 1
  192. # author stats
  193. if author not in self.authors:
  194. self.authors[author] = {}
  195. # commits
  196. if 'last_commit_stamp' not in self.authors[author]:
  197. self.authors[author]['last_commit_stamp'] = stamp
  198. self.authors[author]['first_commit_stamp'] = stamp
  199. if 'commits' in self.authors[author]:
  200. self.authors[author]['commits'] += 1
  201. else:
  202. self.authors[author]['commits'] = 1
  203. # author of the month/year
  204. yymm = datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m')
  205. if yymm in self.author_of_month:
  206. if author in self.author_of_month[yymm]:
  207. self.author_of_month[yymm][author] += 1
  208. else:
  209. self.author_of_month[yymm][author] = 1
  210. else:
  211. self.author_of_month[yymm] = {}
  212. self.author_of_month[yymm][author] = 1
  213. if yymm in self.commits_by_month:
  214. self.commits_by_month[yymm] += 1
  215. else:
  216. self.commits_by_month[yymm] = 1
  217. yy = datetime.datetime.fromtimestamp(stamp).year
  218. if yy in self.author_of_year:
  219. if author in self.author_of_year[yy]:
  220. self.author_of_year[yy][author] += 1
  221. else:
  222. self.author_of_year[yy][author] = 1
  223. else:
  224. self.author_of_year[yy] = {}
  225. self.author_of_year[yy][author] = 1
  226. if yy in self.commits_by_year:
  227. self.commits_by_year[yy] += 1
  228. else:
  229. self.commits_by_year[yy] = 1
  230. # TODO Optimize this, it's the worst bottleneck
  231. # outputs "<stamp> <files>" for each revision
  232. self.files_by_stamp = {} # stamp -> files
  233. revlines = getpipeoutput(['git-rev-list --pretty=format:"%at %T" HEAD', 'grep -v ^commit']).strip().split('\n')
  234. lines = []
  235. for revline in revlines:
  236. time, rev = revline.split(' ')
  237. #linecount = int(getpipeoutput(['git-ls-tree -r --name-only "%s"' % rev, 'wc -l']).split('\n')[0])
  238. linecount = self.getFilesInCommit(rev)
  239. lines.append('%d %d' % (int(time), linecount))
  240. self.total_commits = len(lines)
  241. for line in lines:
  242. parts = line.split(' ')
  243. if len(parts) != 2:
  244. continue
  245. (stamp, files) = parts[0:2]
  246. try:
  247. self.files_by_stamp[int(stamp)] = int(files)
  248. except ValueError:
  249. print 'Warning: failed to parse line "%s"' % line
  250. # extensions
  251. self.extensions = {} # extension -> files, lines
  252. lines = getpipeoutput(['git-ls-files']).split('\n')
  253. self.total_files = len(lines)
  254. for line in lines:
  255. base = os.path.basename(line)
  256. if base.find('.') == -1:
  257. ext = ''
  258. else:
  259. ext = base[(base.rfind('.') + 1):]
  260. if ext not in self.extensions:
  261. self.extensions[ext] = {'files': 0, 'lines': 0}
  262. self.extensions[ext]['files'] += 1
  263. try:
  264. # Escaping could probably be improved here
  265. self.extensions[ext]['lines'] += int(getpipeoutput(['wc -l "%s"' % line]).split()[0])
  266. except:
  267. print 'Warning: Could not count lines for file "%s"' % line
  268. # line statistics
  269. # outputs:
  270. # N files changed, N insertions (+), N deletions(-)
  271. # <stamp> <author>
  272. self.changes_by_date = {} # stamp -> { files, ins, del }
  273. lines = getpipeoutput(['git-log --shortstat --pretty=format:"%at %an"']).split('\n')
  274. lines.reverse()
  275. files = 0; inserted = 0; deleted = 0; total_lines = 0
  276. for line in lines:
  277. if len(line) == 0:
  278. continue
  279. # <stamp> <author>
  280. if line.find('files changed,') == -1:
  281. pos = line.find(' ')
  282. if pos != -1:
  283. try:
  284. (stamp, author) = (int(line[:pos]), line[pos+1:])
  285. self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted, 'lines': total_lines }
  286. except ValueError:
  287. print 'Warning: unexpected line "%s"' % line
  288. else:
  289. print 'Warning: unexpected line "%s"' % line
  290. else:
  291. numbers = re.findall('\d+', line)
  292. if len(numbers) == 3:
  293. (files, inserted, deleted) = map(lambda el : int(el), numbers)
  294. total_lines += inserted
  295. total_lines -= deleted
  296. else:
  297. print 'Warning: failed to handle line "%s"' % line
  298. (files, inserted, deleted) = (0, 0, 0)
  299. #self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted }
  300. self.total_lines = total_lines
  301. def refine(self):
  302. # authors
  303. # name -> {place_by_commits, commits_frac, date_first, date_last, timedelta}
  304. authors_by_commits = getkeyssortedbyvaluekey(self.authors, 'commits')
  305. authors_by_commits.reverse() # most first
  306. for i, name in enumerate(authors_by_commits):
  307. self.authors[name]['place_by_commits'] = i + 1
  308. for name in self.authors.keys():
  309. a = self.authors[name]
  310. a['commits_frac'] = (100 * float(a['commits'])) / self.getTotalCommits()
  311. date_first = datetime.datetime.fromtimestamp(a['first_commit_stamp'])
  312. date_last = datetime.datetime.fromtimestamp(a['last_commit_stamp'])
  313. delta = date_last - date_first
  314. a['date_first'] = date_first.strftime('%Y-%m-%d')
  315. a['date_last'] = date_last.strftime('%Y-%m-%d')
  316. a['timedelta'] = delta
  317. def getActivityByDayOfWeek(self):
  318. return self.activity_by_day_of_week
  319. def getActivityByHourOfDay(self):
  320. return self.activity_by_hour_of_day
  321. def getAuthorInfo(self, author):
  322. return self.authors[author]
  323. def getAuthors(self):
  324. return self.authors.keys()
  325. def getFilesInCommit(self, rev):
  326. try:
  327. res = self.cache['files_in_tree'][rev]
  328. except:
  329. res = int(getpipeoutput(['git-ls-tree -r --name-only "%s"' % rev, 'wc -l']).split('\n')[0])
  330. if 'files_in_tree' not in self.cache:
  331. self.cache['files_in_tree'] = {}
  332. self.cache['files_in_tree'][rev] = res
  333. return res
  334. def getFirstCommitDate(self):
  335. return datetime.datetime.fromtimestamp(self.first_commit_stamp)
  336. def getLastCommitDate(self):
  337. return datetime.datetime.fromtimestamp(self.last_commit_stamp)
  338. def getTags(self):
  339. lines = getpipeoutput(['git-show-ref --tags', 'cut -d/ -f3'])
  340. return lines.split('\n')
  341. def getTagDate(self, tag):
  342. return self.revToDate('tags/' + tag)
  343. def getTotalAuthors(self):
  344. return self.total_authors
  345. def getTotalCommits(self):
  346. return self.total_commits
  347. def getTotalFiles(self):
  348. return self.total_files
  349. def getTotalLOC(self):
  350. return self.total_lines
  351. def revToDate(self, rev):
  352. stamp = int(getpipeoutput(['git-log --pretty=format:%%at "%s" -n 1' % rev]))
  353. return datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d')
  354. class ReportCreator:
  355. """Creates the actual report based on given data."""
  356. def __init__(self):
  357. pass
  358. def create(self, data, path):
  359. self.data = data
  360. self.path = path
  361. def html_linkify(text):
  362. return text.lower().replace(' ', '_')
  363. def html_header(level, text):
  364. name = html_linkify(text)
  365. return '\n<h%d><a href="#%s" name="%s">%s</a></h%d>\n\n' % (level, name, name, text, level)
  366. class HTMLReportCreator(ReportCreator):
  367. def create(self, data, path):
  368. ReportCreator.create(self, data, path)
  369. self.title = data.projectname
  370. # copy static files if they do not exist
  371. for file in ('gitstats.css', 'sortable.js', 'arrow-up.gif', 'arrow-down.gif', 'arrow-none.gif'):
  372. basedir = os.path.dirname(os.path.abspath(__file__))
  373. shutil.copyfile(basedir + '/' + file, path + '/' + file)
  374. f = open(path + "/index.html", 'w')
  375. format = '%Y-%m-%d %H:%m:%S'
  376. self.printHeader(f)
  377. f.write('<h1>GitStats - %s</h1>' % data.projectname)
  378. self.printNav(f)
  379. f.write('<dl>');
  380. f.write('<dt>Project name</dt><dd>%s</dd>' % (data.projectname))
  381. f.write('<dt>Generated</dt><dd>%s (in %d seconds)</dd>' % (datetime.datetime.now().strftime(format), time.time() - data.getStampCreated()));
  382. f.write('<dt>Report Period</dt><dd>%s to %s</dd>' % (data.getFirstCommitDate().strftime(format), data.getLastCommitDate().strftime(format)))
  383. f.write('<dt>Total Files</dt><dd>%s</dd>' % data.getTotalFiles())
  384. f.write('<dt>Total Lines of Code</dt><dd>%s</dd>' % data.getTotalLOC())
  385. f.write('<dt>Total Commits</dt><dd>%s</dd>' % data.getTotalCommits())
  386. f.write('<dt>Authors</dt><dd>%s</dd>' % data.getTotalAuthors())
  387. f.write('</dl>');
  388. f.write('</body>\n</html>');
  389. f.close()
  390. ###
  391. # Activity
  392. f = open(path + '/activity.html', 'w')
  393. self.printHeader(f)
  394. f.write('<h1>Activity</h1>')
  395. self.printNav(f)
  396. #f.write('<h2>Last 30 days</h2>')
  397. #f.write('<h2>Last 12 months</h2>')
  398. # Hour of Day
  399. f.write(html_header(2, 'Hour of Day'))
  400. hour_of_day = data.getActivityByHourOfDay()
  401. f.write('<table><tr><th>Hour</th>')
  402. for i in range(1, 25):
  403. f.write('<th>%d</th>' % i)
  404. f.write('</tr>\n<tr><th>Commits</th>')
  405. fp = open(path + '/hour_of_day.dat', 'w')
  406. for i in range(0, 24):
  407. if i in hour_of_day:
  408. f.write('<td>%d</td>' % hour_of_day[i])
  409. fp.write('%d %d\n' % (i, hour_of_day[i]))
  410. else:
  411. f.write('<td>0</td>')
  412. fp.write('%d 0\n' % i)
  413. fp.close()
  414. f.write('</tr>\n<tr><th>%</th>')
  415. totalcommits = data.getTotalCommits()
  416. for i in range(0, 24):
  417. if i in hour_of_day:
  418. f.write('<td>%.2f</td>' % ((100.0 * hour_of_day[i]) / totalcommits))
  419. else:
  420. f.write('<td>0.00</td>')
  421. f.write('</tr></table>')
  422. f.write('<img src="hour_of_day.png" alt="Hour of Day" />')
  423. fg = open(path + '/hour_of_day.dat', 'w')
  424. for i in range(0, 24):
  425. if i in hour_of_day:
  426. fg.write('%d %d\n' % (i + 1, hour_of_day[i]))
  427. else:
  428. fg.write('%d 0\n' % (i + 1))
  429. fg.close()
  430. # Day of Week
  431. f.write(html_header(2, 'Day of Week'))
  432. day_of_week = data.getActivityByDayOfWeek()
  433. f.write('<div class="vtable"><table>')
  434. f.write('<tr><th>Day</th><th>Total (%)</th></tr>')
  435. fp = open(path + '/day_of_week.dat', 'w')
  436. for d in range(0, 7):
  437. commits = 0
  438. if d in day_of_week:
  439. commits = day_of_week[d]
  440. fp.write('%d %d\n' % (d + 1, commits))
  441. f.write('<tr>')
  442. f.write('<th>%d</th>' % (d + 1))
  443. if d in day_of_week:
  444. f.write('<td>%d (%.2f%%)</td>' % (day_of_week[d], (100.0 * day_of_week[d]) / totalcommits))
  445. else:
  446. f.write('<td>0</td>')
  447. f.write('</tr>')
  448. f.write('</table></div>')
  449. f.write('<img src="day_of_week.png" alt="Day of Week" />')
  450. fp.close()
  451. # Hour of Week
  452. f.write(html_header(2, 'Hour of Week'))
  453. f.write('<table>')
  454. f.write('<tr><th>Weekday</th>')
  455. for hour in range(0, 24):
  456. f.write('<th>%d</th>' % (hour + 1))
  457. f.write('</tr>')
  458. for weekday in range(0, 7):
  459. f.write('<tr><th>%d</th>' % (weekday + 1))
  460. for hour in range(0, 24):
  461. try:
  462. commits = data.activity_by_hour_of_week[weekday][hour]
  463. except KeyError:
  464. commits = 0
  465. if commits != 0:
  466. f.write('<td>%d</td>' % commits)
  467. else:
  468. f.write('<td></td>')
  469. f.write('</tr>')
  470. f.write('</table>')
  471. # Month of Year
  472. f.write(html_header(2, 'Month of Year'))
  473. f.write('<div class="vtable"><table>')
  474. f.write('<tr><th>Month</th><th>Commits (%)</th></tr>')
  475. fp = open (path + '/month_of_year.dat', 'w')
  476. for mm in range(1, 13):
  477. commits = 0
  478. if mm in data.activity_by_month_of_year:
  479. commits = data.activity_by_month_of_year[mm]
  480. f.write('<tr><td>%d</td><td>%d (%.2f %%)</td></tr>' % (mm, commits, (100.0 * commits) / data.getTotalCommits()))
  481. fp.write('%d %d\n' % (mm, commits))
  482. fp.close()
  483. f.write('</table></div>')
  484. f.write('<img src="month_of_year.png" alt="Month of Year" />')
  485. # Commits by year/month
  486. f.write(html_header(2, 'Commits by year/month'))
  487. f.write('<div class="vtable"><table><tr><th>Month</th><th>Commits</th></tr>')
  488. for yymm in reversed(sorted(data.commits_by_month.keys())):
  489. f.write('<tr><td>%s</td><td>%d</td></tr>' % (yymm, data.commits_by_month[yymm]))
  490. f.write('</table></div>')
  491. f.write('<img src="commits_by_year_month.png" alt="Commits by year/month" />')
  492. fg = open(path + '/commits_by_year_month.dat', 'w')
  493. for yymm in sorted(data.commits_by_month.keys()):
  494. fg.write('%s %s\n' % (yymm, data.commits_by_month[yymm]))
  495. fg.close()
  496. # Commits by year
  497. f.write(html_header(2, 'Commits by Year'))
  498. f.write('<div class="vtable"><table><tr><th>Year</th><th>Commits (% of all)</th></tr>')
  499. for yy in reversed(sorted(data.commits_by_year.keys())):
  500. f.write('<tr><td>%s</td><td>%d (%.2f%%)</td></tr>' % (yy, data.commits_by_year[yy], (100.0 * data.commits_by_year[yy]) / data.getTotalCommits()))
  501. f.write('</table></div>')
  502. f.write('<img src="commits_by_year.png" alt="Commits by Year" />')
  503. fg = open(path + '/commits_by_year.dat', 'w')
  504. for yy in sorted(data.commits_by_year.keys()):
  505. fg.write('%d %d\n' % (yy, data.commits_by_year[yy]))
  506. fg.close()
  507. f.write('</body></html>')
  508. f.close()
  509. ###
  510. # Authors
  511. f = open(path + '/authors.html', 'w')
  512. self.printHeader(f)
  513. f.write('<h1>Authors</h1>')
  514. self.printNav(f)
  515. # Authors :: List of authors
  516. f.write(html_header(2, 'List of Authors'))
  517. f.write('<table class="authors sortable" id="authors">')
  518. f.write('<tr><th>Author</th><th>Commits (%)</th><th>First commit</th><th>Last commit</th><th class="unsortable">Age</th><th># by commits</th></tr>')
  519. for author in sorted(data.getAuthors()):
  520. info = data.getAuthorInfo(author)
  521. f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%s</td><td>%s</td><td>%s</td><td>%d</td></tr>' % (author, info['commits'], info['commits_frac'], info['date_first'], info['date_last'], info['timedelta'], info['place_by_commits']))
  522. f.write('</table>')
  523. # Authors :: Author of Month
  524. f.write(html_header(2, 'Author of Month'))
  525. f.write('<table class="sortable" id="aom">')
  526. f.write('<tr><th>Month</th><th>Author</th><th>Commits (%)</th><th class="unsortable">Next top 5</th></tr>')
  527. for yymm in reversed(sorted(data.author_of_month.keys())):
  528. authordict = data.author_of_month[yymm]
  529. authors = getkeyssortedbyvalues(authordict)
  530. authors.reverse()
  531. commits = data.author_of_month[yymm][authors[0]]
  532. next = ', '.join(authors[1:5])
  533. f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td><td>%s</td></tr>' % (yymm, authors[0], commits, (100 * commits) / data.commits_by_month[yymm], data.commits_by_month[yymm], next))
  534. f.write('</table>')
  535. f.write(html_header(2, 'Author of Year'))
  536. f.write('<table class="sortable" id="aoy"><tr><th>Year</th><th>Author</th><th>Commits (%)</th><th class="unsortable">Next top 5</th></tr>')
  537. for yy in reversed(sorted(data.author_of_year.keys())):
  538. authordict = data.author_of_year[yy]
  539. authors = getkeyssortedbyvalues(authordict)
  540. authors.reverse()
  541. commits = data.author_of_year[yy][authors[0]]
  542. next = ', '.join(authors[1:5])
  543. f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td><td>%s</td></tr>' % (yy, authors[0], commits, (100 * commits) / data.commits_by_year[yy], data.commits_by_year[yy], next))
  544. f.write('</table>')
  545. f.write('</body></html>')
  546. f.close()
  547. ###
  548. # Files
  549. f = open(path + '/files.html', 'w')
  550. self.printHeader(f)
  551. f.write('<h1>Files</h1>')
  552. self.printNav(f)
  553. f.write('<dl>\n')
  554. f.write('<dt>Total files</dt><dd>%d</dd>' % data.getTotalFiles())
  555. f.write('<dt>Total lines</dt><dd>%d</dd>' % data.getTotalLOC())
  556. f.write('<dt>Average file size</dt><dd>%.2f bytes</dd>' % ((100.0 * data.getTotalLOC()) / data.getTotalFiles()))
  557. f.write('</dl>\n')
  558. # Files :: File count by date
  559. f.write(html_header(2, 'File count by date'))
  560. fg = open(path + '/files_by_date.dat', 'w')
  561. for stamp in sorted(data.files_by_stamp.keys()):
  562. fg.write('%s %d\n' % (datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), data.files_by_stamp[stamp]))
  563. fg.close()
  564. f.write('<img src="files_by_date.png" alt="Files by Date" />')
  565. #f.write('<h2>Average file size by date</h2>')
  566. # Files :: Extensions
  567. f.write(html_header(2, 'Extensions'))
  568. f.write('<table class="sortable" id="ext"><tr><th>Extension</th><th>Files (%)</th><th>Lines (%)</th><th>Lines/file</th></tr>')
  569. for ext in sorted(data.extensions.keys()):
  570. files = data.extensions[ext]['files']
  571. lines = data.extensions[ext]['lines']
  572. f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%d (%.2f%%)</td><td>%d</td></tr>' % (ext, files, (100.0 * files) / data.getTotalFiles(), lines, (100.0 * lines) / data.getTotalLOC(), lines / files))
  573. f.write('</table>')
  574. f.write('</body></html>')
  575. f.close()
  576. ###
  577. # Lines
  578. f = open(path + '/lines.html', 'w')
  579. self.printHeader(f)
  580. f.write('<h1>Lines</h1>')
  581. self.printNav(f)
  582. f.write('<dl>\n')
  583. f.write('<dt>Total lines</dt><dd>%d</dd>' % data.getTotalLOC())
  584. f.write('</dl>\n')
  585. f.write(html_header(2, 'Lines of Code'))
  586. f.write('<img src="lines_of_code.png" />')
  587. fg = open(path + '/lines_of_code.dat', 'w')
  588. for stamp in sorted(data.changes_by_date.keys()):
  589. fg.write('%d %d\n' % (stamp, data.changes_by_date[stamp]['lines']))
  590. fg.close()
  591. f.write('</body></html>')
  592. f.close()
  593. ###
  594. # tags.html
  595. f = open(path + '/tags.html', 'w')
  596. self.printHeader(f)
  597. f.write('<h1>Tags</h1>')
  598. self.printNav(f)
  599. f.write('<dl>')
  600. f.write('<dt>Total tags</dt><dd>%d</dd>' % len(data.tags))
  601. if len(data.tags) > 0:
  602. f.write('<dt>Average commits per tag</dt><dd>%.2f</dd>' % (data.getTotalCommits() / len(data.tags)))
  603. f.write('</dl>')
  604. f.write('<table>')
  605. f.write('<tr><th>Name</th><th>Date</th></tr>')
  606. # sort the tags by date desc
  607. tags_sorted_by_date_desc = map(lambda el : el[1], reversed(sorted(map(lambda el : (el[1]['date'], el[0]), data.tags.items()))))
  608. for tag in tags_sorted_by_date_desc:
  609. f.write('<tr><td>%s</td><td>%s</td></tr>' % (tag, data.tags[tag]['date']))
  610. f.write('</table>')
  611. f.write('</body></html>')
  612. f.close()
  613. self.createGraphs(path)
  614. def createGraphs(self, path):
  615. print 'Generating graphs...'
  616. # hour of day
  617. f = open(path + '/hour_of_day.plot', 'w')
  618. f.write(GNUPLOT_COMMON)
  619. f.write(
  620. """
  621. set output 'hour_of_day.png'
  622. unset key
  623. set xrange [0.5:24.5]
  624. set xtics 4
  625. set ylabel "Commits"
  626. plot 'hour_of_day.dat' using 1:2:(0.5) w boxes fs solid
  627. """)
  628. f.close()
  629. # day of week
  630. f = open(path + '/day_of_week.plot', 'w')
  631. f.write(GNUPLOT_COMMON)
  632. f.write(
  633. """
  634. set output 'day_of_week.png'
  635. unset key
  636. set xrange [0.5:7.5]
  637. set xtics 1
  638. set ylabel "Commits"
  639. plot 'day_of_week.dat' using 1:2:(0.5) w boxes fs solid
  640. """)
  641. f.close()
  642. # Month of Year
  643. f = open(path + '/month_of_year.plot', 'w')
  644. f.write(GNUPLOT_COMMON)
  645. f.write(
  646. """
  647. set output 'month_of_year.png'
  648. unset key
  649. set xrange [0.5:12.5]
  650. set xtics 1
  651. set ylabel "Commits"
  652. plot 'month_of_year.dat' using 1:2:(0.5) w boxes fs solid
  653. """)
  654. f.close()
  655. # commits_by_year_month
  656. f = open(path + '/commits_by_year_month.plot', 'w')
  657. f.write(GNUPLOT_COMMON)
  658. f.write(
  659. """
  660. set output 'commits_by_year_month.png'
  661. unset key
  662. set xdata time
  663. set timefmt "%Y-%m"
  664. set format x "%Y-%m"
  665. set xtics rotate by 90 15768000
  666. set bmargin 5
  667. set ylabel "Commits"
  668. plot 'commits_by_year_month.dat' using 1:2:(0.5) w boxes fs solid
  669. """)
  670. f.close()
  671. # commits_by_year
  672. f = open(path + '/commits_by_year.plot', 'w')
  673. f.write(GNUPLOT_COMMON)
  674. f.write(
  675. """
  676. set output 'commits_by_year.png'
  677. unset key
  678. set xtics 1
  679. set ylabel "Commits"
  680. plot 'commits_by_year.dat' using 1:2:(0.5) w boxes fs solid
  681. """)
  682. f.close()
  683. # Files by date
  684. f = open(path + '/files_by_date.plot', 'w')
  685. f.write(GNUPLOT_COMMON)
  686. f.write(
  687. """
  688. set output 'files_by_date.png'
  689. unset key
  690. set xdata time
  691. set timefmt "%Y-%m-%d"
  692. set format x "%Y-%m-%d"
  693. set ylabel "Files"
  694. set xtics rotate by 90
  695. set bmargin 6
  696. plot 'files_by_date.dat' using 1:2 smooth csplines
  697. """)
  698. f.close()
  699. # Lines of Code
  700. f = open(path + '/lines_of_code.plot', 'w')
  701. f.write(GNUPLOT_COMMON)
  702. f.write(
  703. """
  704. set output 'lines_of_code.png'
  705. unset key
  706. set xdata time
  707. set timefmt "%s"
  708. set format x "%Y-%m-%d"
  709. set ylabel "Lines"
  710. set xtics rotate by 90
  711. set bmargin 6
  712. plot 'lines_of_code.dat' using 1:2 w lines
  713. """)
  714. f.close()
  715. os.chdir(path)
  716. files = glob.glob(path + '/*.plot')
  717. for f in files:
  718. out = getpipeoutput([gnuplot_cmd + ' "%s"' % f])
  719. if len(out) > 0:
  720. print out
  721. def printHeader(self, f, title = ''):
  722. f.write(
  723. """<?xml version="1.0" encoding="UTF-8"?>
  724. <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
  725. <html xmlns="http://www.w3.org/1999/xhtml">
  726. <head>
  727. <title>GitStats - %s</title>
  728. <link rel="stylesheet" href="gitstats.css" type="text/css" />
  729. <meta name="generator" content="GitStats" />
  730. <script type="text/javascript" src="sortable.js"></script>
  731. </head>
  732. <body>
  733. """ % self.title)
  734. def printNav(self, f):
  735. f.write("""
  736. <div class="nav">
  737. <ul>
  738. <li><a href="index.html">General</a></li>
  739. <li><a href="activity.html">Activity</a></li>
  740. <li><a href="authors.html">Authors</a></li>
  741. <li><a href="files.html">Files</a></li>
  742. <li><a href="lines.html">Lines</a></li>
  743. <li><a href="tags.html">Tags</a></li>
  744. </ul>
  745. </div>
  746. """)
  747. usage = """
  748. Usage: gitstats [options] <gitpath> <outputpath>
  749. Options:
  750. """
  751. if len(sys.argv) < 3:
  752. print usage
  753. sys.exit(0)
  754. gitpath = sys.argv[1]
  755. outputpath = os.path.abspath(sys.argv[2])
  756. rundir = os.getcwd()
  757. try:
  758. os.makedirs(outputpath)
  759. except OSError:
  760. pass
  761. if not os.path.isdir(outputpath):
  762. print 'FATAL: Output path is not a directory or does not exist'
  763. sys.exit(1)
  764. print 'Git path: %s' % gitpath
  765. print 'Output path: %s' % outputpath
  766. os.chdir(gitpath)
  767. print 'Collecting data...'
  768. data = GitDataCollector()
  769. data.loadCache(gitpath)
  770. data.collect(gitpath)
  771. print 'Refining data...'
  772. data.saveCache(gitpath)
  773. data.refine()
  774. os.chdir(rundir)
  775. print 'Generating report...'
  776. report = HTMLReportCreator()
  777. report.create(data, outputpath)
  778. time_end = time.time()
  779. exectime_internal = time_end - time_start
  780. print 'Execution time %.5f secs, %.5f secs (%.2f %%) in external commands)' % (exectime_internal, exectime_external, (100.0 * exectime_external) / exectime_internal)