gitstats 28KB

#!/usr/bin/env python
# Copyright (c) 2007-2009 Heikki Hokkanen <hoxu@users.sf.net> & others (see doc/author.txt)
# GPLv2 / GPLv3
  4. import datetime
  5. import glob
  6. import os
  7. import pickle
  8. import re
  9. import shutil
  10. import subprocess
  11. import sys
  12. import time
  13. import zlib
  14. GNUPLOT_COMMON = 'set terminal png transparent\nset size 0.5,0.5\n'
  15. MAX_EXT_LENGTH = 10 # maximum file extension length
  16. exectime_internal = 0.0
  17. exectime_external = 0.0
  18. time_start = time.time()
  19. # By default, gnuplot is searched from path, but can be overridden with the
  20. # environment variable "GNUPLOT"
  21. gnuplot_cmd = 'gnuplot'
  22. if 'GNUPLOT' in os.environ:
  23. gnuplot_cmd = os.environ['GNUPLOT']
  24. def getpipeoutput(cmds, quiet = False):
  25. global exectime_external
  26. start = time.time()
  27. if not quiet:
  28. print '>> ' + ' | '.join(cmds),
  29. sys.stdout.flush()
  30. p0 = subprocess.Popen(cmds[0], stdout = subprocess.PIPE, shell = True)
  31. p = p0
  32. for x in cmds[1:]:
  33. p = subprocess.Popen(x, stdin = p0.stdout, stdout = subprocess.PIPE, shell = True)
  34. p0 = p
  35. output = p.communicate()[0]
  36. end = time.time()
  37. if not quiet:
  38. print '\r[%.5f] >> %s' % (end - start, ' | '.join(cmds))
  39. exectime_external += (end - start)
  40. return output.rstrip('\n')
  41. def getkeyssortedbyvalues(dict):
  42. return map(lambda el : el[1], sorted(map(lambda el : (el[1], el[0]), dict.items())))
  43. # dict['author'] = { 'commits': 512 } - ...key(dict, 'commits')
  44. def getkeyssortedbyvaluekey(d, key):
  45. return map(lambda el : el[1], sorted(map(lambda el : (d[el][key], el), d.keys())))
  46. VERSION = 0
  47. def getversion():
  48. global VERSION
  49. if VERSION == 0:
  50. VERSION = getpipeoutput(["git rev-parse --short HEAD"]).split('\n')[0]
  51. return VERSION
  52. class DataCollector:
  53. """Manages data collection from a revision control repository."""
  54. def __init__(self):
  55. self.stamp_created = time.time()
  56. self.cache = {}
  57. ##
  58. # This should be the main function to extract data from the repository.
  59. def collect(self, dir):
  60. self.dir = dir
  61. self.projectname = os.path.basename(os.path.abspath(dir))
  62. ##
  63. # Load cacheable data
  64. def loadCache(self, cachefile):
  65. if not os.path.exists(cachefile):
  66. return
  67. print 'Loading cache...'
  68. f = open(cachefile)
  69. try:
  70. self.cache = pickle.loads(zlib.decompress(f.read()))
  71. except:
  72. # temporary hack to upgrade non-compressed caches
  73. f.seek(0)
  74. self.cache = pickle.load(f)
  75. f.close()
  76. ##
  77. # Produce any additional statistics from the extracted data.
  78. def refine(self):
  79. pass
  80. ##
  81. # : get a dictionary of author
  82. def getAuthorInfo(self, author):
  83. return None
  84. def getActivityByDayOfWeek(self):
  85. return {}
  86. def getActivityByHourOfDay(self):
  87. return {}
  88. ##
  89. # Get a list of authors
  90. def getAuthors(self):
  91. return []
  92. def getFirstCommitDate(self):
  93. return datetime.datetime.now()
  94. def getLastCommitDate(self):
  95. return datetime.datetime.now()
  96. def getStampCreated(self):
  97. return self.stamp_created
  98. def getTags(self):
  99. return []
  100. def getTotalAuthors(self):
  101. return -1
  102. def getTotalCommits(self):
  103. return -1
  104. def getTotalFiles(self):
  105. return -1
  106. def getTotalLOC(self):
  107. return -1
  108. ##
  109. # Save cacheable data
  110. def saveCache(self, filename):
  111. print 'Saving cache...'
  112. f = open(cachefile, 'w')
  113. #pickle.dump(self.cache, f)
  114. data = zlib.compress(pickle.dumps(self.cache))
  115. f.write(data)
  116. f.close()
  117. class GitDataCollector(DataCollector):
  118. def collect(self, dir):
  119. DataCollector.collect(self, dir)
  120. try:
  121. self.total_authors = int(getpipeoutput(['git log', 'git shortlog -s', 'wc -l']))
  122. except:
  123. self.total_authors = 0
  124. #self.total_lines = int(getoutput('git-ls-files -z |xargs -0 cat |wc -l'))
  125. self.activity_by_hour_of_day = {} # hour -> commits
  126. self.activity_by_day_of_week = {} # day -> commits
  127. self.activity_by_month_of_year = {} # month [1-12] -> commits
  128. self.activity_by_hour_of_week = {} # weekday -> hour -> commits
  129. self.activity_by_hour_of_day_busiest = 0
  130. self.activity_by_hour_of_week_busiest = 0
  131. self.authors = {} # name -> {commits, first_commit_stamp, last_commit_stamp}
  132. # author of the month
  133. self.author_of_month = {} # month -> author -> commits
  134. self.author_of_year = {} # year -> author -> commits
  135. self.commits_by_month = {} # month -> commits
  136. self.commits_by_year = {} # year -> commits
  137. self.first_commit_stamp = 0
  138. self.last_commit_stamp = 0
  139. # tags
  140. self.tags = {}
  141. lines = getpipeoutput(['git show-ref --tags']).split('\n')
  142. for line in lines:
  143. if len(line) == 0:
  144. continue
  145. (hash, tag) = line.split(' ')
  146. tag = tag.replace('refs/tags/', '')
  147. output = getpipeoutput(['git log "%s" --pretty=format:"%%at %%an" -n 1' % hash])
  148. if len(output) > 0:
  149. parts = output.split(' ')
  150. stamp = 0
  151. try:
  152. stamp = int(parts[0])
  153. except ValueError:
  154. stamp = 0
  155. self.tags[tag] = { 'stamp': stamp, 'hash' : hash, 'date' : datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d') }
  156. # Collect revision statistics
  157. # Outputs "<stamp> <author>"
  158. lines = getpipeoutput(['git rev-list --pretty=format:"%at %an" HEAD', 'grep -v ^commit']).split('\n')
  159. for line in lines:
  160. # linux-2.6 says "<unknown>" for one line O_o
  161. parts = line.split(' ')
  162. author = ''
  163. try:
  164. stamp = int(parts[0])
  165. except ValueError:
  166. stamp = 0
  167. if len(parts) > 1:
  168. author = ' '.join(parts[1:])
  169. date = datetime.datetime.fromtimestamp(float(stamp))
  170. # First and last commit stamp
  171. if self.last_commit_stamp == 0:
  172. self.last_commit_stamp = stamp
  173. self.first_commit_stamp = stamp
  174. # activity
  175. # hour
  176. hour = date.hour
  177. if hour in self.activity_by_hour_of_day:
  178. self.activity_by_hour_of_day[hour] += 1
  179. else:
  180. self.activity_by_hour_of_day[hour] = 1
  181. # most active hour?
  182. if self.activity_by_hour_of_day[hour] > self.activity_by_hour_of_day_busiest:
  183. self.activity_by_hour_of_day_busiest = self.activity_by_hour_of_day[hour]
  184. # day of week
  185. day = date.weekday()
  186. if day in self.activity_by_day_of_week:
  187. self.activity_by_day_of_week[day] += 1
  188. else:
  189. self.activity_by_day_of_week[day] = 1
  190. # hour of week
  191. if day not in self.activity_by_hour_of_week:
  192. self.activity_by_hour_of_week[day] = {}
  193. if hour not in self.activity_by_hour_of_week[day]:
  194. self.activity_by_hour_of_week[day][hour] = 1
  195. else:
  196. self.activity_by_hour_of_week[day][hour] += 1
  197. # most active hour?
  198. if self.activity_by_hour_of_week[day][hour] > self.activity_by_hour_of_week_busiest:
  199. self.activity_by_hour_of_week_busiest = self.activity_by_hour_of_week[day][hour]
  200. # month of year
  201. month = date.month
  202. if month in self.activity_by_month_of_year:
  203. self.activity_by_month_of_year[month] += 1
  204. else:
  205. self.activity_by_month_of_year[month] = 1
  206. # author stats
  207. if author not in self.authors:
  208. self.authors[author] = {}
  209. # commits
  210. if 'last_commit_stamp' not in self.authors[author]:
  211. self.authors[author]['last_commit_stamp'] = stamp
  212. self.authors[author]['first_commit_stamp'] = stamp
  213. if 'commits' in self.authors[author]:
  214. self.authors[author]['commits'] += 1
  215. else:
  216. self.authors[author]['commits'] = 1
  217. # author of the month/year
  218. yymm = datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m')
  219. if yymm in self.author_of_month:
  220. if author in self.author_of_month[yymm]:
  221. self.author_of_month[yymm][author] += 1
  222. else:
  223. self.author_of_month[yymm][author] = 1
  224. else:
  225. self.author_of_month[yymm] = {}
  226. self.author_of_month[yymm][author] = 1
  227. if yymm in self.commits_by_month:
  228. self.commits_by_month[yymm] += 1
  229. else:
  230. self.commits_by_month[yymm] = 1
  231. yy = datetime.datetime.fromtimestamp(stamp).year
  232. if yy in self.author_of_year:
  233. if author in self.author_of_year[yy]:
  234. self.author_of_year[yy][author] += 1
  235. else:
  236. self.author_of_year[yy][author] = 1
  237. else:
  238. self.author_of_year[yy] = {}
  239. self.author_of_year[yy][author] = 1
  240. if yy in self.commits_by_year:
  241. self.commits_by_year[yy] += 1
  242. else:
  243. self.commits_by_year[yy] = 1
  244. # TODO Optimize this, it's the worst bottleneck
  245. # outputs "<stamp> <files>" for each revision
  246. self.files_by_stamp = {} # stamp -> files
  247. revlines = getpipeoutput(['git rev-list --pretty=format:"%at %T" HEAD', 'grep -v ^commit']).strip().split('\n')
  248. lines = []
  249. for revline in revlines:
  250. time, rev = revline.split(' ')
  251. #linecount = int(getpipeoutput(['git-ls-tree -r --name-only "%s"' % rev, 'wc -l']).split('\n')[0])
  252. linecount = self.getFilesInCommit(rev)
  253. lines.append('%d %d' % (int(time), linecount))
  254. self.total_commits = len(lines)
  255. for line in lines:
  256. parts = line.split(' ')
  257. if len(parts) != 2:
  258. continue
  259. (stamp, files) = parts[0:2]
  260. try:
  261. self.files_by_stamp[int(stamp)] = int(files)
  262. except ValueError:
  263. print 'Warning: failed to parse line "%s"' % line
  264. # extensions
  265. self.extensions = {} # extension -> files, lines
  266. lines = getpipeoutput(['git ls-files']).split('\n')
  267. self.total_files = len(lines)
  268. for line in lines:
  269. base = os.path.basename(line)
  270. # Ignore extensionless (including .hidden files)
  271. if base.find('.') == -1 or base.rfind('.') == 0:
  272. ext = ''
  273. else:
  274. ext = base[(base.rfind('.') + 1):]
  275. if len(ext) > MAX_EXT_LENGTH:
  276. ext = ''
  277. if ext not in self.extensions:
  278. self.extensions[ext] = {'files': 0, 'lines': 0}
  279. self.extensions[ext]['files'] += 1
  280. try:
  281. # Escaping could probably be improved here
  282. self.extensions[ext]['lines'] += int(getpipeoutput(['wc -l "%s"' % line]).split()[0])
  283. except:
  284. print 'Warning: Could not count lines for file "%s"' % line
  285. # line statistics
  286. # outputs:
  287. # N files changed, N insertions (+), N deletions(-)
  288. # <stamp> <author>
  289. self.changes_by_date = {} # stamp -> { files, ins, del }
  290. lines = getpipeoutput(['git log --shortstat --pretty=format:"%at %an"']).split('\n')
  291. lines.reverse()
  292. files = 0; inserted = 0; deleted = 0; total_lines = 0
  293. for line in lines:
  294. if len(line) == 0:
  295. continue
  296. # <stamp> <author>
  297. if line.find('files changed,') == -1:
  298. pos = line.find(' ')
  299. if pos != -1:
  300. try:
  301. (stamp, author) = (int(line[:pos]), line[pos+1:])
  302. self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted, 'lines': total_lines }
  303. except ValueError:
  304. print 'Warning: unexpected line "%s"' % line
  305. else:
  306. print 'Warning: unexpected line "%s"' % line
  307. else:
  308. numbers = re.findall('\d+', line)
  309. if len(numbers) == 3:
  310. (files, inserted, deleted) = map(lambda el : int(el), numbers)
  311. total_lines += inserted
  312. total_lines -= deleted
  313. else:
  314. print 'Warning: failed to handle line "%s"' % line
  315. (files, inserted, deleted) = (0, 0, 0)
  316. #self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted }
  317. self.total_lines = total_lines
  318. def refine(self):
  319. # authors
  320. # name -> {place_by_commits, commits_frac, date_first, date_last, timedelta}
  321. authors_by_commits = getkeyssortedbyvaluekey(self.authors, 'commits')
  322. authors_by_commits.reverse() # most first
  323. for i, name in enumerate(authors_by_commits):
  324. self.authors[name]['place_by_commits'] = i + 1
  325. for name in self.authors.keys():
  326. a = self.authors[name]
  327. a['commits_frac'] = (100 * float(a['commits'])) / self.getTotalCommits()
  328. date_first = datetime.datetime.fromtimestamp(a['first_commit_stamp'])
  329. date_last = datetime.datetime.fromtimestamp(a['last_commit_stamp'])
  330. delta = date_last - date_first
  331. a['date_first'] = date_first.strftime('%Y-%m-%d')
  332. a['date_last'] = date_last.strftime('%Y-%m-%d')
  333. a['timedelta'] = delta
  334. def getActivityByDayOfWeek(self):
  335. return self.activity_by_day_of_week
  336. def getActivityByHourOfDay(self):
  337. return self.activity_by_hour_of_day
  338. def getAuthorInfo(self, author):
  339. return self.authors[author]
  340. def getAuthors(self):
  341. return self.authors.keys()
  342. def getCommitDeltaDays(self):
  343. return (self.last_commit_stamp - self.first_commit_stamp) / 86400
  344. def getFilesInCommit(self, rev):
  345. try:
  346. res = self.cache['files_in_tree'][rev]
  347. except:
  348. res = int(getpipeoutput(['git ls-tree -r --name-only "%s"' % rev, 'wc -l']).split('\n')[0])
  349. if 'files_in_tree' not in self.cache:
  350. self.cache['files_in_tree'] = {}
  351. self.cache['files_in_tree'][rev] = res
  352. return res
  353. def getFirstCommitDate(self):
  354. return datetime.datetime.fromtimestamp(self.first_commit_stamp)
  355. def getLastCommitDate(self):
  356. return datetime.datetime.fromtimestamp(self.last_commit_stamp)
  357. def getTags(self):
  358. lines = getpipeoutput(['git show-ref --tags', 'cut -d/ -f3'])
  359. return lines.split('\n')
  360. def getTagDate(self, tag):
  361. return self.revToDate('tags/' + tag)
  362. def getTotalAuthors(self):
  363. return self.total_authors
  364. def getTotalCommits(self):
  365. return self.total_commits
  366. def getTotalFiles(self):
  367. return self.total_files
  368. def getTotalLOC(self):
  369. return self.total_lines
  370. def revToDate(self, rev):
  371. stamp = int(getpipeoutput(['git log --pretty=format:%%at "%s" -n 1' % rev]))
  372. return datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d')
  373. class ReportCreator:
  374. """Creates the actual report based on given data."""
  375. def __init__(self):
  376. pass
  377. def create(self, data, path):
  378. self.data = data
  379. self.path = path
  380. def html_linkify(text):
  381. return text.lower().replace(' ', '_')
  382. def html_header(level, text):
  383. name = html_linkify(text)
  384. return '\n<h%d><a href="#%s" name="%s">%s</a></h%d>\n\n' % (level, name, name, text, level)
  385. class HTMLReportCreator(ReportCreator):
  386. def create(self, data, path):
  387. ReportCreator.create(self, data, path)
  388. self.title = data.projectname
  389. # copy static files if they do not exist
  390. for file in ('gitstats.css', 'sortable.js', 'arrow-up.gif', 'arrow-down.gif', 'arrow-none.gif'):
  391. basedir = os.path.dirname(os.path.abspath(__file__))
  392. shutil.copyfile(basedir + '/' + file, path + '/' + file)
  393. f = open(path + "/index.html", 'w')
  394. format = '%Y-%m-%d %H:%M:%S'
  395. self.printHeader(f)
  396. f.write('<h1>GitStats - %s</h1>' % data.projectname)
  397. self.printNav(f)
  398. f.write('<dl>')
  399. f.write('<dt>Project name</dt><dd>%s</dd>' % (data.projectname))
  400. f.write('<dt>Generated</dt><dd>%s (in %d seconds)</dd>' % (datetime.datetime.now().strftime(format), time.time() - data.getStampCreated()))
  401. f.write('<dt>Generator</dt><dd><a href="http://gitstats.sourceforge.net/">GitStats</a> (version %s)</dd>' % getversion())
  402. f.write('<dt>Report Period</dt><dd>%s to %s (%d days)</dd>' % (data.getFirstCommitDate().strftime(format), data.getLastCommitDate().strftime(format), data.getCommitDeltaDays()))
  403. f.write('<dt>Total Files</dt><dd>%s</dd>' % data.getTotalFiles())
  404. f.write('<dt>Total Lines of Code</dt><dd>%s</dd>' % data.getTotalLOC())
  405. f.write('<dt>Total Commits</dt><dd>%s</dd>' % data.getTotalCommits())
  406. f.write('<dt>Authors</dt><dd>%s</dd>' % data.getTotalAuthors())
  407. f.write('</dl>')
  408. f.write('</body>\n</html>')
  409. f.close()
  410. ###
  411. # Activity
  412. f = open(path + '/activity.html', 'w')
  413. self.printHeader(f)
  414. f.write('<h1>Activity</h1>')
  415. self.printNav(f)
  416. #f.write('<h2>Last 30 days</h2>')
  417. #f.write('<h2>Last 12 months</h2>')
  418. # Hour of Day
  419. f.write(html_header(2, 'Hour of Day'))
  420. hour_of_day = data.getActivityByHourOfDay()
  421. f.write('<table><tr><th>Hour</th>')
  422. for i in range(0, 24):
  423. f.write('<th>%d</th>' % i)
  424. f.write('</tr>\n<tr><th>Commits</th>')
  425. fp = open(path + '/hour_of_day.dat', 'w')
  426. for i in range(0, 24):
  427. if i in hour_of_day:
  428. r = 127 + int((float(hour_of_day[i]) / data.activity_by_hour_of_day_busiest) * 128)
  429. f.write('<td style="background-color: rgb(%d, 0, 0)">%d</td>' % (r, hour_of_day[i]))
  430. fp.write('%d %d\n' % (i, hour_of_day[i]))
  431. else:
  432. f.write('<td>0</td>')
  433. fp.write('%d 0\n' % i)
  434. fp.close()
  435. f.write('</tr>\n<tr><th>%</th>')
  436. totalcommits = data.getTotalCommits()
  437. for i in range(0, 24):
  438. if i in hour_of_day:
  439. r = 127 + int((float(hour_of_day[i]) / data.activity_by_hour_of_day_busiest) * 128)
  440. f.write('<td style="background-color: rgb(%d, 0, 0)">%.2f</td>' % (r, (100.0 * hour_of_day[i]) / totalcommits))
  441. else:
  442. f.write('<td>0.00</td>')
  443. f.write('</tr></table>')
  444. f.write('<img src="hour_of_day.png" alt="Hour of Day" />')
  445. fg = open(path + '/hour_of_day.dat', 'w')
  446. for i in range(0, 24):
  447. if i in hour_of_day:
  448. fg.write('%d %d\n' % (i + 1, hour_of_day[i]))
  449. else:
  450. fg.write('%d 0\n' % (i + 1))
  451. fg.close()
  452. # Day of Week
  453. f.write(html_header(2, 'Day of Week'))
  454. day_of_week = data.getActivityByDayOfWeek()
  455. f.write('<div class="vtable"><table>')
  456. f.write('<tr><th>Day</th><th>Total (%)</th></tr>')
  457. fp = open(path + '/day_of_week.dat', 'w')
  458. for d in range(0, 7):
  459. commits = 0
  460. if d in day_of_week:
  461. commits = day_of_week[d]
  462. fp.write('%d %d\n' % (d + 1, commits))
  463. f.write('<tr>')
  464. f.write('<th>%d</th>' % (d + 1))
  465. if d in day_of_week:
  466. f.write('<td>%d (%.2f%%)</td>' % (day_of_week[d], (100.0 * day_of_week[d]) / totalcommits))
  467. else:
  468. f.write('<td>0</td>')
  469. f.write('</tr>')
  470. f.write('</table></div>')
  471. f.write('<img src="day_of_week.png" alt="Day of Week" />')
  472. fp.close()
  473. # Hour of Week
  474. f.write(html_header(2, 'Hour of Week'))
  475. f.write('<table>')
  476. f.write('<tr><th>Weekday</th>')
  477. for hour in range(0, 24):
  478. f.write('<th>%d</th>' % (hour))
  479. f.write('</tr>')
  480. for weekday in range(0, 7):
  481. f.write('<tr><th>%d</th>' % (weekday + 1))
  482. for hour in range(0, 24):
  483. try:
  484. commits = data.activity_by_hour_of_week[weekday][hour]
  485. except KeyError:
  486. commits = 0
  487. if commits != 0:
  488. f.write('<td')
  489. r = 127 + int((float(commits) / data.activity_by_hour_of_week_busiest) * 128)
  490. f.write(' style="background-color: rgb(%d, 0, 0)"' % r)
  491. f.write('>%d</td>' % commits)
  492. else:
  493. f.write('<td></td>')
  494. f.write('</tr>')
  495. f.write('</table>')
  496. # Month of Year
  497. f.write(html_header(2, 'Month of Year'))
  498. f.write('<div class="vtable"><table>')
  499. f.write('<tr><th>Month</th><th>Commits (%)</th></tr>')
  500. fp = open (path + '/month_of_year.dat', 'w')
  501. for mm in range(1, 13):
  502. commits = 0
  503. if mm in data.activity_by_month_of_year:
  504. commits = data.activity_by_month_of_year[mm]
  505. f.write('<tr><td>%d</td><td>%d (%.2f %%)</td></tr>' % (mm, commits, (100.0 * commits) / data.getTotalCommits()))
  506. fp.write('%d %d\n' % (mm, commits))
  507. fp.close()
  508. f.write('</table></div>')
  509. f.write('<img src="month_of_year.png" alt="Month of Year" />')
  510. # Commits by year/month
  511. f.write(html_header(2, 'Commits by year/month'))
  512. f.write('<div class="vtable"><table><tr><th>Month</th><th>Commits</th></tr>')
  513. for yymm in reversed(sorted(data.commits_by_month.keys())):
  514. f.write('<tr><td>%s</td><td>%d</td></tr>' % (yymm, data.commits_by_month[yymm]))
  515. f.write('</table></div>')
  516. f.write('<img src="commits_by_year_month.png" alt="Commits by year/month" />')
  517. fg = open(path + '/commits_by_year_month.dat', 'w')
  518. for yymm in sorted(data.commits_by_month.keys()):
  519. fg.write('%s %s\n' % (yymm, data.commits_by_month[yymm]))
  520. fg.close()
  521. # Commits by year
  522. f.write(html_header(2, 'Commits by Year'))
  523. f.write('<div class="vtable"><table><tr><th>Year</th><th>Commits (% of all)</th></tr>')
  524. for yy in reversed(sorted(data.commits_by_year.keys())):
  525. f.write('<tr><td>%s</td><td>%d (%.2f%%)</td></tr>' % (yy, data.commits_by_year[yy], (100.0 * data.commits_by_year[yy]) / data.getTotalCommits()))
  526. f.write('</table></div>')
  527. f.write('<img src="commits_by_year.png" alt="Commits by Year" />')
  528. fg = open(path + '/commits_by_year.dat', 'w')
  529. for yy in sorted(data.commits_by_year.keys()):
  530. fg.write('%d %d\n' % (yy, data.commits_by_year[yy]))
  531. fg.close()
  532. f.write('</body></html>')
  533. f.close()
  534. ###
  535. # Authors
  536. f = open(path + '/authors.html', 'w')
  537. self.printHeader(f)
  538. f.write('<h1>Authors</h1>')
  539. self.printNav(f)
  540. # Authors :: List of authors
  541. f.write(html_header(2, 'List of Authors'))
  542. f.write('<table class="authors sortable" id="authors">')
  543. f.write('<tr><th>Author</th><th>Commits (%)</th><th>First commit</th><th>Last commit</th><th class="unsortable">Age</th><th># by commits</th></tr>')
  544. for author in sorted(data.getAuthors()):
  545. info = data.getAuthorInfo(author)
  546. f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%s</td><td>%s</td><td>%s</td><td>%d</td></tr>' % (author, info['commits'], info['commits_frac'], info['date_first'], info['date_last'], info['timedelta'], info['place_by_commits']))
  547. f.write('</table>')
  548. # Authors :: Author of Month
  549. f.write(html_header(2, 'Author of Month'))
  550. f.write('<table class="sortable" id="aom">')
  551. f.write('<tr><th>Month</th><th>Author</th><th>Commits (%)</th><th class="unsortable">Next top 5</th></tr>')
  552. for yymm in reversed(sorted(data.author_of_month.keys())):
  553. authordict = data.author_of_month[yymm]
  554. authors = getkeyssortedbyvalues(authordict)
  555. authors.reverse()
  556. commits = data.author_of_month[yymm][authors[0]]
  557. next = ', '.join(authors[1:5])
  558. f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td><td>%s</td></tr>' % (yymm, authors[0], commits, (100 * commits) / data.commits_by_month[yymm], data.commits_by_month[yymm], next))
  559. f.write('</table>')
  560. f.write(html_header(2, 'Author of Year'))
  561. f.write('<table class="sortable" id="aoy"><tr><th>Year</th><th>Author</th><th>Commits (%)</th><th class="unsortable">Next top 5</th></tr>')
  562. for yy in reversed(sorted(data.author_of_year.keys())):
  563. authordict = data.author_of_year[yy]
  564. authors = getkeyssortedbyvalues(authordict)
  565. authors.reverse()
  566. commits = data.author_of_year[yy][authors[0]]
  567. next = ', '.join(authors[1:5])
  568. f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td><td>%s</td></tr>' % (yy, authors[0], commits, (100 * commits) / data.commits_by_year[yy], data.commits_by_year[yy], next))
  569. f.write('</table>')
  570. f.write('</body></html>')
  571. f.close()
  572. ###
  573. # Files
  574. f = open(path + '/files.html', 'w')
  575. self.printHeader(f)
  576. f.write('<h1>Files</h1>')
  577. self.printNav(f)
  578. f.write('<dl>\n')
  579. f.write('<dt>Total files</dt><dd>%d</dd>' % data.getTotalFiles())
  580. f.write('<dt>Total lines</dt><dd>%d</dd>' % data.getTotalLOC())
  581. f.write('<dt>Average file size</dt><dd>%.2f bytes</dd>' % ((100.0 * data.getTotalLOC()) / data.getTotalFiles()))
  582. f.write('</dl>\n')
  583. # Files :: File count by date
  584. f.write(html_header(2, 'File count by date'))
  585. fg = open(path + '/files_by_date.dat', 'w')
  586. for stamp in sorted(data.files_by_stamp.keys()):
  587. fg.write('%s %d\n' % (datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), data.files_by_stamp[stamp]))
  588. fg.close()
  589. f.write('<img src="files_by_date.png" alt="Files by Date" />')
  590. #f.write('<h2>Average file size by date</h2>')
  591. # Files :: Extensions
  592. f.write(html_header(2, 'Extensions'))
  593. f.write('<table class="sortable" id="ext"><tr><th>Extension</th><th>Files (%)</th><th>Lines (%)</th><th>Lines/file</th></tr>')
  594. for ext in sorted(data.extensions.keys()):
  595. files = data.extensions[ext]['files']
  596. lines = data.extensions[ext]['lines']
  597. f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%d (%.2f%%)</td><td>%d</td></tr>' % (ext, files, (100.0 * files) / data.getTotalFiles(), lines, (100.0 * lines) / data.getTotalLOC(), lines / files))
  598. f.write('</table>')
  599. f.write('</body></html>')
  600. f.close()
  601. ###
  602. # Lines
  603. f = open(path + '/lines.html', 'w')
  604. self.printHeader(f)
  605. f.write('<h1>Lines</h1>')
  606. self.printNav(f)
  607. f.write('<dl>\n')
  608. f.write('<dt>Total lines</dt><dd>%d</dd>' % data.getTotalLOC())
  609. f.write('</dl>\n')
  610. f.write(html_header(2, 'Lines of Code'))
  611. f.write('<img src="lines_of_code.png" />')
  612. fg = open(path + '/lines_of_code.dat', 'w')
  613. for stamp in sorted(data.changes_by_date.keys()):
  614. fg.write('%d %d\n' % (stamp, data.changes_by_date[stamp]['lines']))
  615. fg.close()
  616. f.write('</body></html>')
  617. f.close()
  618. ###
  619. # tags.html
  620. f = open(path + '/tags.html', 'w')
  621. self.printHeader(f)
  622. f.write('<h1>Tags</h1>')
  623. self.printNav(f)
  624. f.write('<dl>')
  625. f.write('<dt>Total tags</dt><dd>%d</dd>' % len(data.tags))
  626. if len(data.tags) > 0:
  627. f.write('<dt>Average commits per tag</dt><dd>%.2f</dd>' % (data.getTotalCommits() / len(data.tags)))
  628. f.write('</dl>')
  629. f.write('<table>')
  630. f.write('<tr><th>Name</th><th>Date</th></tr>')
  631. # sort the tags by date desc
  632. tags_sorted_by_date_desc = map(lambda el : el[1], reversed(sorted(map(lambda el : (el[1]['date'], el[0]), data.tags.items()))))
  633. for tag in tags_sorted_by_date_desc:
  634. f.write('<tr><td>%s</td><td>%s</td></tr>' % (tag, data.tags[tag]['date']))
  635. f.write('</table>')
  636. f.write('</body></html>')
  637. f.close()
  638. self.createGraphs(path)
  639. def createGraphs(self, path):
  640. print 'Generating graphs...'
  641. # hour of day
  642. f = open(path + '/hour_of_day.plot', 'w')
  643. f.write(GNUPLOT_COMMON)
  644. f.write(
  645. """
  646. set output 'hour_of_day.png'
  647. unset key
  648. set xrange [0.5:24.5]
  649. set xtics 4
  650. set ylabel "Commits"
  651. plot 'hour_of_day.dat' using 1:2:(0.5) w boxes fs solid
  652. """)
  653. f.close()
  654. # day of week
  655. f = open(path + '/day_of_week.plot', 'w')
  656. f.write(GNUPLOT_COMMON)
  657. f.write(
  658. """
  659. set output 'day_of_week.png'
  660. unset key
  661. set xrange [0.5:7.5]
  662. set xtics 1
  663. set ylabel "Commits"
  664. plot 'day_of_week.dat' using 1:2:(0.5) w boxes fs solid
  665. """)
  666. f.close()
  667. # Month of Year
  668. f = open(path + '/month_of_year.plot', 'w')
  669. f.write(GNUPLOT_COMMON)
  670. f.write(
  671. """
  672. set output 'month_of_year.png'
  673. unset key
  674. set xrange [0.5:12.5]
  675. set xtics 1
  676. set ylabel "Commits"
  677. plot 'month_of_year.dat' using 1:2:(0.5) w boxes fs solid
  678. """)
  679. f.close()
  680. # commits_by_year_month
  681. f = open(path + '/commits_by_year_month.plot', 'w')
  682. f.write(GNUPLOT_COMMON)
  683. f.write(
  684. """
  685. set output 'commits_by_year_month.png'
  686. unset key
  687. set xdata time
  688. set timefmt "%Y-%m"
  689. set format x "%Y-%m"
  690. set xtics rotate by 90 15768000
  691. set bmargin 5
  692. set ylabel "Commits"
  693. plot 'commits_by_year_month.dat' using 1:2:(0.5) w boxes fs solid
  694. """)
  695. f.close()
  696. # commits_by_year
  697. f = open(path + '/commits_by_year.plot', 'w')
  698. f.write(GNUPLOT_COMMON)
  699. f.write(
  700. """
  701. set output 'commits_by_year.png'
  702. unset key
  703. set xtics 1
  704. set ylabel "Commits"
  705. set yrange [0:]
  706. plot 'commits_by_year.dat' using 1:2:(0.5) w boxes fs solid
  707. """)
  708. f.close()
  709. # Files by date
  710. f = open(path + '/files_by_date.plot', 'w')
  711. f.write(GNUPLOT_COMMON)
  712. f.write(
  713. """
  714. set output 'files_by_date.png'
  715. unset key
  716. set xdata time
  717. set timefmt "%Y-%m-%d"
  718. set format x "%Y-%m-%d"
  719. set ylabel "Files"
  720. set xtics rotate by 90
  721. set bmargin 6
  722. plot 'files_by_date.dat' using 1:2 w histeps
  723. """)
  724. f.close()
  725. # Lines of Code
  726. f = open(path + '/lines_of_code.plot', 'w')
  727. f.write(GNUPLOT_COMMON)
  728. f.write(
  729. """
  730. set output 'lines_of_code.png'
  731. unset key
  732. set xdata time
  733. set timefmt "%s"
  734. set format x "%Y-%m-%d"
  735. set ylabel "Lines"
  736. set xtics rotate by 90
  737. set bmargin 6
  738. plot 'lines_of_code.dat' using 1:2 w lines
  739. """)
  740. f.close()
  741. os.chdir(path)
  742. files = glob.glob(path + '/*.plot')
  743. for f in files:
  744. out = getpipeoutput([gnuplot_cmd + ' "%s"' % f])
  745. if len(out) > 0:
  746. print out
  747. def printHeader(self, f, title = ''):
  748. f.write(
  749. """<?xml version="1.0" encoding="UTF-8"?>
  750. <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
  751. <html xmlns="http://www.w3.org/1999/xhtml">
  752. <head>
  753. <title>GitStats - %s</title>
  754. <link rel="stylesheet" href="gitstats.css" type="text/css" />
  755. <meta name="generator" content="GitStats %s" />
  756. <script type="text/javascript" src="sortable.js"></script>
  757. </head>
  758. <body>
  759. """ % (self.title, getversion()))
  760. def printNav(self, f):
  761. f.write("""
  762. <div class="nav">
  763. <ul>
  764. <li><a href="index.html">General</a></li>
  765. <li><a href="activity.html">Activity</a></li>
  766. <li><a href="authors.html">Authors</a></li>
  767. <li><a href="files.html">Files</a></li>
  768. <li><a href="lines.html">Lines</a></li>
  769. <li><a href="tags.html">Tags</a></li>
  770. </ul>
  771. </div>
  772. """)
  773. usage = """
  774. Usage: gitstats [options] <gitpath> <outputpath>
  775. Options:
  776. """
  777. if len(sys.argv) < 3:
  778. print usage
  779. sys.exit(0)
  780. gitpath = sys.argv[1]
  781. outputpath = os.path.abspath(sys.argv[2])
  782. rundir = os.getcwd()
  783. try:
  784. os.makedirs(outputpath)
  785. except OSError:
  786. pass
  787. if not os.path.isdir(outputpath):
  788. print 'FATAL: Output path is not a directory or does not exist'
  789. sys.exit(1)
  790. print 'Git path: %s' % gitpath
  791. print 'Output path: %s' % outputpath
  792. os.chdir(gitpath)
  793. cachefile = os.path.join(outputpath, 'gitstats.cache')
  794. print 'Collecting data...'
  795. data = GitDataCollector()
  796. data.loadCache(cachefile)
  797. data.collect(gitpath)
  798. print 'Refining data...'
  799. data.saveCache(cachefile)
  800. data.refine()
  801. os.chdir(rundir)
  802. print 'Generating report...'
  803. report = HTMLReportCreator()
  804. report.create(data, outputpath)
  805. time_end = time.time()
  806. exectime_internal = time_end - time_start
  807. print 'Execution time %.5f secs, %.5f secs (%.2f %%) in external commands)' % (exectime_internal, exectime_external, (100.0 * exectime_external) / exectime_internal)