123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935
  1. #!/usr/bin/env python
  2. # Copyright (c) 2007-2008 Heikki Hokkanen <hoxu@users.sf.net>
  3. # GPLv2 / GPLv3
  4. import subprocess
  5. import datetime
  6. import glob
  7. import os
  8. import pickle
  9. import re
  10. import shutil
  11. import sys
  12. import time
  13. import zlib
  14. GNUPLOT_COMMON = 'set terminal png transparent\nset size 0.5,0.5\n'
  15. MAX_EXT_LENGTH = 10 # maximum file extension length
  16. exectime_internal = 0.0
  17. exectime_external = 0.0
  18. time_start = time.time()
  19. # By default, gnuplot is searched from path, but can be overridden with the
  20. # environment variable "GNUPLOT"
  21. gnuplot_cmd = 'gnuplot'
  22. if 'GNUPLOT' in os.environ:
  23. gnuplot_cmd = os.environ['GNUPLOT']
  24. def getpipeoutput(cmds, quiet = False):
  25. global exectime_external
  26. start = time.time()
  27. if not quiet:
  28. print '>> ' + ' | '.join(cmds),
  29. sys.stdout.flush()
  30. p0 = subprocess.Popen(cmds[0], stdout = subprocess.PIPE, shell = True)
  31. p = p0
  32. for x in cmds[1:]:
  33. p = subprocess.Popen(x, stdin = p0.stdout, stdout = subprocess.PIPE, shell = True)
  34. p0 = p
  35. output = p.communicate()[0]
  36. end = time.time()
  37. if not quiet:
  38. print '\r[%.5f] >> %s' % (end - start, ' | '.join(cmds))
  39. exectime_external += (end - start)
  40. return output.rstrip('\n')
  41. def getkeyssortedbyvalues(dict):
  42. return map(lambda el : el[1], sorted(map(lambda el : (el[1], el[0]), dict.items())))
  43. # dict['author'] = { 'commits': 512 } - ...key(dict, 'commits')
  44. def getkeyssortedbyvaluekey(d, key):
  45. return map(lambda el : el[1], sorted(map(lambda el : (d[el][key], el), d.keys())))
  46. class DataCollector:
  47. """Manages data collection from a revision control repository."""
  48. def __init__(self):
  49. self.stamp_created = time.time()
  50. self.cache = {}
  51. ##
  52. # This should be the main function to extract data from the repository.
  53. def collect(self, dir):
  54. self.dir = dir
  55. self.projectname = os.path.basename(os.path.abspath(dir))
  56. ##
  57. # Load cacheable data
  58. def loadCache(self, cachefile):
  59. if not os.path.exists(cachefile):
  60. return
  61. print 'Loading cache...'
  62. f = open(cachefile)
  63. try:
  64. self.cache = pickle.loads(zlib.decompress(f.read()))
  65. except:
  66. # temporary hack to upgrade non-compressed caches
  67. f.seek(0)
  68. self.cache = pickle.load(f)
  69. f.close()
  70. ##
  71. # Produce any additional statistics from the extracted data.
  72. def refine(self):
  73. pass
  74. ##
  75. # : get a dictionary of author
  76. def getAuthorInfo(self, author):
  77. return None
  78. def getActivityByDayOfWeek(self):
  79. return {}
  80. def getActivityByHourOfDay(self):
  81. return {}
  82. ##
  83. # Get a list of authors
  84. def getAuthors(self):
  85. return []
  86. def getFirstCommitDate(self):
  87. return datetime.datetime.now()
  88. def getLastCommitDate(self):
  89. return datetime.datetime.now()
  90. def getStampCreated(self):
  91. return self.stamp_created
  92. def getTags(self):
  93. return []
  94. def getTotalAuthors(self):
  95. return -1
  96. def getTotalCommits(self):
  97. return -1
  98. def getTotalFiles(self):
  99. return -1
  100. def getTotalLOC(self):
  101. return -1
  102. ##
  103. # Save cacheable data
  104. def saveCache(self, filename):
  105. print 'Saving cache...'
  106. f = open(cachefile, 'w')
  107. #pickle.dump(self.cache, f)
  108. data = zlib.compress(pickle.dumps(self.cache))
  109. f.write(data)
  110. f.close()
  111. class GitDataCollector(DataCollector):
  112. def collect(self, dir):
  113. DataCollector.collect(self, dir)
  114. try:
  115. self.total_authors = int(getpipeoutput(['git log', 'git shortlog -s', 'wc -l']))
  116. except:
  117. self.total_authors = 0
  118. #self.total_lines = int(getoutput('git-ls-files -z |xargs -0 cat |wc -l'))
  119. self.activity_by_hour_of_day = {} # hour -> commits
  120. self.activity_by_day_of_week = {} # day -> commits
  121. self.activity_by_month_of_year = {} # month [1-12] -> commits
  122. self.activity_by_hour_of_week = {} # weekday -> hour -> commits
  123. self.activity_by_hour_of_day_busiest = 0
  124. self.activity_by_hour_of_week_busiest = 0
  125. self.authors = {} # name -> {commits, first_commit_stamp, last_commit_stamp}
  126. # author of the month
  127. self.author_of_month = {} # month -> author -> commits
  128. self.author_of_year = {} # year -> author -> commits
  129. self.commits_by_month = {} # month -> commits
  130. self.commits_by_year = {} # year -> commits
  131. self.first_commit_stamp = 0
  132. self.last_commit_stamp = 0
  133. # tags
  134. self.tags = {}
  135. lines = getpipeoutput(['git show-ref --tags']).split('\n')
  136. for line in lines:
  137. if len(line) == 0:
  138. continue
  139. (hash, tag) = line.split(' ')
  140. tag = tag.replace('refs/tags/', '')
  141. output = getpipeoutput(['git log "%s" --pretty=format:"%%at %%an" -n 1' % hash])
  142. if len(output) > 0:
  143. parts = output.split(' ')
  144. stamp = 0
  145. try:
  146. stamp = int(parts[0])
  147. except ValueError:
  148. stamp = 0
  149. self.tags[tag] = { 'stamp': stamp, 'hash' : hash, 'date' : datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d') }
  150. # Collect revision statistics
  151. # Outputs "<stamp> <author>"
  152. lines = getpipeoutput(['git rev-list --pretty=format:"%at %an" HEAD', 'grep -v ^commit']).split('\n')
  153. for line in lines:
  154. # linux-2.6 says "<unknown>" for one line O_o
  155. parts = line.split(' ')
  156. author = ''
  157. try:
  158. stamp = int(parts[0])
  159. except ValueError:
  160. stamp = 0
  161. if len(parts) > 1:
  162. author = ' '.join(parts[1:])
  163. date = datetime.datetime.fromtimestamp(float(stamp))
  164. # First and last commit stamp
  165. if self.last_commit_stamp == 0:
  166. self.last_commit_stamp = stamp
  167. self.first_commit_stamp = stamp
  168. # activity
  169. # hour
  170. hour = date.hour
  171. if hour in self.activity_by_hour_of_day:
  172. self.activity_by_hour_of_day[hour] += 1
  173. else:
  174. self.activity_by_hour_of_day[hour] = 1
  175. # most active hour?
  176. if self.activity_by_hour_of_day[hour] > self.activity_by_hour_of_day_busiest:
  177. self.activity_by_hour_of_day_busiest = self.activity_by_hour_of_day[hour]
  178. # day of week
  179. day = date.weekday()
  180. if day in self.activity_by_day_of_week:
  181. self.activity_by_day_of_week[day] += 1
  182. else:
  183. self.activity_by_day_of_week[day] = 1
  184. # hour of week
  185. if day not in self.activity_by_hour_of_week:
  186. self.activity_by_hour_of_week[day] = {}
  187. if hour not in self.activity_by_hour_of_week[day]:
  188. self.activity_by_hour_of_week[day][hour] = 1
  189. else:
  190. self.activity_by_hour_of_week[day][hour] += 1
  191. # most active hour?
  192. if self.activity_by_hour_of_week[day][hour] > self.activity_by_hour_of_week_busiest:
  193. self.activity_by_hour_of_week_busiest = self.activity_by_hour_of_week[day][hour]
  194. # month of year
  195. month = date.month
  196. if month in self.activity_by_month_of_year:
  197. self.activity_by_month_of_year[month] += 1
  198. else:
  199. self.activity_by_month_of_year[month] = 1
  200. # author stats
  201. if author not in self.authors:
  202. self.authors[author] = {}
  203. # commits
  204. if 'last_commit_stamp' not in self.authors[author]:
  205. self.authors[author]['last_commit_stamp'] = stamp
  206. self.authors[author]['first_commit_stamp'] = stamp
  207. if 'commits' in self.authors[author]:
  208. self.authors[author]['commits'] += 1
  209. else:
  210. self.authors[author]['commits'] = 1
  211. # author of the month/year
  212. yymm = datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m')
  213. if yymm in self.author_of_month:
  214. if author in self.author_of_month[yymm]:
  215. self.author_of_month[yymm][author] += 1
  216. else:
  217. self.author_of_month[yymm][author] = 1
  218. else:
  219. self.author_of_month[yymm] = {}
  220. self.author_of_month[yymm][author] = 1
  221. if yymm in self.commits_by_month:
  222. self.commits_by_month[yymm] += 1
  223. else:
  224. self.commits_by_month[yymm] = 1
  225. yy = datetime.datetime.fromtimestamp(stamp).year
  226. if yy in self.author_of_year:
  227. if author in self.author_of_year[yy]:
  228. self.author_of_year[yy][author] += 1
  229. else:
  230. self.author_of_year[yy][author] = 1
  231. else:
  232. self.author_of_year[yy] = {}
  233. self.author_of_year[yy][author] = 1
  234. if yy in self.commits_by_year:
  235. self.commits_by_year[yy] += 1
  236. else:
  237. self.commits_by_year[yy] = 1
  238. # TODO Optimize this, it's the worst bottleneck
  239. # outputs "<stamp> <files>" for each revision
  240. self.files_by_stamp = {} # stamp -> files
  241. revlines = getpipeoutput(['git rev-list --pretty=format:"%at %T" HEAD', 'grep -v ^commit']).strip().split('\n')
  242. lines = []
  243. for revline in revlines:
  244. time, rev = revline.split(' ')
  245. #linecount = int(getpipeoutput(['git-ls-tree -r --name-only "%s"' % rev, 'wc -l']).split('\n')[0])
  246. linecount = self.getFilesInCommit(rev)
  247. lines.append('%d %d' % (int(time), linecount))
  248. self.total_commits = len(lines)
  249. for line in lines:
  250. parts = line.split(' ')
  251. if len(parts) != 2:
  252. continue
  253. (stamp, files) = parts[0:2]
  254. try:
  255. self.files_by_stamp[int(stamp)] = int(files)
  256. except ValueError:
  257. print 'Warning: failed to parse line "%s"' % line
  258. # extensions
  259. self.extensions = {} # extension -> files, lines
  260. lines = getpipeoutput(['git ls-files']).split('\n')
  261. self.total_files = len(lines)
  262. for line in lines:
  263. base = os.path.basename(line)
  264. # Ignore extensionless (including .hidden files)
  265. if base.find('.') == -1 or base.rfind('.') == 0:
  266. ext = ''
  267. else:
  268. ext = base[(base.rfind('.') + 1):]
  269. if len(ext) > MAX_EXT_LENGTH:
  270. ext = ''
  271. if ext not in self.extensions:
  272. self.extensions[ext] = {'files': 0, 'lines': 0}
  273. self.extensions[ext]['files'] += 1
  274. try:
  275. # Escaping could probably be improved here
  276. self.extensions[ext]['lines'] += int(getpipeoutput(['wc -l "%s"' % line]).split()[0])
  277. except:
  278. print 'Warning: Could not count lines for file "%s"' % line
  279. # line statistics
  280. # outputs:
  281. # N files changed, N insertions (+), N deletions(-)
  282. # <stamp> <author>
  283. self.changes_by_date = {} # stamp -> { files, ins, del }
  284. lines = getpipeoutput(['git log --shortstat --pretty=format:"%at %an"']).split('\n')
  285. lines.reverse()
  286. files = 0; inserted = 0; deleted = 0; total_lines = 0
  287. for line in lines:
  288. if len(line) == 0:
  289. continue
  290. # <stamp> <author>
  291. if line.find('files changed,') == -1:
  292. pos = line.find(' ')
  293. if pos != -1:
  294. try:
  295. (stamp, author) = (int(line[:pos]), line[pos+1:])
  296. self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted, 'lines': total_lines }
  297. except ValueError:
  298. print 'Warning: unexpected line "%s"' % line
  299. else:
  300. print 'Warning: unexpected line "%s"' % line
  301. else:
  302. numbers = re.findall('\d+', line)
  303. if len(numbers) == 3:
  304. (files, inserted, deleted) = map(lambda el : int(el), numbers)
  305. total_lines += inserted
  306. total_lines -= deleted
  307. else:
  308. print 'Warning: failed to handle line "%s"' % line
  309. (files, inserted, deleted) = (0, 0, 0)
  310. #self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted }
  311. self.total_lines = total_lines
  312. def refine(self):
  313. # authors
  314. # name -> {place_by_commits, commits_frac, date_first, date_last, timedelta}
  315. authors_by_commits = getkeyssortedbyvaluekey(self.authors, 'commits')
  316. authors_by_commits.reverse() # most first
  317. for i, name in enumerate(authors_by_commits):
  318. self.authors[name]['place_by_commits'] = i + 1
  319. for name in self.authors.keys():
  320. a = self.authors[name]
  321. a['commits_frac'] = (100 * float(a['commits'])) / self.getTotalCommits()
  322. date_first = datetime.datetime.fromtimestamp(a['first_commit_stamp'])
  323. date_last = datetime.datetime.fromtimestamp(a['last_commit_stamp'])
  324. delta = date_last - date_first
  325. a['date_first'] = date_first.strftime('%Y-%m-%d')
  326. a['date_last'] = date_last.strftime('%Y-%m-%d')
  327. a['timedelta'] = delta
  328. def getActivityByDayOfWeek(self):
  329. return self.activity_by_day_of_week
  330. def getActivityByHourOfDay(self):
  331. return self.activity_by_hour_of_day
  332. def getAuthorInfo(self, author):
  333. return self.authors[author]
  334. def getAuthors(self):
  335. return self.authors.keys()
  336. def getFilesInCommit(self, rev):
  337. try:
  338. res = self.cache['files_in_tree'][rev]
  339. except:
  340. res = int(getpipeoutput(['git ls-tree -r --name-only "%s"' % rev, 'wc -l']).split('\n')[0])
  341. if 'files_in_tree' not in self.cache:
  342. self.cache['files_in_tree'] = {}
  343. self.cache['files_in_tree'][rev] = res
  344. return res
  345. def getFirstCommitDate(self):
  346. return datetime.datetime.fromtimestamp(self.first_commit_stamp)
  347. def getLastCommitDate(self):
  348. return datetime.datetime.fromtimestamp(self.last_commit_stamp)
  349. def getTags(self):
  350. lines = getpipeoutput(['git show-ref --tags', 'cut -d/ -f3'])
  351. return lines.split('\n')
  352. def getTagDate(self, tag):
  353. return self.revToDate('tags/' + tag)
  354. def getTotalAuthors(self):
  355. return self.total_authors
  356. def getTotalCommits(self):
  357. return self.total_commits
  358. def getTotalFiles(self):
  359. return self.total_files
  360. def getTotalLOC(self):
  361. return self.total_lines
  362. def revToDate(self, rev):
  363. stamp = int(getpipeoutput(['git log --pretty=format:%%at "%s" -n 1' % rev]))
  364. return datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d')
  365. class ReportCreator:
  366. """Creates the actual report based on given data."""
  367. def __init__(self):
  368. pass
  369. def create(self, data, path):
  370. self.data = data
  371. self.path = path
  372. def html_linkify(text):
  373. return text.lower().replace(' ', '_')
  374. def html_header(level, text):
  375. name = html_linkify(text)
  376. return '\n<h%d><a href="#%s" name="%s">%s</a></h%d>\n\n' % (level, name, name, text, level)
  377. class HTMLReportCreator(ReportCreator):
  378. def create(self, data, path):
  379. ReportCreator.create(self, data, path)
  380. self.title = data.projectname
  381. # copy static files if they do not exist
  382. for file in ('gitstats.css', 'sortable.js', 'arrow-up.gif', 'arrow-down.gif', 'arrow-none.gif'):
  383. basedir = os.path.dirname(os.path.abspath(__file__))
  384. shutil.copyfile(basedir + '/' + file, path + '/' + file)
  385. f = open(path + "/index.html", 'w')
  386. format = '%Y-%m-%d %H:%m:%S'
  387. self.printHeader(f)
  388. f.write('<h1>GitStats - %s</h1>' % data.projectname)
  389. self.printNav(f)
  390. f.write('<dl>');
  391. f.write('<dt>Project name</dt><dd>%s</dd>' % (data.projectname))
  392. f.write('<dt>Generated</dt><dd>%s (in %d seconds)</dd>' % (datetime.datetime.now().strftime(format), time.time() - data.getStampCreated()));
  393. f.write('<dt>Report Period</dt><dd>%s to %s</dd>' % (data.getFirstCommitDate().strftime(format), data.getLastCommitDate().strftime(format)))
  394. f.write('<dt>Total Files</dt><dd>%s</dd>' % data.getTotalFiles())
  395. f.write('<dt>Total Lines of Code</dt><dd>%s</dd>' % data.getTotalLOC())
  396. f.write('<dt>Total Commits</dt><dd>%s</dd>' % data.getTotalCommits())
  397. f.write('<dt>Authors</dt><dd>%s</dd>' % data.getTotalAuthors())
  398. f.write('</dl>');
  399. f.write('</body>\n</html>');
  400. f.close()
  401. ###
  402. # Activity
  403. f = open(path + '/activity.html', 'w')
  404. self.printHeader(f)
  405. f.write('<h1>Activity</h1>')
  406. self.printNav(f)
  407. #f.write('<h2>Last 30 days</h2>')
  408. #f.write('<h2>Last 12 months</h2>')
  409. # Hour of Day
  410. f.write(html_header(2, 'Hour of Day'))
  411. hour_of_day = data.getActivityByHourOfDay()
  412. f.write('<table><tr><th>Hour</th>')
  413. for i in range(1, 25):
  414. f.write('<th>%d</th>' % i)
  415. f.write('</tr>\n<tr><th>Commits</th>')
  416. fp = open(path + '/hour_of_day.dat', 'w')
  417. for i in range(0, 24):
  418. if i in hour_of_day:
  419. r = 127 + int((float(hour_of_day[i]) / data.activity_by_hour_of_day_busiest) * 128)
  420. f.write('<td style="background-color: rgb(%d, 0, 0)">%d</td>' % (r, hour_of_day[i]))
  421. fp.write('%d %d\n' % (i, hour_of_day[i]))
  422. else:
  423. f.write('<td>0</td>')
  424. fp.write('%d 0\n' % i)
  425. fp.close()
  426. f.write('</tr>\n<tr><th>%</th>')
  427. totalcommits = data.getTotalCommits()
  428. for i in range(0, 24):
  429. if i in hour_of_day:
  430. r = 127 + int((float(hour_of_day[i]) / data.activity_by_hour_of_day_busiest) * 128)
  431. f.write('<td style="background-color: rgb(%d, 0, 0)">%.2f</td>' % (r, (100.0 * hour_of_day[i]) / totalcommits))
  432. else:
  433. f.write('<td>0.00</td>')
  434. f.write('</tr></table>')
  435. f.write('<img src="hour_of_day.png" alt="Hour of Day" />')
  436. fg = open(path + '/hour_of_day.dat', 'w')
  437. for i in range(0, 24):
  438. if i in hour_of_day:
  439. fg.write('%d %d\n' % (i + 1, hour_of_day[i]))
  440. else:
  441. fg.write('%d 0\n' % (i + 1))
  442. fg.close()
  443. # Day of Week
  444. f.write(html_header(2, 'Day of Week'))
  445. day_of_week = data.getActivityByDayOfWeek()
  446. f.write('<div class="vtable"><table>')
  447. f.write('<tr><th>Day</th><th>Total (%)</th></tr>')
  448. fp = open(path + '/day_of_week.dat', 'w')
  449. for d in range(0, 7):
  450. commits = 0
  451. if d in day_of_week:
  452. commits = day_of_week[d]
  453. fp.write('%d %d\n' % (d + 1, commits))
  454. f.write('<tr>')
  455. f.write('<th>%d</th>' % (d + 1))
  456. if d in day_of_week:
  457. f.write('<td>%d (%.2f%%)</td>' % (day_of_week[d], (100.0 * day_of_week[d]) / totalcommits))
  458. else:
  459. f.write('<td>0</td>')
  460. f.write('</tr>')
  461. f.write('</table></div>')
  462. f.write('<img src="day_of_week.png" alt="Day of Week" />')
  463. fp.close()
  464. # Hour of Week
  465. f.write(html_header(2, 'Hour of Week'))
  466. f.write('<table>')
  467. f.write('<tr><th>Weekday</th>')
  468. for hour in range(0, 24):
  469. f.write('<th>%d</th>' % (hour + 1))
  470. f.write('</tr>')
  471. for weekday in range(0, 7):
  472. f.write('<tr><th>%d</th>' % (weekday + 1))
  473. for hour in range(0, 24):
  474. try:
  475. commits = data.activity_by_hour_of_week[weekday][hour]
  476. except KeyError:
  477. commits = 0
  478. if commits != 0:
  479. f.write('<td');
  480. r = 127 + int((float(commits) / data.activity_by_hour_of_week_busiest) * 128)
  481. f.write(' style="background-color: rgb(%d, 0, 0)"' % r);
  482. f.write('>%d</td>' % commits);
  483. else:
  484. f.write('<td></td>')
  485. f.write('</tr>')
  486. f.write('</table>')
  487. # Month of Year
  488. f.write(html_header(2, 'Month of Year'))
  489. f.write('<div class="vtable"><table>')
  490. f.write('<tr><th>Month</th><th>Commits (%)</th></tr>')
  491. fp = open (path + '/month_of_year.dat', 'w')
  492. for mm in range(1, 13):
  493. commits = 0
  494. if mm in data.activity_by_month_of_year:
  495. commits = data.activity_by_month_of_year[mm]
  496. f.write('<tr><td>%d</td><td>%d (%.2f %%)</td></tr>' % (mm, commits, (100.0 * commits) / data.getTotalCommits()))
  497. fp.write('%d %d\n' % (mm, commits))
  498. fp.close()
  499. f.write('</table></div>')
  500. f.write('<img src="month_of_year.png" alt="Month of Year" />')
  501. # Commits by year/month
  502. f.write(html_header(2, 'Commits by year/month'))
  503. f.write('<div class="vtable"><table><tr><th>Month</th><th>Commits</th></tr>')
  504. for yymm in reversed(sorted(data.commits_by_month.keys())):
  505. f.write('<tr><td>%s</td><td>%d</td></tr>' % (yymm, data.commits_by_month[yymm]))
  506. f.write('</table></div>')
  507. f.write('<img src="commits_by_year_month.png" alt="Commits by year/month" />')
  508. fg = open(path + '/commits_by_year_month.dat', 'w')
  509. for yymm in sorted(data.commits_by_month.keys()):
  510. fg.write('%s %s\n' % (yymm, data.commits_by_month[yymm]))
  511. fg.close()
  512. # Commits by year
  513. f.write(html_header(2, 'Commits by Year'))
  514. f.write('<div class="vtable"><table><tr><th>Year</th><th>Commits (% of all)</th></tr>')
  515. for yy in reversed(sorted(data.commits_by_year.keys())):
  516. f.write('<tr><td>%s</td><td>%d (%.2f%%)</td></tr>' % (yy, data.commits_by_year[yy], (100.0 * data.commits_by_year[yy]) / data.getTotalCommits()))
  517. f.write('</table></div>')
  518. f.write('<img src="commits_by_year.png" alt="Commits by Year" />')
  519. fg = open(path + '/commits_by_year.dat', 'w')
  520. for yy in sorted(data.commits_by_year.keys()):
  521. fg.write('%d %d\n' % (yy, data.commits_by_year[yy]))
  522. fg.close()
  523. f.write('</body></html>')
  524. f.close()
  525. ###
  526. # Authors
  527. f = open(path + '/authors.html', 'w')
  528. self.printHeader(f)
  529. f.write('<h1>Authors</h1>')
  530. self.printNav(f)
  531. # Authors :: List of authors
  532. f.write(html_header(2, 'List of Authors'))
  533. f.write('<table class="authors sortable" id="authors">')
  534. f.write('<tr><th>Author</th><th>Commits (%)</th><th>First commit</th><th>Last commit</th><th class="unsortable">Age</th><th># by commits</th></tr>')
  535. for author in sorted(data.getAuthors()):
  536. info = data.getAuthorInfo(author)
  537. f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%s</td><td>%s</td><td>%s</td><td>%d</td></tr>' % (author, info['commits'], info['commits_frac'], info['date_first'], info['date_last'], info['timedelta'], info['place_by_commits']))
  538. f.write('</table>')
  539. # Authors :: Author of Month
  540. f.write(html_header(2, 'Author of Month'))
  541. f.write('<table class="sortable" id="aom">')
  542. f.write('<tr><th>Month</th><th>Author</th><th>Commits (%)</th><th class="unsortable">Next top 5</th></tr>')
  543. for yymm in reversed(sorted(data.author_of_month.keys())):
  544. authordict = data.author_of_month[yymm]
  545. authors = getkeyssortedbyvalues(authordict)
  546. authors.reverse()
  547. commits = data.author_of_month[yymm][authors[0]]
  548. next = ', '.join(authors[1:5])
  549. f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td><td>%s</td></tr>' % (yymm, authors[0], commits, (100 * commits) / data.commits_by_month[yymm], data.commits_by_month[yymm], next))
  550. f.write('</table>')
  551. f.write(html_header(2, 'Author of Year'))
  552. f.write('<table class="sortable" id="aoy"><tr><th>Year</th><th>Author</th><th>Commits (%)</th><th class="unsortable">Next top 5</th></tr>')
  553. for yy in reversed(sorted(data.author_of_year.keys())):
  554. authordict = data.author_of_year[yy]
  555. authors = getkeyssortedbyvalues(authordict)
  556. authors.reverse()
  557. commits = data.author_of_year[yy][authors[0]]
  558. next = ', '.join(authors[1:5])
  559. f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td><td>%s</td></tr>' % (yy, authors[0], commits, (100 * commits) / data.commits_by_year[yy], data.commits_by_year[yy], next))
  560. f.write('</table>')
  561. f.write('</body></html>')
  562. f.close()
  563. ###
  564. # Files
  565. f = open(path + '/files.html', 'w')
  566. self.printHeader(f)
  567. f.write('<h1>Files</h1>')
  568. self.printNav(f)
  569. f.write('<dl>\n')
  570. f.write('<dt>Total files</dt><dd>%d</dd>' % data.getTotalFiles())
  571. f.write('<dt>Total lines</dt><dd>%d</dd>' % data.getTotalLOC())
  572. f.write('<dt>Average file size</dt><dd>%.2f bytes</dd>' % ((100.0 * data.getTotalLOC()) / data.getTotalFiles()))
  573. f.write('</dl>\n')
  574. # Files :: File count by date
  575. f.write(html_header(2, 'File count by date'))
  576. fg = open(path + '/files_by_date.dat', 'w')
  577. for stamp in sorted(data.files_by_stamp.keys()):
  578. fg.write('%s %d\n' % (datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), data.files_by_stamp[stamp]))
  579. fg.close()
  580. f.write('<img src="files_by_date.png" alt="Files by Date" />')
  581. #f.write('<h2>Average file size by date</h2>')
  582. # Files :: Extensions
  583. f.write(html_header(2, 'Extensions'))
  584. f.write('<table class="sortable" id="ext"><tr><th>Extension</th><th>Files (%)</th><th>Lines (%)</th><th>Lines/file</th></tr>')
  585. for ext in sorted(data.extensions.keys()):
  586. files = data.extensions[ext]['files']
  587. lines = data.extensions[ext]['lines']
  588. f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%d (%.2f%%)</td><td>%d</td></tr>' % (ext, files, (100.0 * files) / data.getTotalFiles(), lines, (100.0 * lines) / data.getTotalLOC(), lines / files))
  589. f.write('</table>')
  590. f.write('</body></html>')
  591. f.close()
  592. ###
  593. # Lines
  594. f = open(path + '/lines.html', 'w')
  595. self.printHeader(f)
  596. f.write('<h1>Lines</h1>')
  597. self.printNav(f)
  598. f.write('<dl>\n')
  599. f.write('<dt>Total lines</dt><dd>%d</dd>' % data.getTotalLOC())
  600. f.write('</dl>\n')
  601. f.write(html_header(2, 'Lines of Code'))
  602. f.write('<img src="lines_of_code.png" />')
  603. fg = open(path + '/lines_of_code.dat', 'w')
  604. for stamp in sorted(data.changes_by_date.keys()):
  605. fg.write('%d %d\n' % (stamp, data.changes_by_date[stamp]['lines']))
  606. fg.close()
  607. f.write('</body></html>')
  608. f.close()
  609. ###
  610. # tags.html
  611. f = open(path + '/tags.html', 'w')
  612. self.printHeader(f)
  613. f.write('<h1>Tags</h1>')
  614. self.printNav(f)
  615. f.write('<dl>')
  616. f.write('<dt>Total tags</dt><dd>%d</dd>' % len(data.tags))
  617. if len(data.tags) > 0:
  618. f.write('<dt>Average commits per tag</dt><dd>%.2f</dd>' % (data.getTotalCommits() / len(data.tags)))
  619. f.write('</dl>')
  620. f.write('<table>')
  621. f.write('<tr><th>Name</th><th>Date</th></tr>')
  622. # sort the tags by date desc
  623. tags_sorted_by_date_desc = map(lambda el : el[1], reversed(sorted(map(lambda el : (el[1]['date'], el[0]), data.tags.items()))))
  624. for tag in tags_sorted_by_date_desc:
  625. f.write('<tr><td>%s</td><td>%s</td></tr>' % (tag, data.tags[tag]['date']))
  626. f.write('</table>')
  627. f.write('</body></html>')
  628. f.close()
  629. self.createGraphs(path)
  630. def createGraphs(self, path):
  631. print 'Generating graphs...'
  632. # hour of day
  633. f = open(path + '/hour_of_day.plot', 'w')
  634. f.write(GNUPLOT_COMMON)
  635. f.write(
  636. """
  637. set output 'hour_of_day.png'
  638. unset key
  639. set xrange [0.5:24.5]
  640. set xtics 4
  641. set ylabel "Commits"
  642. plot 'hour_of_day.dat' using 1:2:(0.5) w boxes fs solid
  643. """)
  644. f.close()
  645. # day of week
  646. f = open(path + '/day_of_week.plot', 'w')
  647. f.write(GNUPLOT_COMMON)
  648. f.write(
  649. """
  650. set output 'day_of_week.png'
  651. unset key
  652. set xrange [0.5:7.5]
  653. set xtics 1
  654. set ylabel "Commits"
  655. plot 'day_of_week.dat' using 1:2:(0.5) w boxes fs solid
  656. """)
  657. f.close()
  658. # Month of Year
  659. f = open(path + '/month_of_year.plot', 'w')
  660. f.write(GNUPLOT_COMMON)
  661. f.write(
  662. """
  663. set output 'month_of_year.png'
  664. unset key
  665. set xrange [0.5:12.5]
  666. set xtics 1
  667. set ylabel "Commits"
  668. plot 'month_of_year.dat' using 1:2:(0.5) w boxes fs solid
  669. """)
  670. f.close()
  671. # commits_by_year_month
  672. f = open(path + '/commits_by_year_month.plot', 'w')
  673. f.write(GNUPLOT_COMMON)
  674. f.write(
  675. """
  676. set output 'commits_by_year_month.png'
  677. unset key
  678. set xdata time
  679. set timefmt "%Y-%m"
  680. set format x "%Y-%m"
  681. set xtics rotate by 90 15768000
  682. set bmargin 5
  683. set ylabel "Commits"
  684. plot 'commits_by_year_month.dat' using 1:2:(0.5) w boxes fs solid
  685. """)
  686. f.close()
  687. # commits_by_year
  688. f = open(path + '/commits_by_year.plot', 'w')
  689. f.write(GNUPLOT_COMMON)
  690. f.write(
  691. """
  692. set output 'commits_by_year.png'
  693. unset key
  694. set xtics 1
  695. set ylabel "Commits"
  696. set yrange [0:]
  697. plot 'commits_by_year.dat' using 1:2:(0.5) w boxes fs solid
  698. """)
  699. f.close()
  700. # Files by date
  701. f = open(path + '/files_by_date.plot', 'w')
  702. f.write(GNUPLOT_COMMON)
  703. f.write(
  704. """
  705. set output 'files_by_date.png'
  706. unset key
  707. set xdata time
  708. set timefmt "%Y-%m-%d"
  709. set format x "%Y-%m-%d"
  710. set ylabel "Files"
  711. set xtics rotate by 90
  712. set bmargin 6
  713. plot 'files_by_date.dat' using 1:2 w histeps
  714. """)
  715. f.close()
  716. # Lines of Code
  717. f = open(path + '/lines_of_code.plot', 'w')
  718. f.write(GNUPLOT_COMMON)
  719. f.write(
  720. """
  721. set output 'lines_of_code.png'
  722. unset key
  723. set xdata time
  724. set timefmt "%s"
  725. set format x "%Y-%m-%d"
  726. set ylabel "Lines"
  727. set xtics rotate by 90
  728. set bmargin 6
  729. plot 'lines_of_code.dat' using 1:2 w lines
  730. """)
  731. f.close()
  732. os.chdir(path)
  733. files = glob.glob(path + '/*.plot')
  734. for f in files:
  735. out = getpipeoutput([gnuplot_cmd + ' "%s"' % f])
  736. if len(out) > 0:
  737. print out
  738. def printHeader(self, f, title = ''):
  739. f.write(
  740. """<?xml version="1.0" encoding="UTF-8"?>
  741. <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
  742. <html xmlns="http://www.w3.org/1999/xhtml">
  743. <head>
  744. <title>GitStats - %s</title>
  745. <link rel="stylesheet" href="gitstats.css" type="text/css" />
  746. <meta name="generator" content="GitStats" />
  747. <script type="text/javascript" src="sortable.js"></script>
  748. </head>
  749. <body>
  750. """ % self.title)
  751. def printNav(self, f):
  752. f.write("""
  753. <div class="nav">
  754. <ul>
  755. <li><a href="index.html">General</a></li>
  756. <li><a href="activity.html">Activity</a></li>
  757. <li><a href="authors.html">Authors</a></li>
  758. <li><a href="files.html">Files</a></li>
  759. <li><a href="lines.html">Lines</a></li>
  760. <li><a href="tags.html">Tags</a></li>
  761. </ul>
  762. </div>
  763. """)
  764. usage = """
  765. Usage: gitstats [options] <gitpath> <outputpath>
  766. Options:
  767. """
  768. if len(sys.argv) < 3:
  769. print usage
  770. sys.exit(0)
  771. gitpath = sys.argv[1]
  772. outputpath = os.path.abspath(sys.argv[2])
  773. rundir = os.getcwd()
  774. try:
  775. os.makedirs(outputpath)
  776. except OSError:
  777. pass
  778. if not os.path.isdir(outputpath):
  779. print 'FATAL: Output path is not a directory or does not exist'
  780. sys.exit(1)
  781. print 'Git path: %s' % gitpath
  782. print 'Output path: %s' % outputpath
  783. os.chdir(gitpath)
  784. cachefile = os.path.join(outputpath, 'gitstats.cache')
  785. print 'Collecting data...'
  786. data = GitDataCollector()
  787. data.loadCache(cachefile)
  788. data.collect(gitpath)
  789. print 'Refining data...'
  790. data.saveCache(cachefile)
  791. data.refine()
  792. os.chdir(rundir)
  793. print 'Generating report...'
  794. report = HTMLReportCreator()
  795. report.create(data, outputpath)
  796. time_end = time.time()
  797. exectime_internal = time_end - time_start
  798. print 'Execution time %.5f secs, %.5f secs (%.2f %%) in external commands)' % (exectime_internal, exectime_external, (100.0 * exectime_external) / exectime_internal)