123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976
  1. #!/usr/bin/env python
  2. # Copyright (c) 2007-2009 Heikki Hokkanen <hoxu@users.sf.net> & others (see doc/author.txt)
  3. # GPLv2 / GPLv3
  4. import datetime
  5. import glob
  6. import os
  7. import pickle
  8. import re
  9. import shutil
  10. import subprocess
  11. import sys
  12. import time
  13. import zlib
  14. GNUPLOT_COMMON = 'set terminal png transparent\nset size 0.5,0.5\n'
  15. MAX_EXT_LENGTH = 10 # maximum file extension length
  16. exectime_internal = 0.0
  17. exectime_external = 0.0
  18. time_start = time.time()
  19. # By default, gnuplot is searched from path, but can be overridden with the
  20. # environment variable "GNUPLOT"
  21. gnuplot_cmd = 'gnuplot'
  22. if 'GNUPLOT' in os.environ:
  23. gnuplot_cmd = os.environ['GNUPLOT']
  24. def getpipeoutput(cmds, quiet = False):
  25. global exectime_external
  26. start = time.time()
  27. if not quiet:
  28. print '>> ' + ' | '.join(cmds),
  29. sys.stdout.flush()
  30. p0 = subprocess.Popen(cmds[0], stdout = subprocess.PIPE, shell = True)
  31. p = p0
  32. for x in cmds[1:]:
  33. p = subprocess.Popen(x, stdin = p0.stdout, stdout = subprocess.PIPE, shell = True)
  34. p0 = p
  35. output = p.communicate()[0]
  36. end = time.time()
  37. if not quiet:
  38. print '\r[%.5f] >> %s' % (end - start, ' | '.join(cmds))
  39. exectime_external += (end - start)
  40. return output.rstrip('\n')
  41. def getkeyssortedbyvalues(dict):
  42. return map(lambda el : el[1], sorted(map(lambda el : (el[1], el[0]), dict.items())))
  43. # dict['author'] = { 'commits': 512 } - ...key(dict, 'commits')
  44. def getkeyssortedbyvaluekey(d, key):
  45. return map(lambda el : el[1], sorted(map(lambda el : (d[el][key], el), d.keys())))
  46. VERSION = 0
  47. def getversion():
  48. global VERSION
  49. if VERSION == 0:
  50. VERSION = getpipeoutput(["git rev-parse --short HEAD"]).split('\n')[0]
  51. return VERSION
  52. class DataCollector:
  53. """Manages data collection from a revision control repository."""
  54. def __init__(self):
  55. self.stamp_created = time.time()
  56. self.cache = {}
  57. ##
  58. # This should be the main function to extract data from the repository.
  59. def collect(self, dir):
  60. self.dir = dir
  61. self.projectname = os.path.basename(os.path.abspath(dir))
  62. ##
  63. # Load cacheable data
  64. def loadCache(self, cachefile):
  65. if not os.path.exists(cachefile):
  66. return
  67. print 'Loading cache...'
  68. f = open(cachefile)
  69. try:
  70. self.cache = pickle.loads(zlib.decompress(f.read()))
  71. except:
  72. # temporary hack to upgrade non-compressed caches
  73. f.seek(0)
  74. self.cache = pickle.load(f)
  75. f.close()
  76. ##
  77. # Produce any additional statistics from the extracted data.
  78. def refine(self):
  79. pass
  80. ##
  81. # : get a dictionary of author
  82. def getAuthorInfo(self, author):
  83. return None
  84. def getActivityByDayOfWeek(self):
  85. return {}
  86. def getActivityByHourOfDay(self):
  87. return {}
  88. ##
  89. # Get a list of authors
  90. def getAuthors(self):
  91. return []
  92. def getFirstCommitDate(self):
  93. return datetime.datetime.now()
  94. def getLastCommitDate(self):
  95. return datetime.datetime.now()
  96. def getStampCreated(self):
  97. return self.stamp_created
  98. def getTags(self):
  99. return []
  100. def getTotalAuthors(self):
  101. return -1
  102. def getTotalCommits(self):
  103. return -1
  104. def getTotalFiles(self):
  105. return -1
  106. def getTotalLOC(self):
  107. return -1
  108. ##
  109. # Save cacheable data
  110. def saveCache(self, filename):
  111. print 'Saving cache...'
  112. f = open(cachefile, 'w')
  113. #pickle.dump(self.cache, f)
  114. data = zlib.compress(pickle.dumps(self.cache))
  115. f.write(data)
  116. f.close()
  117. class GitDataCollector(DataCollector):
  118. def collect(self, dir):
  119. DataCollector.collect(self, dir)
  120. try:
  121. self.total_authors = int(getpipeoutput(['git log', 'git shortlog -s', 'wc -l']))
  122. except:
  123. self.total_authors = 0
  124. #self.total_lines = int(getoutput('git-ls-files -z |xargs -0 cat |wc -l'))
  125. self.activity_by_hour_of_day = {} # hour -> commits
  126. self.activity_by_day_of_week = {} # day -> commits
  127. self.activity_by_month_of_year = {} # month [1-12] -> commits
  128. self.activity_by_hour_of_week = {} # weekday -> hour -> commits
  129. self.activity_by_hour_of_day_busiest = 0
  130. self.activity_by_hour_of_week_busiest = 0
  131. self.authors = {} # name -> {commits, first_commit_stamp, last_commit_stamp}
  132. # author of the month
  133. self.author_of_month = {} # month -> author -> commits
  134. self.author_of_year = {} # year -> author -> commits
  135. self.commits_by_month = {} # month -> commits
  136. self.commits_by_year = {} # year -> commits
  137. self.first_commit_stamp = 0
  138. self.last_commit_stamp = 0
  139. # tags
  140. self.tags = {}
  141. lines = getpipeoutput(['git show-ref --tags']).split('\n')
  142. for line in lines:
  143. if len(line) == 0:
  144. continue
  145. (hash, tag) = line.split(' ')
  146. tag = tag.replace('refs/tags/', '')
  147. output = getpipeoutput(['git log "%s" --pretty=format:"%%at %%an" -n 1' % hash])
  148. if len(output) > 0:
  149. parts = output.split(' ')
  150. stamp = 0
  151. try:
  152. stamp = int(parts[0])
  153. except ValueError:
  154. stamp = 0
  155. self.tags[tag] = { 'stamp': stamp, 'hash' : hash, 'date' : datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), 'commits': 0, 'authors': {} }
  156. # collect info on tags, starting from latest
  157. tags_sorted_by_date_desc = map(lambda el : el[1], reversed(sorted(map(lambda el : (el[1]['date'], el[0]), data.tags.items()))))
  158. prev = None
  159. for tag in reversed(tags_sorted_by_date_desc):
  160. cmd = 'git shortlog -s "%s"' % tag
  161. if prev != None:
  162. cmd += ' "^%s"' % prev
  163. output = getpipeoutput([cmd])
  164. if len(output) == 0:
  165. continue
  166. prev = tag
  167. for line in output.split('\n'):
  168. parts = re.split('\s+', line, 2)
  169. commits = int(parts[1])
  170. author = parts[2]
  171. self.tags[tag]['commits'] += commits
  172. self.tags[tag]['authors'][author] = commits
  173. # Collect revision statistics
  174. # Outputs "<stamp> <author>"
  175. lines = getpipeoutput(['git rev-list --pretty=format:"%at %an" HEAD', 'grep -v ^commit']).split('\n')
  176. for line in lines:
  177. # linux-2.6 says "<unknown>" for one line O_o
  178. parts = line.split(' ')
  179. author = ''
  180. try:
  181. stamp = int(parts[0])
  182. except ValueError:
  183. stamp = 0
  184. if len(parts) > 1:
  185. author = ' '.join(parts[1:])
  186. date = datetime.datetime.fromtimestamp(float(stamp))
  187. # First and last commit stamp
  188. if self.last_commit_stamp == 0:
  189. self.last_commit_stamp = stamp
  190. self.first_commit_stamp = stamp
  191. # activity
  192. # hour
  193. hour = date.hour
  194. if hour in self.activity_by_hour_of_day:
  195. self.activity_by_hour_of_day[hour] += 1
  196. else:
  197. self.activity_by_hour_of_day[hour] = 1
  198. # most active hour?
  199. if self.activity_by_hour_of_day[hour] > self.activity_by_hour_of_day_busiest:
  200. self.activity_by_hour_of_day_busiest = self.activity_by_hour_of_day[hour]
  201. # day of week
  202. day = date.weekday()
  203. if day in self.activity_by_day_of_week:
  204. self.activity_by_day_of_week[day] += 1
  205. else:
  206. self.activity_by_day_of_week[day] = 1
  207. # hour of week
  208. if day not in self.activity_by_hour_of_week:
  209. self.activity_by_hour_of_week[day] = {}
  210. if hour not in self.activity_by_hour_of_week[day]:
  211. self.activity_by_hour_of_week[day][hour] = 1
  212. else:
  213. self.activity_by_hour_of_week[day][hour] += 1
  214. # most active hour?
  215. if self.activity_by_hour_of_week[day][hour] > self.activity_by_hour_of_week_busiest:
  216. self.activity_by_hour_of_week_busiest = self.activity_by_hour_of_week[day][hour]
  217. # month of year
  218. month = date.month
  219. if month in self.activity_by_month_of_year:
  220. self.activity_by_month_of_year[month] += 1
  221. else:
  222. self.activity_by_month_of_year[month] = 1
  223. # author stats
  224. if author not in self.authors:
  225. self.authors[author] = {}
  226. # commits
  227. if 'last_commit_stamp' not in self.authors[author]:
  228. self.authors[author]['last_commit_stamp'] = stamp
  229. self.authors[author]['first_commit_stamp'] = stamp
  230. if 'commits' in self.authors[author]:
  231. self.authors[author]['commits'] += 1
  232. else:
  233. self.authors[author]['commits'] = 1
  234. # author of the month/year
  235. yymm = datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m')
  236. if yymm in self.author_of_month:
  237. if author in self.author_of_month[yymm]:
  238. self.author_of_month[yymm][author] += 1
  239. else:
  240. self.author_of_month[yymm][author] = 1
  241. else:
  242. self.author_of_month[yymm] = {}
  243. self.author_of_month[yymm][author] = 1
  244. if yymm in self.commits_by_month:
  245. self.commits_by_month[yymm] += 1
  246. else:
  247. self.commits_by_month[yymm] = 1
  248. yy = datetime.datetime.fromtimestamp(stamp).year
  249. if yy in self.author_of_year:
  250. if author in self.author_of_year[yy]:
  251. self.author_of_year[yy][author] += 1
  252. else:
  253. self.author_of_year[yy][author] = 1
  254. else:
  255. self.author_of_year[yy] = {}
  256. self.author_of_year[yy][author] = 1
  257. if yy in self.commits_by_year:
  258. self.commits_by_year[yy] += 1
  259. else:
  260. self.commits_by_year[yy] = 1
  261. # TODO Optimize this, it's the worst bottleneck
  262. # outputs "<stamp> <files>" for each revision
  263. self.files_by_stamp = {} # stamp -> files
  264. revlines = getpipeoutput(['git rev-list --pretty=format:"%at %T" HEAD', 'grep -v ^commit']).strip().split('\n')
  265. lines = []
  266. for revline in revlines:
  267. time, rev = revline.split(' ')
  268. linecount = self.getFilesInCommit(rev)
  269. lines.append('%d %d' % (int(time), linecount))
  270. self.total_commits = len(lines)
  271. for line in lines:
  272. parts = line.split(' ')
  273. if len(parts) != 2:
  274. continue
  275. (stamp, files) = parts[0:2]
  276. try:
  277. self.files_by_stamp[int(stamp)] = int(files)
  278. except ValueError:
  279. print 'Warning: failed to parse line "%s"' % line
  280. # extensions
  281. self.extensions = {} # extension -> files, lines
  282. lines = getpipeoutput(['git ls-tree -r -z HEAD']).split('\000')
  283. self.total_files = len(lines)
  284. for line in lines:
  285. if len(line) == 0:
  286. continue
  287. parts = re.split('\s+', line, 4)
  288. sha1 = parts[2]
  289. filename = parts[3]
  290. if filename.find('.') == -1 or filename.rfind('.') == 0:
  291. ext = ''
  292. else:
  293. ext = filename[(filename.rfind('.') + 1):]
  294. if len(ext) > MAX_EXT_LENGTH:
  295. ext = ''
  296. if ext not in self.extensions:
  297. self.extensions[ext] = {'files': 0, 'lines': 0}
  298. self.extensions[ext]['files'] += 1
  299. try:
  300. self.extensions[ext]['lines'] += int(getpipeoutput(['git cat-file blob %s' % sha1, 'wc -l']).split()[0])
  301. except:
  302. print 'Warning: Could not count lines for file "%s"' % line
  303. # line statistics
  304. # outputs:
  305. # N files changed, N insertions (+), N deletions(-)
  306. # <stamp> <author>
  307. self.changes_by_date = {} # stamp -> { files, ins, del }
  308. lines = getpipeoutput(['git log --shortstat --pretty=format:"%at %an"']).split('\n')
  309. lines.reverse()
  310. files = 0; inserted = 0; deleted = 0; total_lines = 0
  311. for line in lines:
  312. if len(line) == 0:
  313. continue
  314. # <stamp> <author>
  315. if line.find('files changed,') == -1:
  316. pos = line.find(' ')
  317. if pos != -1:
  318. try:
  319. (stamp, author) = (int(line[:pos]), line[pos+1:])
  320. self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted, 'lines': total_lines }
  321. except ValueError:
  322. print 'Warning: unexpected line "%s"' % line
  323. else:
  324. print 'Warning: unexpected line "%s"' % line
  325. else:
  326. numbers = re.findall('\d+', line)
  327. if len(numbers) == 3:
  328. (files, inserted, deleted) = map(lambda el : int(el), numbers)
  329. total_lines += inserted
  330. total_lines -= deleted
  331. else:
  332. print 'Warning: failed to handle line "%s"' % line
  333. (files, inserted, deleted) = (0, 0, 0)
  334. #self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted }
  335. self.total_lines = total_lines
  336. def refine(self):
  337. # authors
  338. # name -> {place_by_commits, commits_frac, date_first, date_last, timedelta}
  339. authors_by_commits = getkeyssortedbyvaluekey(self.authors, 'commits')
  340. authors_by_commits.reverse() # most first
  341. for i, name in enumerate(authors_by_commits):
  342. self.authors[name]['place_by_commits'] = i + 1
  343. for name in self.authors.keys():
  344. a = self.authors[name]
  345. a['commits_frac'] = (100 * float(a['commits'])) / self.getTotalCommits()
  346. date_first = datetime.datetime.fromtimestamp(a['first_commit_stamp'])
  347. date_last = datetime.datetime.fromtimestamp(a['last_commit_stamp'])
  348. delta = date_last - date_first
  349. a['date_first'] = date_first.strftime('%Y-%m-%d')
  350. a['date_last'] = date_last.strftime('%Y-%m-%d')
  351. a['timedelta'] = delta
  352. def getActivityByDayOfWeek(self):
  353. return self.activity_by_day_of_week
  354. def getActivityByHourOfDay(self):
  355. return self.activity_by_hour_of_day
  356. def getAuthorInfo(self, author):
  357. return self.authors[author]
  358. def getAuthors(self):
  359. return self.authors.keys()
  360. def getCommitDeltaDays(self):
  361. return (self.last_commit_stamp - self.first_commit_stamp) / 86400
  362. def getFilesInCommit(self, rev):
  363. try:
  364. res = self.cache['files_in_tree'][rev]
  365. except:
  366. res = int(getpipeoutput(['git ls-tree -r --name-only "%s"' % rev, 'wc -l']).split('\n')[0])
  367. if 'files_in_tree' not in self.cache:
  368. self.cache['files_in_tree'] = {}
  369. self.cache['files_in_tree'][rev] = res
  370. return res
  371. def getFirstCommitDate(self):
  372. return datetime.datetime.fromtimestamp(self.first_commit_stamp)
  373. def getLastCommitDate(self):
  374. return datetime.datetime.fromtimestamp(self.last_commit_stamp)
  375. def getTags(self):
  376. lines = getpipeoutput(['git show-ref --tags', 'cut -d/ -f3'])
  377. return lines.split('\n')
  378. def getTagDate(self, tag):
  379. return self.revToDate('tags/' + tag)
  380. def getTotalAuthors(self):
  381. return self.total_authors
  382. def getTotalCommits(self):
  383. return self.total_commits
  384. def getTotalFiles(self):
  385. return self.total_files
  386. def getTotalLOC(self):
  387. return self.total_lines
  388. def revToDate(self, rev):
  389. stamp = int(getpipeoutput(['git log --pretty=format:%%at "%s" -n 1' % rev]))
  390. return datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d')
  391. class ReportCreator:
  392. """Creates the actual report based on given data."""
  393. def __init__(self):
  394. pass
  395. def create(self, data, path):
  396. self.data = data
  397. self.path = path
  398. def html_linkify(text):
  399. return text.lower().replace(' ', '_')
  400. def html_header(level, text):
  401. name = html_linkify(text)
  402. return '\n<h%d><a href="#%s" name="%s">%s</a></h%d>\n\n' % (level, name, name, text, level)
  403. class HTMLReportCreator(ReportCreator):
  404. def create(self, data, path):
  405. ReportCreator.create(self, data, path)
  406. self.title = data.projectname
  407. # copy static files if they do not exist
  408. basedirs = [os.path.dirname(os.path.abspath(__file__)), '/usr/share/gitstats']
  409. for file in ('gitstats.css', 'sortable.js', 'arrow-up.gif', 'arrow-down.gif', 'arrow-none.gif'):
  410. for base in basedirs:
  411. src = base + '/' + file
  412. if os.path.exists(src):
  413. shutil.copyfile(src, path + '/' + file)
  414. break
  415. else:
  416. print 'Warning: "%s" not found, so not copied (searched: %s)' % (file, basedirs)
  417. f = open(path + "/index.html", 'w')
  418. format = '%Y-%m-%d %H:%M:%S'
  419. self.printHeader(f)
  420. f.write('<h1>GitStats - %s</h1>' % data.projectname)
  421. self.printNav(f)
  422. f.write('<dl>')
  423. f.write('<dt>Project name</dt><dd>%s</dd>' % (data.projectname))
  424. f.write('<dt>Generated</dt><dd>%s (in %d seconds)</dd>' % (datetime.datetime.now().strftime(format), time.time() - data.getStampCreated()))
  425. f.write('<dt>Generator</dt><dd><a href="http://gitstats.sourceforge.net/">GitStats</a> (version %s)</dd>' % getversion())
  426. f.write('<dt>Report Period</dt><dd>%s to %s (%d days)</dd>' % (data.getFirstCommitDate().strftime(format), data.getLastCommitDate().strftime(format), data.getCommitDeltaDays()))
  427. f.write('<dt>Total Files</dt><dd>%s</dd>' % data.getTotalFiles())
  428. f.write('<dt>Total Lines of Code</dt><dd>%s</dd>' % data.getTotalLOC())
  429. f.write('<dt>Total Commits</dt><dd>%s</dd>' % data.getTotalCommits())
  430. f.write('<dt>Authors</dt><dd>%s</dd>' % data.getTotalAuthors())
  431. f.write('</dl>')
  432. f.write('</body>\n</html>')
  433. f.close()
  434. ###
  435. # Activity
  436. f = open(path + '/activity.html', 'w')
  437. self.printHeader(f)
  438. f.write('<h1>Activity</h1>')
  439. self.printNav(f)
  440. #f.write('<h2>Last 30 days</h2>')
  441. #f.write('<h2>Last 12 months</h2>')
  442. # Hour of Day
  443. f.write(html_header(2, 'Hour of Day'))
  444. hour_of_day = data.getActivityByHourOfDay()
  445. f.write('<table><tr><th>Hour</th>')
  446. for i in range(0, 24):
  447. f.write('<th>%d</th>' % i)
  448. f.write('</tr>\n<tr><th>Commits</th>')
  449. fp = open(path + '/hour_of_day.dat', 'w')
  450. for i in range(0, 24):
  451. if i in hour_of_day:
  452. r = 127 + int((float(hour_of_day[i]) / data.activity_by_hour_of_day_busiest) * 128)
  453. f.write('<td style="background-color: rgb(%d, 0, 0)">%d</td>' % (r, hour_of_day[i]))
  454. fp.write('%d %d\n' % (i, hour_of_day[i]))
  455. else:
  456. f.write('<td>0</td>')
  457. fp.write('%d 0\n' % i)
  458. fp.close()
  459. f.write('</tr>\n<tr><th>%</th>')
  460. totalcommits = data.getTotalCommits()
  461. for i in range(0, 24):
  462. if i in hour_of_day:
  463. r = 127 + int((float(hour_of_day[i]) / data.activity_by_hour_of_day_busiest) * 128)
  464. f.write('<td style="background-color: rgb(%d, 0, 0)">%.2f</td>' % (r, (100.0 * hour_of_day[i]) / totalcommits))
  465. else:
  466. f.write('<td>0.00</td>')
  467. f.write('</tr></table>')
  468. f.write('<img src="hour_of_day.png" alt="Hour of Day" />')
  469. fg = open(path + '/hour_of_day.dat', 'w')
  470. for i in range(0, 24):
  471. if i in hour_of_day:
  472. fg.write('%d %d\n' % (i + 1, hour_of_day[i]))
  473. else:
  474. fg.write('%d 0\n' % (i + 1))
  475. fg.close()
  476. # Day of Week
  477. f.write(html_header(2, 'Day of Week'))
  478. day_of_week = data.getActivityByDayOfWeek()
  479. f.write('<div class="vtable"><table>')
  480. f.write('<tr><th>Day</th><th>Total (%)</th></tr>')
  481. fp = open(path + '/day_of_week.dat', 'w')
  482. for d in range(0, 7):
  483. commits = 0
  484. if d in day_of_week:
  485. commits = day_of_week[d]
  486. fp.write('%d %d\n' % (d + 1, commits))
  487. f.write('<tr>')
  488. f.write('<th>%d</th>' % (d + 1))
  489. if d in day_of_week:
  490. f.write('<td>%d (%.2f%%)</td>' % (day_of_week[d], (100.0 * day_of_week[d]) / totalcommits))
  491. else:
  492. f.write('<td>0</td>')
  493. f.write('</tr>')
  494. f.write('</table></div>')
  495. f.write('<img src="day_of_week.png" alt="Day of Week" />')
  496. fp.close()
  497. # Hour of Week
  498. f.write(html_header(2, 'Hour of Week'))
  499. f.write('<table>')
  500. f.write('<tr><th>Weekday</th>')
  501. for hour in range(0, 24):
  502. f.write('<th>%d</th>' % (hour))
  503. f.write('</tr>')
  504. for weekday in range(0, 7):
  505. f.write('<tr><th>%d</th>' % (weekday + 1))
  506. for hour in range(0, 24):
  507. try:
  508. commits = data.activity_by_hour_of_week[weekday][hour]
  509. except KeyError:
  510. commits = 0
  511. if commits != 0:
  512. f.write('<td')
  513. r = 127 + int((float(commits) / data.activity_by_hour_of_week_busiest) * 128)
  514. f.write(' style="background-color: rgb(%d, 0, 0)"' % r)
  515. f.write('>%d</td>' % commits)
  516. else:
  517. f.write('<td></td>')
  518. f.write('</tr>')
  519. f.write('</table>')
  520. # Month of Year
  521. f.write(html_header(2, 'Month of Year'))
  522. f.write('<div class="vtable"><table>')
  523. f.write('<tr><th>Month</th><th>Commits (%)</th></tr>')
  524. fp = open (path + '/month_of_year.dat', 'w')
  525. for mm in range(1, 13):
  526. commits = 0
  527. if mm in data.activity_by_month_of_year:
  528. commits = data.activity_by_month_of_year[mm]
  529. f.write('<tr><td>%d</td><td>%d (%.2f %%)</td></tr>' % (mm, commits, (100.0 * commits) / data.getTotalCommits()))
  530. fp.write('%d %d\n' % (mm, commits))
  531. fp.close()
  532. f.write('</table></div>')
  533. f.write('<img src="month_of_year.png" alt="Month of Year" />')
  534. # Commits by year/month
  535. f.write(html_header(2, 'Commits by year/month'))
  536. f.write('<div class="vtable"><table><tr><th>Month</th><th>Commits</th></tr>')
  537. for yymm in reversed(sorted(data.commits_by_month.keys())):
  538. f.write('<tr><td>%s</td><td>%d</td></tr>' % (yymm, data.commits_by_month[yymm]))
  539. f.write('</table></div>')
  540. f.write('<img src="commits_by_year_month.png" alt="Commits by year/month" />')
  541. fg = open(path + '/commits_by_year_month.dat', 'w')
  542. for yymm in sorted(data.commits_by_month.keys()):
  543. fg.write('%s %s\n' % (yymm, data.commits_by_month[yymm]))
  544. fg.close()
  545. # Commits by year
  546. f.write(html_header(2, 'Commits by Year'))
  547. f.write('<div class="vtable"><table><tr><th>Year</th><th>Commits (% of all)</th></tr>')
  548. for yy in reversed(sorted(data.commits_by_year.keys())):
  549. f.write('<tr><td>%s</td><td>%d (%.2f%%)</td></tr>' % (yy, data.commits_by_year[yy], (100.0 * data.commits_by_year[yy]) / data.getTotalCommits()))
  550. f.write('</table></div>')
  551. f.write('<img src="commits_by_year.png" alt="Commits by Year" />')
  552. fg = open(path + '/commits_by_year.dat', 'w')
  553. for yy in sorted(data.commits_by_year.keys()):
  554. fg.write('%d %d\n' % (yy, data.commits_by_year[yy]))
  555. fg.close()
  556. f.write('</body></html>')
  557. f.close()
  558. ###
  559. # Authors
  560. f = open(path + '/authors.html', 'w')
  561. self.printHeader(f)
  562. f.write('<h1>Authors</h1>')
  563. self.printNav(f)
  564. # Authors :: List of authors
  565. f.write(html_header(2, 'List of Authors'))
  566. f.write('<table class="authors sortable" id="authors">')
  567. f.write('<tr><th>Author</th><th>Commits (%)</th><th>First commit</th><th>Last commit</th><th class="unsortable">Age</th><th># by commits</th></tr>')
  568. for author in sorted(data.getAuthors()):
  569. info = data.getAuthorInfo(author)
  570. f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%s</td><td>%s</td><td>%s</td><td>%d</td></tr>' % (author, info['commits'], info['commits_frac'], info['date_first'], info['date_last'], info['timedelta'], info['place_by_commits']))
  571. f.write('</table>')
  572. # Authors :: Author of Month
  573. f.write(html_header(2, 'Author of Month'))
  574. f.write('<table class="sortable" id="aom">')
  575. f.write('<tr><th>Month</th><th>Author</th><th>Commits (%)</th><th class="unsortable">Next top 5</th></tr>')
  576. for yymm in reversed(sorted(data.author_of_month.keys())):
  577. authordict = data.author_of_month[yymm]
  578. authors = getkeyssortedbyvalues(authordict)
  579. authors.reverse()
  580. commits = data.author_of_month[yymm][authors[0]]
  581. next = ', '.join(authors[1:5])
  582. f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td><td>%s</td></tr>' % (yymm, authors[0], commits, (100 * commits) / data.commits_by_month[yymm], data.commits_by_month[yymm], next))
  583. f.write('</table>')
  584. f.write(html_header(2, 'Author of Year'))
  585. f.write('<table class="sortable" id="aoy"><tr><th>Year</th><th>Author</th><th>Commits (%)</th><th class="unsortable">Next top 5</th></tr>')
  586. for yy in reversed(sorted(data.author_of_year.keys())):
  587. authordict = data.author_of_year[yy]
  588. authors = getkeyssortedbyvalues(authordict)
  589. authors.reverse()
  590. commits = data.author_of_year[yy][authors[0]]
  591. next = ', '.join(authors[1:5])
  592. f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td><td>%s</td></tr>' % (yy, authors[0], commits, (100 * commits) / data.commits_by_year[yy], data.commits_by_year[yy], next))
  593. f.write('</table>')
  594. f.write('</body></html>')
  595. f.close()
  596. ###
  597. # Files
  598. f = open(path + '/files.html', 'w')
  599. self.printHeader(f)
  600. f.write('<h1>Files</h1>')
  601. self.printNav(f)
  602. f.write('<dl>\n')
  603. f.write('<dt>Total files</dt><dd>%d</dd>' % data.getTotalFiles())
  604. f.write('<dt>Total lines</dt><dd>%d</dd>' % data.getTotalLOC())
  605. f.write('<dt>Average file size</dt><dd>%.2f bytes</dd>' % ((100.0 * data.getTotalLOC()) / data.getTotalFiles()))
  606. f.write('</dl>\n')
  607. # Files :: File count by date
  608. f.write(html_header(2, 'File count by date'))
  609. fg = open(path + '/files_by_date.dat', 'w')
  610. for stamp in sorted(data.files_by_stamp.keys()):
  611. fg.write('%s %d\n' % (datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), data.files_by_stamp[stamp]))
  612. fg.close()
  613. f.write('<img src="files_by_date.png" alt="Files by Date" />')
  614. #f.write('<h2>Average file size by date</h2>')
  615. # Files :: Extensions
  616. f.write(html_header(2, 'Extensions'))
  617. f.write('<table class="sortable" id="ext"><tr><th>Extension</th><th>Files (%)</th><th>Lines (%)</th><th>Lines/file</th></tr>')
  618. for ext in sorted(data.extensions.keys()):
  619. files = data.extensions[ext]['files']
  620. lines = data.extensions[ext]['lines']
  621. f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%d (%.2f%%)</td><td>%d</td></tr>' % (ext, files, (100.0 * files) / data.getTotalFiles(), lines, (100.0 * lines) / data.getTotalLOC(), lines / files))
  622. f.write('</table>')
  623. f.write('</body></html>')
  624. f.close()
  625. ###
  626. # Lines
  627. f = open(path + '/lines.html', 'w')
  628. self.printHeader(f)
  629. f.write('<h1>Lines</h1>')
  630. self.printNav(f)
  631. f.write('<dl>\n')
  632. f.write('<dt>Total lines</dt><dd>%d</dd>' % data.getTotalLOC())
  633. f.write('</dl>\n')
  634. f.write(html_header(2, 'Lines of Code'))
  635. f.write('<img src="lines_of_code.png" />')
  636. fg = open(path + '/lines_of_code.dat', 'w')
  637. for stamp in sorted(data.changes_by_date.keys()):
  638. fg.write('%d %d\n' % (stamp, data.changes_by_date[stamp]['lines']))
  639. fg.close()
  640. f.write('</body></html>')
  641. f.close()
  642. ###
  643. # tags.html
  644. f = open(path + '/tags.html', 'w')
  645. self.printHeader(f)
  646. f.write('<h1>Tags</h1>')
  647. self.printNav(f)
  648. f.write('<dl>')
  649. f.write('<dt>Total tags</dt><dd>%d</dd>' % len(data.tags))
  650. if len(data.tags) > 0:
  651. f.write('<dt>Average commits per tag</dt><dd>%.2f</dd>' % (data.getTotalCommits() / len(data.tags)))
  652. f.write('</dl>')
  653. f.write('<table>')
  654. f.write('<tr><th>Name</th><th>Date</th><th>Commits</th><th>Authors</th></tr>')
  655. # sort the tags by date desc
  656. tags_sorted_by_date_desc = map(lambda el : el[1], reversed(sorted(map(lambda el : (el[1]['date'], el[0]), data.tags.items()))))
  657. for tag in tags_sorted_by_date_desc:
  658. authorinfo = []
  659. authors_by_commits = getkeyssortedbyvalues(data.tags[tag]['authors'])
  660. for i in reversed(authors_by_commits):
  661. authorinfo.append('%s (%d)' % (i, data.tags[tag]['authors'][i]))
  662. f.write('<tr><td>%s</td><td>%s</td><td>%d</td><td>%s</td></tr>' % (tag, data.tags[tag]['date'], data.tags[tag]['commits'], ', '.join(authorinfo)))
  663. f.write('</table>')
  664. f.write('</body></html>')
  665. f.close()
  666. self.createGraphs(path)
  667. def createGraphs(self, path):
  668. print 'Generating graphs...'
  669. # hour of day
  670. f = open(path + '/hour_of_day.plot', 'w')
  671. f.write(GNUPLOT_COMMON)
  672. f.write(
  673. """
  674. set output 'hour_of_day.png'
  675. unset key
  676. set xrange [0.5:24.5]
  677. set xtics 4
  678. set ylabel "Commits"
  679. plot 'hour_of_day.dat' using 1:2:(0.5) w boxes fs solid
  680. """)
  681. f.close()
  682. # day of week
  683. f = open(path + '/day_of_week.plot', 'w')
  684. f.write(GNUPLOT_COMMON)
  685. f.write(
  686. """
  687. set output 'day_of_week.png'
  688. unset key
  689. set xrange [0.5:7.5]
  690. set xtics 1
  691. set ylabel "Commits"
  692. plot 'day_of_week.dat' using 1:2:(0.5) w boxes fs solid
  693. """)
  694. f.close()
  695. # Month of Year
  696. f = open(path + '/month_of_year.plot', 'w')
  697. f.write(GNUPLOT_COMMON)
  698. f.write(
  699. """
  700. set output 'month_of_year.png'
  701. unset key
  702. set xrange [0.5:12.5]
  703. set xtics 1
  704. set ylabel "Commits"
  705. plot 'month_of_year.dat' using 1:2:(0.5) w boxes fs solid
  706. """)
  707. f.close()
  708. # commits_by_year_month
  709. f = open(path + '/commits_by_year_month.plot', 'w')
  710. f.write(GNUPLOT_COMMON)
  711. f.write(
  712. """
  713. set output 'commits_by_year_month.png'
  714. unset key
  715. set xdata time
  716. set timefmt "%Y-%m"
  717. set format x "%Y-%m"
  718. set xtics rotate by 90 15768000
  719. set bmargin 5
  720. set ylabel "Commits"
  721. plot 'commits_by_year_month.dat' using 1:2:(0.5) w boxes fs solid
  722. """)
  723. f.close()
  724. # commits_by_year
  725. f = open(path + '/commits_by_year.plot', 'w')
  726. f.write(GNUPLOT_COMMON)
  727. f.write(
  728. """
  729. set output 'commits_by_year.png'
  730. unset key
  731. set xtics 1
  732. set ylabel "Commits"
  733. set yrange [0:]
  734. plot 'commits_by_year.dat' using 1:2:(0.5) w boxes fs solid
  735. """)
  736. f.close()
  737. # Files by date
  738. f = open(path + '/files_by_date.plot', 'w')
  739. f.write(GNUPLOT_COMMON)
  740. f.write(
  741. """
  742. set output 'files_by_date.png'
  743. unset key
  744. set xdata time
  745. set timefmt "%Y-%m-%d"
  746. set format x "%Y-%m-%d"
  747. set ylabel "Files"
  748. set xtics rotate by 90
  749. set bmargin 6
  750. plot 'files_by_date.dat' using 1:2 w steps
  751. """)
  752. f.close()
  753. # Lines of Code
  754. f = open(path + '/lines_of_code.plot', 'w')
  755. f.write(GNUPLOT_COMMON)
  756. f.write(
  757. """
  758. set output 'lines_of_code.png'
  759. unset key
  760. set xdata time
  761. set timefmt "%s"
  762. set format x "%Y-%m-%d"
  763. set ylabel "Lines"
  764. set xtics rotate by 90
  765. set bmargin 6
  766. plot 'lines_of_code.dat' using 1:2 w lines
  767. """)
  768. f.close()
  769. os.chdir(path)
  770. files = glob.glob(path + '/*.plot')
  771. for f in files:
  772. out = getpipeoutput([gnuplot_cmd + ' "%s"' % f])
  773. if len(out) > 0:
  774. print out
  775. def printHeader(self, f, title = ''):
  776. f.write(
  777. """<?xml version="1.0" encoding="UTF-8"?>
  778. <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
  779. <html xmlns="http://www.w3.org/1999/xhtml">
  780. <head>
  781. <title>GitStats - %s</title>
  782. <link rel="stylesheet" href="gitstats.css" type="text/css" />
  783. <meta name="generator" content="GitStats %s" />
  784. <script type="text/javascript" src="sortable.js"></script>
  785. </head>
  786. <body>
  787. """ % (self.title, getversion()))
  788. def printNav(self, f):
  789. f.write("""
  790. <div class="nav">
  791. <ul>
  792. <li><a href="index.html">General</a></li>
  793. <li><a href="activity.html">Activity</a></li>
  794. <li><a href="authors.html">Authors</a></li>
  795. <li><a href="files.html">Files</a></li>
  796. <li><a href="lines.html">Lines</a></li>
  797. <li><a href="tags.html">Tags</a></li>
  798. </ul>
  799. </div>
  800. """)
  801. usage = """
  802. Usage: gitstats [options] <gitpath> <outputpath>
  803. Options:
  804. """
  805. if len(sys.argv) < 3:
  806. print usage
  807. sys.exit(0)
  808. gitpath = sys.argv[1]
  809. outputpath = os.path.abspath(sys.argv[2])
  810. rundir = os.getcwd()
  811. try:
  812. os.makedirs(outputpath)
  813. except OSError:
  814. pass
  815. if not os.path.isdir(outputpath):
  816. print 'FATAL: Output path is not a directory or does not exist'
  817. sys.exit(1)
  818. print 'Git path: %s' % gitpath
  819. print 'Output path: %s' % outputpath
  820. os.chdir(gitpath)
  821. cachefile = os.path.join(outputpath, 'gitstats.cache')
  822. print 'Collecting data...'
  823. data = GitDataCollector()
  824. data.loadCache(cachefile)
  825. data.collect(gitpath)
  826. print 'Refining data...'
  827. data.saveCache(cachefile)
  828. data.refine()
  829. os.chdir(rundir)
  830. print 'Generating report...'
  831. report = HTMLReportCreator()
  832. report.create(data, outputpath)
  833. time_end = time.time()
  834. exectime_internal = time_end - time_start
  835. print 'Execution time %.5f secs, %.5f secs (%.2f %%) in external commands)' % (exectime_internal, exectime_external, (100.0 * exectime_external) / exectime_internal)