12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002
  1. #!/usr/bin/env python
  2. # Copyright (c) 2007-2009 Heikki Hokkanen <hoxu@users.sf.net> & others (see doc/author.txt)
  3. # GPLv2 / GPLv3
  4. import datetime
  5. import glob
  6. import os
  7. import pickle
  8. import platform
  9. import re
  10. import shutil
  11. import subprocess
  12. import sys
  13. import time
  14. import zlib
  15. GNUPLOT_COMMON = 'set terminal png transparent\nset size 0.5,0.5\n'
  16. MAX_EXT_LENGTH = 10 # maximum file extension length
  17. ON_LINUX = (platform.system() == 'Linux')
  18. exectime_internal = 0.0
  19. exectime_external = 0.0
  20. time_start = time.time()
  21. # By default, gnuplot is searched from path, but can be overridden with the
  22. # environment variable "GNUPLOT"
  23. gnuplot_cmd = 'gnuplot'
  24. if 'GNUPLOT' in os.environ:
  25. gnuplot_cmd = os.environ['GNUPLOT']
  26. def getpipeoutput(cmds, quiet = False):
  27. global exectime_external
  28. start = time.time()
  29. if not quiet and ON_LINUX and os.isatty(1):
  30. print '>> ' + ' | '.join(cmds),
  31. sys.stdout.flush()
  32. p0 = subprocess.Popen(cmds[0], stdout = subprocess.PIPE, shell = True)
  33. p = p0
  34. for x in cmds[1:]:
  35. p = subprocess.Popen(x, stdin = p0.stdout, stdout = subprocess.PIPE, shell = True)
  36. p0 = p
  37. output = p.communicate()[0]
  38. end = time.time()
  39. if not quiet:
  40. if ON_LINUX and os.isatty(1):
  41. print '\r',
  42. print '[%.5f] >> %s' % (end - start, ' | '.join(cmds))
  43. exectime_external += (end - start)
  44. return output.rstrip('\n')
  45. def getkeyssortedbyvalues(dict):
  46. return map(lambda el : el[1], sorted(map(lambda el : (el[1], el[0]), dict.items())))
  47. # dict['author'] = { 'commits': 512 } - ...key(dict, 'commits')
  48. def getkeyssortedbyvaluekey(d, key):
  49. return map(lambda el : el[1], sorted(map(lambda el : (d[el][key], el), d.keys())))
  50. VERSION = 0
  51. def getversion():
  52. global VERSION
  53. if VERSION == 0:
  54. VERSION = getpipeoutput(["git rev-parse --short HEAD"]).split('\n')[0]
  55. return VERSION
  56. class DataCollector:
  57. """Manages data collection from a revision control repository."""
  58. def __init__(self):
  59. self.stamp_created = time.time()
  60. self.cache = {}
  61. ##
  62. # This should be the main function to extract data from the repository.
  63. def collect(self, dir):
  64. self.dir = dir
  65. self.projectname = os.path.basename(os.path.abspath(dir))
  66. ##
  67. # Load cacheable data
  68. def loadCache(self, cachefile):
  69. if not os.path.exists(cachefile):
  70. return
  71. print 'Loading cache...'
  72. f = open(cachefile)
  73. try:
  74. self.cache = pickle.loads(zlib.decompress(f.read()))
  75. except:
  76. # temporary hack to upgrade non-compressed caches
  77. f.seek(0)
  78. self.cache = pickle.load(f)
  79. f.close()
  80. ##
  81. # Produce any additional statistics from the extracted data.
  82. def refine(self):
  83. pass
  84. ##
  85. # : get a dictionary of author
  86. def getAuthorInfo(self, author):
  87. return None
  88. def getActivityByDayOfWeek(self):
  89. return {}
  90. def getActivityByHourOfDay(self):
  91. return {}
  92. ##
  93. # Get a list of authors
  94. def getAuthors(self):
  95. return []
  96. def getFirstCommitDate(self):
  97. return datetime.datetime.now()
  98. def getLastCommitDate(self):
  99. return datetime.datetime.now()
  100. def getStampCreated(self):
  101. return self.stamp_created
  102. def getTags(self):
  103. return []
  104. def getTotalAuthors(self):
  105. return -1
  106. def getTotalCommits(self):
  107. return -1
  108. def getTotalFiles(self):
  109. return -1
  110. def getTotalLOC(self):
  111. return -1
  112. ##
  113. # Save cacheable data
  114. def saveCache(self, filename):
  115. print 'Saving cache...'
  116. f = open(cachefile, 'w')
  117. #pickle.dump(self.cache, f)
  118. data = zlib.compress(pickle.dumps(self.cache))
  119. f.write(data)
  120. f.close()
  121. class GitDataCollector(DataCollector):
  122. def collect(self, dir):
  123. DataCollector.collect(self, dir)
  124. try:
  125. self.total_authors = int(getpipeoutput(['git log', 'git shortlog -s', 'wc -l']))
  126. except:
  127. self.total_authors = 0
  128. #self.total_lines = int(getoutput('git-ls-files -z |xargs -0 cat |wc -l'))
  129. self.activity_by_hour_of_day = {} # hour -> commits
  130. self.activity_by_day_of_week = {} # day -> commits
  131. self.activity_by_month_of_year = {} # month [1-12] -> commits
  132. self.activity_by_hour_of_week = {} # weekday -> hour -> commits
  133. self.activity_by_hour_of_day_busiest = 0
  134. self.activity_by_hour_of_week_busiest = 0
  135. self.authors = {} # name -> {commits, first_commit_stamp, last_commit_stamp, last_active_day, active_days}
  136. # author of the month
  137. self.author_of_month = {} # month -> author -> commits
  138. self.author_of_year = {} # year -> author -> commits
  139. self.commits_by_month = {} # month -> commits
  140. self.commits_by_year = {} # year -> commits
  141. self.first_commit_stamp = 0
  142. self.last_commit_stamp = 0
  143. self.last_active_day = None
  144. self.active_days = 0
  145. # tags
  146. self.tags = {}
  147. lines = getpipeoutput(['git show-ref --tags']).split('\n')
  148. for line in lines:
  149. if len(line) == 0:
  150. continue
  151. (hash, tag) = line.split(' ')
  152. tag = tag.replace('refs/tags/', '')
  153. output = getpipeoutput(['git log "%s" --pretty=format:"%%at %%an" -n 1' % hash])
  154. if len(output) > 0:
  155. parts = output.split(' ')
  156. stamp = 0
  157. try:
  158. stamp = int(parts[0])
  159. except ValueError:
  160. stamp = 0
  161. self.tags[tag] = { 'stamp': stamp, 'hash' : hash, 'date' : datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), 'commits': 0, 'authors': {} }
  162. # collect info on tags, starting from latest
  163. tags_sorted_by_date_desc = map(lambda el : el[1], reversed(sorted(map(lambda el : (el[1]['date'], el[0]), data.tags.items()))))
  164. prev = None
  165. for tag in reversed(tags_sorted_by_date_desc):
  166. cmd = 'git shortlog -s "%s"' % tag
  167. if prev != None:
  168. cmd += ' "^%s"' % prev
  169. output = getpipeoutput([cmd])
  170. if len(output) == 0:
  171. continue
  172. prev = tag
  173. for line in output.split('\n'):
  174. parts = re.split('\s+', line, 2)
  175. commits = int(parts[1])
  176. author = parts[2]
  177. self.tags[tag]['commits'] += commits
  178. self.tags[tag]['authors'][author] = commits
  179. # Collect revision statistics
  180. # Outputs "<stamp> <author>"
  181. lines = getpipeoutput(['git rev-list --pretty=format:"%at %an" HEAD', 'grep -v ^commit']).split('\n')
  182. for line in lines:
  183. # linux-2.6 says "<unknown>" for one line O_o
  184. parts = line.split(' ')
  185. author = ''
  186. try:
  187. stamp = int(parts[0])
  188. except ValueError:
  189. stamp = 0
  190. if len(parts) > 1:
  191. author = ' '.join(parts[1:])
  192. date = datetime.datetime.fromtimestamp(float(stamp))
  193. # First and last commit stamp
  194. if self.last_commit_stamp == 0:
  195. self.last_commit_stamp = stamp
  196. self.first_commit_stamp = stamp
  197. # activity
  198. # hour
  199. hour = date.hour
  200. if hour in self.activity_by_hour_of_day:
  201. self.activity_by_hour_of_day[hour] += 1
  202. else:
  203. self.activity_by_hour_of_day[hour] = 1
  204. # most active hour?
  205. if self.activity_by_hour_of_day[hour] > self.activity_by_hour_of_day_busiest:
  206. self.activity_by_hour_of_day_busiest = self.activity_by_hour_of_day[hour]
  207. # day of week
  208. day = date.weekday()
  209. if day in self.activity_by_day_of_week:
  210. self.activity_by_day_of_week[day] += 1
  211. else:
  212. self.activity_by_day_of_week[day] = 1
  213. # hour of week
  214. if day not in self.activity_by_hour_of_week:
  215. self.activity_by_hour_of_week[day] = {}
  216. if hour not in self.activity_by_hour_of_week[day]:
  217. self.activity_by_hour_of_week[day][hour] = 1
  218. else:
  219. self.activity_by_hour_of_week[day][hour] += 1
  220. # most active hour?
  221. if self.activity_by_hour_of_week[day][hour] > self.activity_by_hour_of_week_busiest:
  222. self.activity_by_hour_of_week_busiest = self.activity_by_hour_of_week[day][hour]
  223. # month of year
  224. month = date.month
  225. if month in self.activity_by_month_of_year:
  226. self.activity_by_month_of_year[month] += 1
  227. else:
  228. self.activity_by_month_of_year[month] = 1
  229. # author stats
  230. if author not in self.authors:
  231. self.authors[author] = {}
  232. # commits
  233. if 'last_commit_stamp' not in self.authors[author]:
  234. self.authors[author]['last_commit_stamp'] = stamp
  235. self.authors[author]['first_commit_stamp'] = stamp
  236. if 'commits' in self.authors[author]:
  237. self.authors[author]['commits'] += 1
  238. else:
  239. self.authors[author]['commits'] = 1
  240. # author of the month/year
  241. yymm = date.strftime('%Y-%m')
  242. if yymm in self.author_of_month:
  243. if author in self.author_of_month[yymm]:
  244. self.author_of_month[yymm][author] += 1
  245. else:
  246. self.author_of_month[yymm][author] = 1
  247. else:
  248. self.author_of_month[yymm] = {}
  249. self.author_of_month[yymm][author] = 1
  250. if yymm in self.commits_by_month:
  251. self.commits_by_month[yymm] += 1
  252. else:
  253. self.commits_by_month[yymm] = 1
  254. yy = date.year
  255. if yy in self.author_of_year:
  256. if author in self.author_of_year[yy]:
  257. self.author_of_year[yy][author] += 1
  258. else:
  259. self.author_of_year[yy][author] = 1
  260. else:
  261. self.author_of_year[yy] = {}
  262. self.author_of_year[yy][author] = 1
  263. if yy in self.commits_by_year:
  264. self.commits_by_year[yy] += 1
  265. else:
  266. self.commits_by_year[yy] = 1
  267. # authors: active days
  268. yymmdd = date.strftime('%Y-%m-%d')
  269. if 'last_active_day' not in self.authors[author]:
  270. self.authors[author]['last_active_day'] = yymmdd
  271. self.authors[author]['active_days'] = 1
  272. elif yymmdd != self.authors[author]['last_active_day']:
  273. self.authors[author]['last_active_day'] = yymmdd
  274. self.authors[author]['active_days'] += 1
  275. # project: active days
  276. if yymmdd != self.last_active_day:
  277. self.last_active_day = yymmdd
  278. self.active_days += 1
  279. # TODO Optimize this, it's the worst bottleneck
  280. # outputs "<stamp> <files>" for each revision
  281. self.files_by_stamp = {} # stamp -> files
  282. revlines = getpipeoutput(['git rev-list --pretty=format:"%at %T" HEAD', 'grep -v ^commit']).strip().split('\n')
  283. lines = []
  284. for revline in revlines:
  285. time, rev = revline.split(' ')
  286. linecount = self.getFilesInCommit(rev)
  287. lines.append('%d %d' % (int(time), linecount))
  288. self.total_commits = len(lines)
  289. for line in lines:
  290. parts = line.split(' ')
  291. if len(parts) != 2:
  292. continue
  293. (stamp, files) = parts[0:2]
  294. try:
  295. self.files_by_stamp[int(stamp)] = int(files)
  296. except ValueError:
  297. print 'Warning: failed to parse line "%s"' % line
  298. # extensions
  299. self.extensions = {} # extension -> files, lines
  300. lines = getpipeoutput(['git ls-tree -r -z HEAD']).split('\000')
  301. self.total_files = len(lines)
  302. for line in lines:
  303. if len(line) == 0:
  304. continue
  305. parts = re.split('\s+', line, 4)
  306. sha1 = parts[2]
  307. filename = parts[3]
  308. if filename.find('.') == -1 or filename.rfind('.') == 0:
  309. ext = ''
  310. else:
  311. ext = filename[(filename.rfind('.') + 1):]
  312. if len(ext) > MAX_EXT_LENGTH:
  313. ext = ''
  314. if ext not in self.extensions:
  315. self.extensions[ext] = {'files': 0, 'lines': 0}
  316. self.extensions[ext]['files'] += 1
  317. try:
  318. self.extensions[ext]['lines'] += int(getpipeoutput(['git cat-file blob %s' % sha1, 'wc -l']).split()[0])
  319. except:
  320. print 'Warning: Could not count lines for file "%s"' % line
  321. # line statistics
  322. # outputs:
  323. # N files changed, N insertions (+), N deletions(-)
  324. # <stamp> <author>
  325. self.changes_by_date = {} # stamp -> { files, ins, del }
  326. lines = getpipeoutput(['git log --shortstat --pretty=format:"%at %an"']).split('\n')
  327. lines.reverse()
  328. files = 0; inserted = 0; deleted = 0; total_lines = 0
  329. for line in lines:
  330. if len(line) == 0:
  331. continue
  332. # <stamp> <author>
  333. if line.find('files changed,') == -1:
  334. pos = line.find(' ')
  335. if pos != -1:
  336. try:
  337. (stamp, author) = (int(line[:pos]), line[pos+1:])
  338. self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted, 'lines': total_lines }
  339. except ValueError:
  340. print 'Warning: unexpected line "%s"' % line
  341. else:
  342. print 'Warning: unexpected line "%s"' % line
  343. else:
  344. numbers = re.findall('\d+', line)
  345. if len(numbers) == 3:
  346. (files, inserted, deleted) = map(lambda el : int(el), numbers)
  347. total_lines += inserted
  348. total_lines -= deleted
  349. else:
  350. print 'Warning: failed to handle line "%s"' % line
  351. (files, inserted, deleted) = (0, 0, 0)
  352. #self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted }
  353. self.total_lines = total_lines
  354. def refine(self):
  355. # authors
  356. # name -> {place_by_commits, commits_frac, date_first, date_last, timedelta}
  357. authors_by_commits = getkeyssortedbyvaluekey(self.authors, 'commits')
  358. authors_by_commits.reverse() # most first
  359. for i, name in enumerate(authors_by_commits):
  360. self.authors[name]['place_by_commits'] = i + 1
  361. for name in self.authors.keys():
  362. a = self.authors[name]
  363. a['commits_frac'] = (100 * float(a['commits'])) / self.getTotalCommits()
  364. date_first = datetime.datetime.fromtimestamp(a['first_commit_stamp'])
  365. date_last = datetime.datetime.fromtimestamp(a['last_commit_stamp'])
  366. delta = date_last - date_first
  367. a['date_first'] = date_first.strftime('%Y-%m-%d')
  368. a['date_last'] = date_last.strftime('%Y-%m-%d')
  369. a['timedelta'] = delta
  370. def getActiveDays(self):
  371. return self.active_days
  372. def getActivityByDayOfWeek(self):
  373. return self.activity_by_day_of_week
  374. def getActivityByHourOfDay(self):
  375. return self.activity_by_hour_of_day
  376. def getAuthorInfo(self, author):
  377. return self.authors[author]
  378. def getAuthors(self):
  379. return self.authors.keys()
  380. def getCommitDeltaDays(self):
  381. return (self.last_commit_stamp - self.first_commit_stamp) / 86400
  382. def getFilesInCommit(self, rev):
  383. try:
  384. res = self.cache['files_in_tree'][rev]
  385. except:
  386. res = int(getpipeoutput(['git ls-tree -r --name-only "%s"' % rev, 'wc -l']).split('\n')[0])
  387. if 'files_in_tree' not in self.cache:
  388. self.cache['files_in_tree'] = {}
  389. self.cache['files_in_tree'][rev] = res
  390. return res
  391. def getFirstCommitDate(self):
  392. return datetime.datetime.fromtimestamp(self.first_commit_stamp)
  393. def getLastCommitDate(self):
  394. return datetime.datetime.fromtimestamp(self.last_commit_stamp)
  395. def getTags(self):
  396. lines = getpipeoutput(['git show-ref --tags', 'cut -d/ -f3'])
  397. return lines.split('\n')
  398. def getTagDate(self, tag):
  399. return self.revToDate('tags/' + tag)
  400. def getTotalAuthors(self):
  401. return self.total_authors
  402. def getTotalCommits(self):
  403. return self.total_commits
  404. def getTotalFiles(self):
  405. return self.total_files
  406. def getTotalLOC(self):
  407. return self.total_lines
  408. def revToDate(self, rev):
  409. stamp = int(getpipeoutput(['git log --pretty=format:%%at "%s" -n 1' % rev]))
  410. return datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d')
  411. class ReportCreator:
  412. """Creates the actual report based on given data."""
  413. def __init__(self):
  414. pass
  415. def create(self, data, path):
  416. self.data = data
  417. self.path = path
  418. def html_linkify(text):
  419. return text.lower().replace(' ', '_')
  420. def html_header(level, text):
  421. name = html_linkify(text)
  422. return '\n<h%d><a href="#%s" name="%s">%s</a></h%d>\n\n' % (level, name, name, text, level)
  423. class HTMLReportCreator(ReportCreator):
  424. def create(self, data, path):
  425. ReportCreator.create(self, data, path)
  426. self.title = data.projectname
  427. # copy static files. Looks in the binary directory, ../share/gitstats and /usr/share/gitstats
  428. binarypath = os.path.dirname(os.path.abspath(__file__))
  429. secondarypath = os.path.join(binarypath, '..', 'share', 'gitstats')
  430. basedirs = [binarypath, secondarypath, '/usr/share/gitstats']
  431. for file in ('gitstats.css', 'sortable.js', 'arrow-up.gif', 'arrow-down.gif', 'arrow-none.gif'):
  432. for base in basedirs:
  433. src = base + '/' + file
  434. if os.path.exists(src):
  435. shutil.copyfile(src, path + '/' + file)
  436. break
  437. else:
  438. print 'Warning: "%s" not found, so not copied (searched: %s)' % (file, basedirs)
  439. f = open(path + "/index.html", 'w')
  440. format = '%Y-%m-%d %H:%M:%S'
  441. self.printHeader(f)
  442. f.write('<h1>GitStats - %s</h1>' % data.projectname)
  443. self.printNav(f)
  444. f.write('<dl>')
  445. f.write('<dt>Project name</dt><dd>%s</dd>' % (data.projectname))
  446. f.write('<dt>Generated</dt><dd>%s (in %d seconds)</dd>' % (datetime.datetime.now().strftime(format), time.time() - data.getStampCreated()))
  447. f.write('<dt>Generator</dt><dd><a href="http://gitstats.sourceforge.net/">GitStats</a> (version %s)</dd>' % getversion())
  448. f.write('<dt>Report Period</dt><dd>%s to %s (%d days, %d active days)</dd>' % (data.getFirstCommitDate().strftime(format), data.getLastCommitDate().strftime(format), data.getCommitDeltaDays(), data.getActiveDays()))
  449. f.write('<dt>Total Files</dt><dd>%s</dd>' % data.getTotalFiles())
  450. f.write('<dt>Total Lines of Code</dt><dd>%s</dd>' % data.getTotalLOC())
  451. f.write('<dt>Total Commits</dt><dd>%s</dd>' % data.getTotalCommits())
  452. f.write('<dt>Authors</dt><dd>%s</dd>' % data.getTotalAuthors())
  453. f.write('</dl>')
  454. f.write('</body>\n</html>')
  455. f.close()
  456. ###
  457. # Activity
  458. f = open(path + '/activity.html', 'w')
  459. self.printHeader(f)
  460. f.write('<h1>Activity</h1>')
  461. self.printNav(f)
  462. #f.write('<h2>Last 30 days</h2>')
  463. #f.write('<h2>Last 12 months</h2>')
  464. # Hour of Day
  465. f.write(html_header(2, 'Hour of Day'))
  466. hour_of_day = data.getActivityByHourOfDay()
  467. f.write('<table><tr><th>Hour</th>')
  468. for i in range(0, 24):
  469. f.write('<th>%d</th>' % i)
  470. f.write('</tr>\n<tr><th>Commits</th>')
  471. fp = open(path + '/hour_of_day.dat', 'w')
  472. for i in range(0, 24):
  473. if i in hour_of_day:
  474. r = 127 + int((float(hour_of_day[i]) / data.activity_by_hour_of_day_busiest) * 128)
  475. f.write('<td style="background-color: rgb(%d, 0, 0)">%d</td>' % (r, hour_of_day[i]))
  476. fp.write('%d %d\n' % (i, hour_of_day[i]))
  477. else:
  478. f.write('<td>0</td>')
  479. fp.write('%d 0\n' % i)
  480. fp.close()
  481. f.write('</tr>\n<tr><th>%</th>')
  482. totalcommits = data.getTotalCommits()
  483. for i in range(0, 24):
  484. if i in hour_of_day:
  485. r = 127 + int((float(hour_of_day[i]) / data.activity_by_hour_of_day_busiest) * 128)
  486. f.write('<td style="background-color: rgb(%d, 0, 0)">%.2f</td>' % (r, (100.0 * hour_of_day[i]) / totalcommits))
  487. else:
  488. f.write('<td>0.00</td>')
  489. f.write('</tr></table>')
  490. f.write('<img src="hour_of_day.png" alt="Hour of Day" />')
  491. fg = open(path + '/hour_of_day.dat', 'w')
  492. for i in range(0, 24):
  493. if i in hour_of_day:
  494. fg.write('%d %d\n' % (i + 1, hour_of_day[i]))
  495. else:
  496. fg.write('%d 0\n' % (i + 1))
  497. fg.close()
  498. # Day of Week
  499. f.write(html_header(2, 'Day of Week'))
  500. day_of_week = data.getActivityByDayOfWeek()
  501. f.write('<div class="vtable"><table>')
  502. f.write('<tr><th>Day</th><th>Total (%)</th></tr>')
  503. fp = open(path + '/day_of_week.dat', 'w')
  504. for d in range(0, 7):
  505. commits = 0
  506. if d in day_of_week:
  507. commits = day_of_week[d]
  508. fp.write('%d %d\n' % (d + 1, commits))
  509. f.write('<tr>')
  510. f.write('<th>%d</th>' % (d + 1))
  511. if d in day_of_week:
  512. f.write('<td>%d (%.2f%%)</td>' % (day_of_week[d], (100.0 * day_of_week[d]) / totalcommits))
  513. else:
  514. f.write('<td>0</td>')
  515. f.write('</tr>')
  516. f.write('</table></div>')
  517. f.write('<img src="day_of_week.png" alt="Day of Week" />')
  518. fp.close()
  519. # Hour of Week
  520. f.write(html_header(2, 'Hour of Week'))
  521. f.write('<table>')
  522. f.write('<tr><th>Weekday</th>')
  523. for hour in range(0, 24):
  524. f.write('<th>%d</th>' % (hour))
  525. f.write('</tr>')
  526. for weekday in range(0, 7):
  527. f.write('<tr><th>%d</th>' % (weekday + 1))
  528. for hour in range(0, 24):
  529. try:
  530. commits = data.activity_by_hour_of_week[weekday][hour]
  531. except KeyError:
  532. commits = 0
  533. if commits != 0:
  534. f.write('<td')
  535. r = 127 + int((float(commits) / data.activity_by_hour_of_week_busiest) * 128)
  536. f.write(' style="background-color: rgb(%d, 0, 0)"' % r)
  537. f.write('>%d</td>' % commits)
  538. else:
  539. f.write('<td></td>')
  540. f.write('</tr>')
  541. f.write('</table>')
  542. # Month of Year
  543. f.write(html_header(2, 'Month of Year'))
  544. f.write('<div class="vtable"><table>')
  545. f.write('<tr><th>Month</th><th>Commits (%)</th></tr>')
  546. fp = open (path + '/month_of_year.dat', 'w')
  547. for mm in range(1, 13):
  548. commits = 0
  549. if mm in data.activity_by_month_of_year:
  550. commits = data.activity_by_month_of_year[mm]
  551. f.write('<tr><td>%d</td><td>%d (%.2f %%)</td></tr>' % (mm, commits, (100.0 * commits) / data.getTotalCommits()))
  552. fp.write('%d %d\n' % (mm, commits))
  553. fp.close()
  554. f.write('</table></div>')
  555. f.write('<img src="month_of_year.png" alt="Month of Year" />')
  556. # Commits by year/month
  557. f.write(html_header(2, 'Commits by year/month'))
  558. f.write('<div class="vtable"><table><tr><th>Month</th><th>Commits</th></tr>')
  559. for yymm in reversed(sorted(data.commits_by_month.keys())):
  560. f.write('<tr><td>%s</td><td>%d</td></tr>' % (yymm, data.commits_by_month[yymm]))
  561. f.write('</table></div>')
  562. f.write('<img src="commits_by_year_month.png" alt="Commits by year/month" />')
  563. fg = open(path + '/commits_by_year_month.dat', 'w')
  564. for yymm in sorted(data.commits_by_month.keys()):
  565. fg.write('%s %s\n' % (yymm, data.commits_by_month[yymm]))
  566. fg.close()
  567. # Commits by year
  568. f.write(html_header(2, 'Commits by Year'))
  569. f.write('<div class="vtable"><table><tr><th>Year</th><th>Commits (% of all)</th></tr>')
  570. for yy in reversed(sorted(data.commits_by_year.keys())):
  571. f.write('<tr><td>%s</td><td>%d (%.2f%%)</td></tr>' % (yy, data.commits_by_year[yy], (100.0 * data.commits_by_year[yy]) / data.getTotalCommits()))
  572. f.write('</table></div>')
  573. f.write('<img src="commits_by_year.png" alt="Commits by Year" />')
  574. fg = open(path + '/commits_by_year.dat', 'w')
  575. for yy in sorted(data.commits_by_year.keys()):
  576. fg.write('%d %d\n' % (yy, data.commits_by_year[yy]))
  577. fg.close()
  578. f.write('</body></html>')
  579. f.close()
  580. ###
  581. # Authors
  582. f = open(path + '/authors.html', 'w')
  583. self.printHeader(f)
  584. f.write('<h1>Authors</h1>')
  585. self.printNav(f)
  586. # Authors :: List of authors
  587. f.write(html_header(2, 'List of Authors'))
  588. f.write('<table class="authors sortable" id="authors">')
  589. f.write('<tr><th>Author</th><th>Commits (%)</th><th>First commit</th><th>Last commit</th><th class="unsortable">Age</th><th>Active days</th><th># by commits</th></tr>')
  590. for author in sorted(data.getAuthors()):
  591. info = data.getAuthorInfo(author)
  592. f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%s</td><td>%s</td><td>%s</td><td>%d</td><td>%d</td></tr>' % (author, info['commits'], info['commits_frac'], info['date_first'], info['date_last'], info['timedelta'], info['active_days'], info['place_by_commits']))
  593. f.write('</table>')
  594. # Authors :: Author of Month
  595. f.write(html_header(2, 'Author of Month'))
  596. f.write('<table class="sortable" id="aom">')
  597. f.write('<tr><th>Month</th><th>Author</th><th>Commits (%)</th><th class="unsortable">Next top 5</th></tr>')
  598. for yymm in reversed(sorted(data.author_of_month.keys())):
  599. authordict = data.author_of_month[yymm]
  600. authors = getkeyssortedbyvalues(authordict)
  601. authors.reverse()
  602. commits = data.author_of_month[yymm][authors[0]]
  603. next = ', '.join(authors[1:5])
  604. f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td><td>%s</td></tr>' % (yymm, authors[0], commits, (100.0 * commits) / data.commits_by_month[yymm], data.commits_by_month[yymm], next))
  605. f.write('</table>')
  606. f.write(html_header(2, 'Author of Year'))
  607. f.write('<table class="sortable" id="aoy"><tr><th>Year</th><th>Author</th><th>Commits (%)</th><th class="unsortable">Next top 5</th></tr>')
  608. for yy in reversed(sorted(data.author_of_year.keys())):
  609. authordict = data.author_of_year[yy]
  610. authors = getkeyssortedbyvalues(authordict)
  611. authors.reverse()
  612. commits = data.author_of_year[yy][authors[0]]
  613. next = ', '.join(authors[1:5])
  614. f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td><td>%s</td></tr>' % (yy, authors[0], commits, (100.0 * commits) / data.commits_by_year[yy], data.commits_by_year[yy], next))
  615. f.write('</table>')
  616. f.write('</body></html>')
  617. f.close()
  618. ###
  619. # Files
  620. f = open(path + '/files.html', 'w')
  621. self.printHeader(f)
  622. f.write('<h1>Files</h1>')
  623. self.printNav(f)
  624. f.write('<dl>\n')
  625. f.write('<dt>Total files</dt><dd>%d</dd>' % data.getTotalFiles())
  626. f.write('<dt>Total lines</dt><dd>%d</dd>' % data.getTotalLOC())
  627. f.write('<dt>Average file size</dt><dd>%.2f bytes</dd>' % ((100.0 * data.getTotalLOC()) / data.getTotalFiles()))
  628. f.write('</dl>\n')
  629. # Files :: File count by date
  630. f.write(html_header(2, 'File count by date'))
  631. fg = open(path + '/files_by_date.dat', 'w')
  632. for stamp in sorted(data.files_by_stamp.keys()):
  633. fg.write('%s %d\n' % (datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), data.files_by_stamp[stamp]))
  634. fg.close()
  635. f.write('<img src="files_by_date.png" alt="Files by Date" />')
  636. #f.write('<h2>Average file size by date</h2>')
  637. # Files :: Extensions
  638. f.write(html_header(2, 'Extensions'))
  639. f.write('<table class="sortable" id="ext"><tr><th>Extension</th><th>Files (%)</th><th>Lines (%)</th><th>Lines/file</th></tr>')
  640. for ext in sorted(data.extensions.keys()):
  641. files = data.extensions[ext]['files']
  642. lines = data.extensions[ext]['lines']
  643. f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%d (%.2f%%)</td><td>%d</td></tr>' % (ext, files, (100.0 * files) / data.getTotalFiles(), lines, (100.0 * lines) / data.getTotalLOC(), lines / files))
  644. f.write('</table>')
  645. f.write('</body></html>')
  646. f.close()
  647. ###
  648. # Lines
  649. f = open(path + '/lines.html', 'w')
  650. self.printHeader(f)
  651. f.write('<h1>Lines</h1>')
  652. self.printNav(f)
  653. f.write('<dl>\n')
  654. f.write('<dt>Total lines</dt><dd>%d</dd>' % data.getTotalLOC())
  655. f.write('</dl>\n')
  656. f.write(html_header(2, 'Lines of Code'))
  657. f.write('<img src="lines_of_code.png" />')
  658. fg = open(path + '/lines_of_code.dat', 'w')
  659. for stamp in sorted(data.changes_by_date.keys()):
  660. fg.write('%d %d\n' % (stamp, data.changes_by_date[stamp]['lines']))
  661. fg.close()
  662. f.write('</body></html>')
  663. f.close()
  664. ###
  665. # tags.html
  666. f = open(path + '/tags.html', 'w')
  667. self.printHeader(f)
  668. f.write('<h1>Tags</h1>')
  669. self.printNav(f)
  670. f.write('<dl>')
  671. f.write('<dt>Total tags</dt><dd>%d</dd>' % len(data.tags))
  672. if len(data.tags) > 0:
  673. f.write('<dt>Average commits per tag</dt><dd>%.2f</dd>' % (1.0 * data.getTotalCommits() / len(data.tags)))
  674. f.write('</dl>')
  675. f.write('<table>')
  676. f.write('<tr><th>Name</th><th>Date</th><th>Commits</th><th>Authors</th></tr>')
  677. # sort the tags by date desc
  678. tags_sorted_by_date_desc = map(lambda el : el[1], reversed(sorted(map(lambda el : (el[1]['date'], el[0]), data.tags.items()))))
  679. for tag in tags_sorted_by_date_desc:
  680. authorinfo = []
  681. authors_by_commits = getkeyssortedbyvalues(data.tags[tag]['authors'])
  682. for i in reversed(authors_by_commits):
  683. authorinfo.append('%s (%d)' % (i, data.tags[tag]['authors'][i]))
  684. f.write('<tr><td>%s</td><td>%s</td><td>%d</td><td>%s</td></tr>' % (tag, data.tags[tag]['date'], data.tags[tag]['commits'], ', '.join(authorinfo)))
  685. f.write('</table>')
  686. f.write('</body></html>')
  687. f.close()
  688. self.createGraphs(path)
  def createGraphs(self, path):
      """Write one gnuplot script per chart into *path*, then render them all.

      For every chart a ``<name>.plot`` file is created containing
      GNUPLOT_COMMON followed by the chart-specific commands.  Afterwards
      every ``*.plot`` file found in *path* is passed to gnuplot through
      getpipeoutput(), and any output gnuplot produces is printed.

      path -- output directory; may be relative or absolute.

      Side effect: the process working directory is changed to *path* and
      is not restored.
      """
      print('Generating graphs...')

      # Resolve the directory before chdir() below.  With a relative path the
      # later glob would otherwise be evaluated against the new working
      # directory and match nothing.
      path = os.path.abspath(path)

      # (script base name, gnuplot commands).  Each script is written as
      # GNUPLOT_COMMON + '\n' + commands joined by '\n' + '\n'.
      plot_scripts = (
          # hour of day
          ('hour_of_day', (
              "set output 'hour_of_day.png'",
              'unset key',
              'set xrange [0.5:24.5]',
              'set xtics 4',
              'set ylabel "Commits"',
              "plot 'hour_of_day.dat' using 1:2:(0.5) w boxes fs solid",
          )),
          # day of week
          ('day_of_week', (
              "set output 'day_of_week.png'",
              'unset key',
              'set xrange [0.5:7.5]',
              'set xtics 1',
              'set ylabel "Commits"',
              "plot 'day_of_week.dat' using 1:2:(0.5) w boxes fs solid",
          )),
          # Month of Year
          ('month_of_year', (
              "set output 'month_of_year.png'",
              'unset key',
              'set xrange [0.5:12.5]',
              'set xtics 1',
              'set ylabel "Commits"',
              "plot 'month_of_year.dat' using 1:2:(0.5) w boxes fs solid",
          )),
          # commits_by_year_month
          ('commits_by_year_month', (
              "set output 'commits_by_year_month.png'",
              'unset key',
              'set xdata time',
              'set timefmt "%Y-%m"',
              'set format x "%Y-%m"',
              'set xtics rotate by 90 15768000',
              'set bmargin 5',
              'set ylabel "Commits"',
              "plot 'commits_by_year_month.dat' using 1:2:(0.5) w boxes fs solid",
          )),
          # commits_by_year
          ('commits_by_year', (
              "set output 'commits_by_year.png'",
              'unset key',
              'set xtics 1',
              'set ylabel "Commits"',
              'set yrange [0:]',
              "plot 'commits_by_year.dat' using 1:2:(0.5) w boxes fs solid",
          )),
          # Files by date
          ('files_by_date', (
              "set output 'files_by_date.png'",
              'unset key',
              'set xdata time',
              'set timefmt "%Y-%m-%d"',
              'set format x "%Y-%m-%d"',
              'set ylabel "Files"',
              'set xtics rotate by 90',
              'set ytics 1',
              'set bmargin 6',
              "plot 'files_by_date.dat' using 1:2 w steps",
          )),
          # Lines of Code
          ('lines_of_code', (
              "set output 'lines_of_code.png'",
              'unset key',
              'set xdata time',
              'set timefmt "%s"',
              'set format x "%Y-%m-%d"',
              'set ylabel "Lines"',
              'set xtics rotate by 90',
              'set bmargin 6',
              "plot 'lines_of_code.dat' using 1:2 w lines",
          )),
      )

      for name, commands in plot_scripts:
          script = open(os.path.join(path, name + '.plot'), 'w')
          try:
              script.write(GNUPLOT_COMMON)
              script.write('\n' + '\n'.join(commands) + '\n')
          finally:
              # Close the handle even when a write fails.
              script.close()

      # The scripts name their .dat inputs and .png outputs without a
      # directory, so gnuplot has to run from inside the output directory.
      os.chdir(path)
      for plotfile in glob.glob(os.path.join(path, '*.plot')):
          # NOTE(review): the file name is only wrapped in double quotes; a
          # path containing a double quote would break this command string.
          out = getpipeoutput([gnuplot_cmd + ' "%s"' % plotfile])
          if len(out) > 0:
              print(out)
  def printHeader(self, f, title = ''):
      """Write the XHTML prologue, <head> section and opening <body> tag to f.

      f     -- writable file-like object for the page being generated.
      title -- accepted but not read here; the page title always comes
               from self.title.

      The generator meta tag is filled in with the value of getversion().
      """
      template = (
          '<?xml version="1.0" encoding="UTF-8"?>\n'
          '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\n'
          '<html xmlns="http://www.w3.org/1999/xhtml">\n'
          '<head>\n'
          '<title>GitStats - %s</title>\n'
          '<link rel="stylesheet" href="gitstats.css" type="text/css" />\n'
          '<meta name="generator" content="GitStats %s" />\n'
          '<script type="text/javascript" src="sortable.js"></script>\n'
          '</head>\n'
          '<body>\n'
      )
      f.write(template % (self.title, getversion()))
  def printNav(self, f):
      """Write the navigation bar that links the report pages to f.

      f -- writable file-like object for the page being generated.
      """
      # (target page, link text), in display order.
      pages = (
          ('index.html', 'General'),
          ('activity.html', 'Activity'),
          ('authors.html', 'Authors'),
          ('files.html', 'Files'),
          ('lines.html', 'Lines'),
          ('tags.html', 'Tags'),
      )
      entries = ['<li><a href="%s">%s</a></li>' % page for page in pages]
      # The empty first and last elements produce the leading and trailing
      # newline of the emitted markup.
      markup = ['', '<div class="nav">', '<ul>'] + entries + ['</ul>', '</div>', '']
      f.write('\n'.join(markup))
  # ---------------------------------------------------------------------
  # Script entry: gitstats <gitpath> <outputpath>
  # ---------------------------------------------------------------------
  usage = (
      '\n'
      'Usage: gitstats [options] <gitpath> <outputpath>\n'
      'Options:\n'
  )

  # Both positional arguments are required; otherwise show usage and stop.
  if len(sys.argv) < 3:
      print(usage)
      sys.exit(0)

  gitpath, outputpath = sys.argv[1], os.path.abspath(sys.argv[2])
  # Remember where we started so we can return after collecting.
  rundir = os.getcwd()

  # Best-effort creation of the output directory; any failure (including
  # "already exists") is judged by the isdir() check that follows.
  try:
      os.makedirs(outputpath)
  except OSError:
      pass
  if not os.path.isdir(outputpath):
      print('FATAL: Output path is not a directory or does not exist')
      sys.exit(1)

  print('Git path: %s' % gitpath)
  print('Output path: %s' % outputpath)

  # Data collection runs from inside the repository directory.
  os.chdir(gitpath)

  cachefile = os.path.join(outputpath, 'gitstats.cache')

  print('Collecting data...')
  data = GitDataCollector()
  data.loadCache(cachefile)
  data.collect(gitpath)

  # The cache is written before refine() is called.
  print('Refining data...')
  data.saveCache(cachefile)
  data.refine()

  # Back to the directory the script was started from.
  os.chdir(rundir)

  print('Generating report...')
  report = HTMLReportCreator()
  report.create(data, outputpath)

  # Timing summary: total wall time and the share spent in external commands.
  time_end = time.time()
  exectime_internal = time_end - time_start
  print('Execution time %.5f secs, %.5f secs (%.2f %%) in external commands)' % (
      exectime_internal,
      exectime_external,
      (100.0 * exectime_external) / exectime_internal,
  ))