gitstats 32KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064
  1. #!/usr/bin/env python
  2. # Copyright (c) 2007-2009 Heikki Hokkanen <hoxu@users.sf.net> & others (see doc/author.txt)
  3. # GPLv2 / GPLv3
  4. import datetime
  5. import glob
  6. import os
  7. import pickle
  8. import platform
  9. import re
  10. import shutil
  11. import subprocess
  12. import sys
  13. import time
  14. import zlib
  15. GNUPLOT_COMMON = 'set terminal png transparent\nset size 0.5,0.5\n'
  16. MAX_EXT_LENGTH = 10 # maximum file extension length
  17. ON_LINUX = (platform.system() == 'Linux')
  18. exectime_internal = 0.0
  19. exectime_external = 0.0
  20. time_start = time.time()
  21. # By default, gnuplot is searched from path, but can be overridden with the
  22. # environment variable "GNUPLOT"
  23. gnuplot_cmd = 'gnuplot'
  24. if 'GNUPLOT' in os.environ:
  25. gnuplot_cmd = os.environ['GNUPLOT']
  26. def getpipeoutput(cmds, quiet = False):
  27. global exectime_external
  28. start = time.time()
  29. if not quiet and ON_LINUX and os.isatty(1):
  30. print '>> ' + ' | '.join(cmds),
  31. sys.stdout.flush()
  32. p0 = subprocess.Popen(cmds[0], stdout = subprocess.PIPE, shell = True)
  33. p = p0
  34. for x in cmds[1:]:
  35. p = subprocess.Popen(x, stdin = p0.stdout, stdout = subprocess.PIPE, shell = True)
  36. p0 = p
  37. output = p.communicate()[0]
  38. end = time.time()
  39. if not quiet:
  40. if ON_LINUX and os.isatty(1):
  41. print '\r',
  42. print '[%.5f] >> %s' % (end - start, ' | '.join(cmds))
  43. exectime_external += (end - start)
  44. return output.rstrip('\n')
  45. def getkeyssortedbyvalues(dict):
  46. return map(lambda el : el[1], sorted(map(lambda el : (el[1], el[0]), dict.items())))
  47. # dict['author'] = { 'commits': 512 } - ...key(dict, 'commits')
  48. def getkeyssortedbyvaluekey(d, key):
  49. return map(lambda el : el[1], sorted(map(lambda el : (d[el][key], el), d.keys())))
  50. VERSION = 0
  51. def getversion():
  52. global VERSION
  53. if VERSION == 0:
  54. VERSION = getpipeoutput(["git rev-parse --short HEAD"]).split('\n')[0]
  55. return VERSION
  56. class DataCollector:
  57. """Manages data collection from a revision control repository."""
  58. def __init__(self):
  59. self.stamp_created = time.time()
  60. self.cache = {}
  61. ##
  62. # This should be the main function to extract data from the repository.
  63. def collect(self, dir):
  64. self.dir = dir
  65. self.projectname = os.path.basename(os.path.abspath(dir))
  66. ##
  67. # Load cacheable data
  68. def loadCache(self, cachefile):
  69. if not os.path.exists(cachefile):
  70. return
  71. print 'Loading cache...'
  72. f = open(cachefile)
  73. try:
  74. self.cache = pickle.loads(zlib.decompress(f.read()))
  75. except:
  76. # temporary hack to upgrade non-compressed caches
  77. f.seek(0)
  78. self.cache = pickle.load(f)
  79. f.close()
  80. ##
  81. # Produce any additional statistics from the extracted data.
  82. def refine(self):
  83. pass
  84. ##
  85. # : get a dictionary of author
  86. def getAuthorInfo(self, author):
  87. return None
  88. def getActivityByDayOfWeek(self):
  89. return {}
  90. def getActivityByHourOfDay(self):
  91. return {}
  92. ##
  93. # Get a list of authors
  94. def getAuthors(self):
  95. return []
  96. def getFirstCommitDate(self):
  97. return datetime.datetime.now()
  98. def getLastCommitDate(self):
  99. return datetime.datetime.now()
  100. def getStampCreated(self):
  101. return self.stamp_created
  102. def getTags(self):
  103. return []
  104. def getTotalAuthors(self):
  105. return -1
  106. def getTotalCommits(self):
  107. return -1
  108. def getTotalFiles(self):
  109. return -1
  110. def getTotalLOC(self):
  111. return -1
  112. ##
  113. # Save cacheable data
  114. def saveCache(self, filename):
  115. print 'Saving cache...'
  116. f = open(cachefile, 'w')
  117. #pickle.dump(self.cache, f)
  118. data = zlib.compress(pickle.dumps(self.cache))
  119. f.write(data)
  120. f.close()
  121. class GitDataCollector(DataCollector):
  122. def collect(self, dir):
  123. DataCollector.collect(self, dir)
  124. try:
  125. self.total_authors = int(getpipeoutput(['git log', 'git shortlog -s', 'wc -l']))
  126. except:
  127. self.total_authors = 0
  128. #self.total_lines = int(getoutput('git-ls-files -z |xargs -0 cat |wc -l'))
  129. self.activity_by_hour_of_day = {} # hour -> commits
  130. self.activity_by_day_of_week = {} # day -> commits
  131. self.activity_by_month_of_year = {} # month [1-12] -> commits
  132. self.activity_by_hour_of_week = {} # weekday -> hour -> commits
  133. self.activity_by_hour_of_day_busiest = 0
  134. self.activity_by_hour_of_week_busiest = 0
  135. self.activity_by_year_week = {} # yy_wNN -> commits
  136. self.activity_by_year_week_peak = 0
  137. self.authors = {} # name -> {commits, first_commit_stamp, last_commit_stamp, last_active_day, active_days}
  138. # author of the month
  139. self.author_of_month = {} # month -> author -> commits
  140. self.author_of_year = {} # year -> author -> commits
  141. self.commits_by_month = {} # month -> commits
  142. self.commits_by_year = {} # year -> commits
  143. self.first_commit_stamp = 0
  144. self.last_commit_stamp = 0
  145. self.last_active_day = None
  146. self.active_days = 0
  147. # timezone
  148. self.commits_by_timezone = {} # timezone -> commits
  149. # tags
  150. self.tags = {}
  151. lines = getpipeoutput(['git show-ref --tags']).split('\n')
  152. for line in lines:
  153. if len(line) == 0:
  154. continue
  155. (hash, tag) = line.split(' ')
  156. tag = tag.replace('refs/tags/', '')
  157. output = getpipeoutput(['git log "%s" --pretty=format:"%%at %%an" -n 1' % hash])
  158. if len(output) > 0:
  159. parts = output.split(' ')
  160. stamp = 0
  161. try:
  162. stamp = int(parts[0])
  163. except ValueError:
  164. stamp = 0
  165. self.tags[tag] = { 'stamp': stamp, 'hash' : hash, 'date' : datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), 'commits': 0, 'authors': {} }
  166. # collect info on tags, starting from latest
  167. tags_sorted_by_date_desc = map(lambda el : el[1], reversed(sorted(map(lambda el : (el[1]['date'], el[0]), data.tags.items()))))
  168. prev = None
  169. for tag in reversed(tags_sorted_by_date_desc):
  170. cmd = 'git shortlog -s "%s"' % tag
  171. if prev != None:
  172. cmd += ' "^%s"' % prev
  173. output = getpipeoutput([cmd])
  174. if len(output) == 0:
  175. continue
  176. prev = tag
  177. for line in output.split('\n'):
  178. parts = re.split('\s+', line, 2)
  179. commits = int(parts[1])
  180. author = parts[2]
  181. self.tags[tag]['commits'] += commits
  182. self.tags[tag]['authors'][author] = commits
  183. # Collect revision statistics
  184. # Outputs "<stamp> <author>"
  185. lines = getpipeoutput(['git rev-list --pretty=format:"%at %ai %an" HEAD', 'grep -v ^commit']).split('\n')
  186. for line in lines:
  187. # linux-2.6 says "<unknown>" for one line O_o
  188. parts = line.split(' ')
  189. author = ''
  190. try:
  191. stamp = int(parts[0])
  192. except ValueError:
  193. stamp = 0
  194. timezone = parts[3]
  195. if len(parts) > 4:
  196. author = ' '.join(parts[1:])
  197. date = datetime.datetime.fromtimestamp(float(stamp))
  198. # First and last commit stamp
  199. if self.last_commit_stamp == 0:
  200. self.last_commit_stamp = stamp
  201. self.first_commit_stamp = stamp
  202. # activity
  203. # hour
  204. hour = date.hour
  205. if hour in self.activity_by_hour_of_day:
  206. self.activity_by_hour_of_day[hour] += 1
  207. else:
  208. self.activity_by_hour_of_day[hour] = 1
  209. # most active hour?
  210. if self.activity_by_hour_of_day[hour] > self.activity_by_hour_of_day_busiest:
  211. self.activity_by_hour_of_day_busiest = self.activity_by_hour_of_day[hour]
  212. # day of week
  213. day = date.weekday()
  214. if day in self.activity_by_day_of_week:
  215. self.activity_by_day_of_week[day] += 1
  216. else:
  217. self.activity_by_day_of_week[day] = 1
  218. # hour of week
  219. if day not in self.activity_by_hour_of_week:
  220. self.activity_by_hour_of_week[day] = {}
  221. if hour not in self.activity_by_hour_of_week[day]:
  222. self.activity_by_hour_of_week[day][hour] = 1
  223. else:
  224. self.activity_by_hour_of_week[day][hour] += 1
  225. # most active hour?
  226. if self.activity_by_hour_of_week[day][hour] > self.activity_by_hour_of_week_busiest:
  227. self.activity_by_hour_of_week_busiest = self.activity_by_hour_of_week[day][hour]
  228. # month of year
  229. month = date.month
  230. if month in self.activity_by_month_of_year:
  231. self.activity_by_month_of_year[month] += 1
  232. else:
  233. self.activity_by_month_of_year[month] = 1
  234. # yearly/weekly activity
  235. yyw = date.strftime('%Y-%W')
  236. if yyw not in self.activity_by_year_week:
  237. self.activity_by_year_week[yyw] = 1
  238. else:
  239. self.activity_by_year_week[yyw] += 1
  240. if self.activity_by_year_week_peak < self.activity_by_year_week[yyw]:
  241. self.activity_by_year_week_peak = self.activity_by_year_week[yyw]
  242. # author stats
  243. if author not in self.authors:
  244. self.authors[author] = {}
  245. # commits
  246. if 'last_commit_stamp' not in self.authors[author]:
  247. self.authors[author]['last_commit_stamp'] = stamp
  248. self.authors[author]['first_commit_stamp'] = stamp
  249. if 'commits' in self.authors[author]:
  250. self.authors[author]['commits'] += 1
  251. else:
  252. self.authors[author]['commits'] = 1
  253. # author of the month/year
  254. yymm = date.strftime('%Y-%m')
  255. if yymm in self.author_of_month:
  256. if author in self.author_of_month[yymm]:
  257. self.author_of_month[yymm][author] += 1
  258. else:
  259. self.author_of_month[yymm][author] = 1
  260. else:
  261. self.author_of_month[yymm] = {}
  262. self.author_of_month[yymm][author] = 1
  263. if yymm in self.commits_by_month:
  264. self.commits_by_month[yymm] += 1
  265. else:
  266. self.commits_by_month[yymm] = 1
  267. yy = date.year
  268. if yy in self.author_of_year:
  269. if author in self.author_of_year[yy]:
  270. self.author_of_year[yy][author] += 1
  271. else:
  272. self.author_of_year[yy][author] = 1
  273. else:
  274. self.author_of_year[yy] = {}
  275. self.author_of_year[yy][author] = 1
  276. if yy in self.commits_by_year:
  277. self.commits_by_year[yy] += 1
  278. else:
  279. self.commits_by_year[yy] = 1
  280. # authors: active days
  281. yymmdd = date.strftime('%Y-%m-%d')
  282. if 'last_active_day' not in self.authors[author]:
  283. self.authors[author]['last_active_day'] = yymmdd
  284. self.authors[author]['active_days'] = 1
  285. elif yymmdd != self.authors[author]['last_active_day']:
  286. self.authors[author]['last_active_day'] = yymmdd
  287. self.authors[author]['active_days'] += 1
  288. # project: active days
  289. if yymmdd != self.last_active_day:
  290. self.last_active_day = yymmdd
  291. self.active_days += 1
  292. # timezone
  293. self.commits_by_timezone[timezone] = self.commits_by_timezone.get(timezone, 0) + 1
  294. # TODO Optimize this, it's the worst bottleneck
  295. # outputs "<stamp> <files>" for each revision
  296. self.files_by_stamp = {} # stamp -> files
  297. revlines = getpipeoutput(['git rev-list --pretty=format:"%at %T" HEAD', 'grep -v ^commit']).strip().split('\n')
  298. lines = []
  299. for revline in revlines:
  300. time, rev = revline.split(' ')
  301. linecount = self.getFilesInCommit(rev)
  302. lines.append('%d %d' % (int(time), linecount))
  303. self.total_commits = len(lines)
  304. for line in lines:
  305. parts = line.split(' ')
  306. if len(parts) != 2:
  307. continue
  308. (stamp, files) = parts[0:2]
  309. try:
  310. self.files_by_stamp[int(stamp)] = int(files)
  311. except ValueError:
  312. print 'Warning: failed to parse line "%s"' % line
  313. # extensions
  314. self.extensions = {} # extension -> files, lines
  315. lines = getpipeoutput(['git ls-tree -r -z HEAD']).split('\000')
  316. self.total_files = len(lines)
  317. for line in lines:
  318. if len(line) == 0:
  319. continue
  320. parts = re.split('\s+', line, 4)
  321. sha1 = parts[2]
  322. filename = parts[3]
  323. if filename.find('.') == -1 or filename.rfind('.') == 0:
  324. ext = ''
  325. else:
  326. ext = filename[(filename.rfind('.') + 1):]
  327. if len(ext) > MAX_EXT_LENGTH:
  328. ext = ''
  329. if ext not in self.extensions:
  330. self.extensions[ext] = {'files': 0, 'lines': 0}
  331. self.extensions[ext]['files'] += 1
  332. try:
  333. self.extensions[ext]['lines'] += int(getpipeoutput(['git cat-file blob %s' % sha1, 'wc -l']).split()[0])
  334. except:
  335. print 'Warning: Could not count lines for file "%s"' % line
  336. # line statistics
  337. # outputs:
  338. # N files changed, N insertions (+), N deletions(-)
  339. # <stamp> <author>
  340. self.changes_by_date = {} # stamp -> { files, ins, del }
  341. lines = getpipeoutput(['git log --shortstat --pretty=format:"%at %an"']).split('\n')
  342. lines.reverse()
  343. files = 0; inserted = 0; deleted = 0; total_lines = 0
  344. for line in lines:
  345. if len(line) == 0:
  346. continue
  347. # <stamp> <author>
  348. if line.find('files changed,') == -1:
  349. pos = line.find(' ')
  350. if pos != -1:
  351. try:
  352. (stamp, author) = (int(line[:pos]), line[pos+1:])
  353. self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted, 'lines': total_lines }
  354. except ValueError:
  355. print 'Warning: unexpected line "%s"' % line
  356. else:
  357. print 'Warning: unexpected line "%s"' % line
  358. else:
  359. numbers = re.findall('\d+', line)
  360. if len(numbers) == 3:
  361. (files, inserted, deleted) = map(lambda el : int(el), numbers)
  362. total_lines += inserted
  363. total_lines -= deleted
  364. else:
  365. print 'Warning: failed to handle line "%s"' % line
  366. (files, inserted, deleted) = (0, 0, 0)
  367. #self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted }
  368. self.total_lines = total_lines
  369. def refine(self):
  370. # authors
  371. # name -> {place_by_commits, commits_frac, date_first, date_last, timedelta}
  372. authors_by_commits = getkeyssortedbyvaluekey(self.authors, 'commits')
  373. authors_by_commits.reverse() # most first
  374. for i, name in enumerate(authors_by_commits):
  375. self.authors[name]['place_by_commits'] = i + 1
  376. for name in self.authors.keys():
  377. a = self.authors[name]
  378. a['commits_frac'] = (100 * float(a['commits'])) / self.getTotalCommits()
  379. date_first = datetime.datetime.fromtimestamp(a['first_commit_stamp'])
  380. date_last = datetime.datetime.fromtimestamp(a['last_commit_stamp'])
  381. delta = date_last - date_first
  382. a['date_first'] = date_first.strftime('%Y-%m-%d')
  383. a['date_last'] = date_last.strftime('%Y-%m-%d')
  384. a['timedelta'] = delta
  385. def getActiveDays(self):
  386. return self.active_days
  387. def getActivityByDayOfWeek(self):
  388. return self.activity_by_day_of_week
  389. def getActivityByHourOfDay(self):
  390. return self.activity_by_hour_of_day
  391. def getAuthorInfo(self, author):
  392. return self.authors[author]
  393. def getAuthors(self):
  394. return self.authors.keys()
  395. def getCommitDeltaDays(self):
  396. return (self.last_commit_stamp - self.first_commit_stamp) / 86400
  397. def getFilesInCommit(self, rev):
  398. try:
  399. res = self.cache['files_in_tree'][rev]
  400. except:
  401. res = int(getpipeoutput(['git ls-tree -r --name-only "%s"' % rev, 'wc -l']).split('\n')[0])
  402. if 'files_in_tree' not in self.cache:
  403. self.cache['files_in_tree'] = {}
  404. self.cache['files_in_tree'][rev] = res
  405. return res
  406. def getFirstCommitDate(self):
  407. return datetime.datetime.fromtimestamp(self.first_commit_stamp)
  408. def getLastCommitDate(self):
  409. return datetime.datetime.fromtimestamp(self.last_commit_stamp)
  410. def getTags(self):
  411. lines = getpipeoutput(['git show-ref --tags', 'cut -d/ -f3'])
  412. return lines.split('\n')
  413. def getTagDate(self, tag):
  414. return self.revToDate('tags/' + tag)
  415. def getTotalAuthors(self):
  416. return self.total_authors
  417. def getTotalCommits(self):
  418. return self.total_commits
  419. def getTotalFiles(self):
  420. return self.total_files
  421. def getTotalLOC(self):
  422. return self.total_lines
  423. def revToDate(self, rev):
  424. stamp = int(getpipeoutput(['git log --pretty=format:%%at "%s" -n 1' % rev]))
  425. return datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d')
  426. class ReportCreator:
  427. """Creates the actual report based on given data."""
  428. def __init__(self):
  429. pass
  430. def create(self, data, path):
  431. self.data = data
  432. self.path = path
  433. def html_linkify(text):
  434. return text.lower().replace(' ', '_')
  435. def html_header(level, text):
  436. name = html_linkify(text)
  437. return '\n<h%d><a href="#%s" name="%s">%s</a></h%d>\n\n' % (level, name, name, text, level)
  438. class HTMLReportCreator(ReportCreator):
  439. def create(self, data, path):
  440. ReportCreator.create(self, data, path)
  441. self.title = data.projectname
  442. # copy static files. Looks in the binary directory, ../share/gitstats and /usr/share/gitstats
  443. binarypath = os.path.dirname(os.path.abspath(__file__))
  444. secondarypath = os.path.join(binarypath, '..', 'share', 'gitstats')
  445. basedirs = [binarypath, secondarypath, '/usr/share/gitstats']
  446. for file in ('gitstats.css', 'sortable.js', 'arrow-up.gif', 'arrow-down.gif', 'arrow-none.gif'):
  447. for base in basedirs:
  448. src = base + '/' + file
  449. if os.path.exists(src):
  450. shutil.copyfile(src, path + '/' + file)
  451. break
  452. else:
  453. print 'Warning: "%s" not found, so not copied (searched: %s)' % (file, basedirs)
  454. f = open(path + "/index.html", 'w')
  455. format = '%Y-%m-%d %H:%M:%S'
  456. self.printHeader(f)
  457. f.write('<h1>GitStats - %s</h1>' % data.projectname)
  458. self.printNav(f)
  459. f.write('<dl>')
  460. f.write('<dt>Project name</dt><dd>%s</dd>' % (data.projectname))
  461. f.write('<dt>Generated</dt><dd>%s (in %d seconds)</dd>' % (datetime.datetime.now().strftime(format), time.time() - data.getStampCreated()))
  462. f.write('<dt>Generator</dt><dd><a href="http://gitstats.sourceforge.net/">GitStats</a> (version %s)</dd>' % getversion())
  463. f.write('<dt>Report Period</dt><dd>%s to %s (%d days, %d active days)</dd>' % (data.getFirstCommitDate().strftime(format), data.getLastCommitDate().strftime(format), data.getCommitDeltaDays(), data.getActiveDays()))
  464. f.write('<dt>Total Files</dt><dd>%s</dd>' % data.getTotalFiles())
  465. f.write('<dt>Total Lines of Code</dt><dd>%s</dd>' % data.getTotalLOC())
  466. f.write('<dt>Total Commits</dt><dd>%s</dd>' % data.getTotalCommits())
  467. f.write('<dt>Authors</dt><dd>%s</dd>' % data.getTotalAuthors())
  468. f.write('</dl>')
  469. f.write('</body>\n</html>')
  470. f.close()
  471. ###
  472. # Activity
  473. f = open(path + '/activity.html', 'w')
  474. self.printHeader(f)
  475. f.write('<h1>Activity</h1>')
  476. self.printNav(f)
  477. #f.write('<h2>Last 30 days</h2>')
  478. #f.write('<h2>Last 12 months</h2>')
  479. # Weekly activity
  480. WEEKS = 32
  481. f.write(html_header(2, 'Weekly activity'))
  482. f.write('<p>Last %d weeks</p>' % WEEKS)
  483. # generate weeks to show (previous N weeks from now)
  484. now = datetime.datetime.now()
  485. deltaweek = datetime.timedelta(7)
  486. weeks = []
  487. stampcur = now
  488. for i in range(0, WEEKS):
  489. weeks.insert(0, stampcur.strftime('%Y-%W'))
  490. stampcur -= deltaweek
  491. # top row: commits & bar
  492. f.write('<table class="noborders"><tr>')
  493. for i in range(0, WEEKS):
  494. commits = 0
  495. if weeks[i] in data.activity_by_year_week:
  496. commits = data.activity_by_year_week[weeks[i]]
  497. percentage = 0
  498. if weeks[i] in data.activity_by_year_week:
  499. percentage = float(data.activity_by_year_week[weeks[i]]) / data.activity_by_year_week_peak
  500. height = max(1, int(200 * percentage))
  501. f.write('<td style="text-align: center; vertical-align: bottom">%d<div style="display: block; background-color: red; width: 20px; height: %dpx"></div></td>' % (commits, height))
  502. # bottom row: year/week
  503. f.write('</tr><tr>')
  504. for i in range(0, WEEKS):
  505. f.write('<td>%s</td>' % (WEEKS - i))
  506. f.write('</tr></table>')
  507. # Hour of Day
  508. f.write(html_header(2, 'Hour of Day'))
  509. hour_of_day = data.getActivityByHourOfDay()
  510. f.write('<table><tr><th>Hour</th>')
  511. for i in range(0, 24):
  512. f.write('<th>%d</th>' % i)
  513. f.write('</tr>\n<tr><th>Commits</th>')
  514. fp = open(path + '/hour_of_day.dat', 'w')
  515. for i in range(0, 24):
  516. if i in hour_of_day:
  517. r = 127 + int((float(hour_of_day[i]) / data.activity_by_hour_of_day_busiest) * 128)
  518. f.write('<td style="background-color: rgb(%d, 0, 0)">%d</td>' % (r, hour_of_day[i]))
  519. fp.write('%d %d\n' % (i, hour_of_day[i]))
  520. else:
  521. f.write('<td>0</td>')
  522. fp.write('%d 0\n' % i)
  523. fp.close()
  524. f.write('</tr>\n<tr><th>%</th>')
  525. totalcommits = data.getTotalCommits()
  526. for i in range(0, 24):
  527. if i in hour_of_day:
  528. r = 127 + int((float(hour_of_day[i]) / data.activity_by_hour_of_day_busiest) * 128)
  529. f.write('<td style="background-color: rgb(%d, 0, 0)">%.2f</td>' % (r, (100.0 * hour_of_day[i]) / totalcommits))
  530. else:
  531. f.write('<td>0.00</td>')
  532. f.write('</tr></table>')
  533. f.write('<img src="hour_of_day.png" alt="Hour of Day" />')
  534. fg = open(path + '/hour_of_day.dat', 'w')
  535. for i in range(0, 24):
  536. if i in hour_of_day:
  537. fg.write('%d %d\n' % (i + 1, hour_of_day[i]))
  538. else:
  539. fg.write('%d 0\n' % (i + 1))
  540. fg.close()
  541. # Day of Week
  542. f.write(html_header(2, 'Day of Week'))
  543. day_of_week = data.getActivityByDayOfWeek()
  544. f.write('<div class="vtable"><table>')
  545. f.write('<tr><th>Day</th><th>Total (%)</th></tr>')
  546. fp = open(path + '/day_of_week.dat', 'w')
  547. for d in range(0, 7):
  548. commits = 0
  549. if d in day_of_week:
  550. commits = day_of_week[d]
  551. fp.write('%d %d\n' % (d + 1, commits))
  552. f.write('<tr>')
  553. f.write('<th>%d</th>' % (d + 1))
  554. if d in day_of_week:
  555. f.write('<td>%d (%.2f%%)</td>' % (day_of_week[d], (100.0 * day_of_week[d]) / totalcommits))
  556. else:
  557. f.write('<td>0</td>')
  558. f.write('</tr>')
  559. f.write('</table></div>')
  560. f.write('<img src="day_of_week.png" alt="Day of Week" />')
  561. fp.close()
  562. # Hour of Week
  563. f.write(html_header(2, 'Hour of Week'))
  564. f.write('<table>')
  565. f.write('<tr><th>Weekday</th>')
  566. for hour in range(0, 24):
  567. f.write('<th>%d</th>' % (hour))
  568. f.write('</tr>')
  569. for weekday in range(0, 7):
  570. f.write('<tr><th>%d</th>' % (weekday + 1))
  571. for hour in range(0, 24):
  572. try:
  573. commits = data.activity_by_hour_of_week[weekday][hour]
  574. except KeyError:
  575. commits = 0
  576. if commits != 0:
  577. f.write('<td')
  578. r = 127 + int((float(commits) / data.activity_by_hour_of_week_busiest) * 128)
  579. f.write(' style="background-color: rgb(%d, 0, 0)"' % r)
  580. f.write('>%d</td>' % commits)
  581. else:
  582. f.write('<td></td>')
  583. f.write('</tr>')
  584. f.write('</table>')
  585. # Month of Year
  586. f.write(html_header(2, 'Month of Year'))
  587. f.write('<div class="vtable"><table>')
  588. f.write('<tr><th>Month</th><th>Commits (%)</th></tr>')
  589. fp = open (path + '/month_of_year.dat', 'w')
  590. for mm in range(1, 13):
  591. commits = 0
  592. if mm in data.activity_by_month_of_year:
  593. commits = data.activity_by_month_of_year[mm]
  594. f.write('<tr><td>%d</td><td>%d (%.2f %%)</td></tr>' % (mm, commits, (100.0 * commits) / data.getTotalCommits()))
  595. fp.write('%d %d\n' % (mm, commits))
  596. fp.close()
  597. f.write('</table></div>')
  598. f.write('<img src="month_of_year.png" alt="Month of Year" />')
  599. # Commits by year/month
  600. f.write(html_header(2, 'Commits by year/month'))
  601. f.write('<div class="vtable"><table><tr><th>Month</th><th>Commits</th></tr>')
  602. for yymm in reversed(sorted(data.commits_by_month.keys())):
  603. f.write('<tr><td>%s</td><td>%d</td></tr>' % (yymm, data.commits_by_month[yymm]))
  604. f.write('</table></div>')
  605. f.write('<img src="commits_by_year_month.png" alt="Commits by year/month" />')
  606. fg = open(path + '/commits_by_year_month.dat', 'w')
  607. for yymm in sorted(data.commits_by_month.keys()):
  608. fg.write('%s %s\n' % (yymm, data.commits_by_month[yymm]))
  609. fg.close()
  610. # Commits by year
  611. f.write(html_header(2, 'Commits by Year'))
  612. f.write('<div class="vtable"><table><tr><th>Year</th><th>Commits (% of all)</th></tr>')
  613. for yy in reversed(sorted(data.commits_by_year.keys())):
  614. f.write('<tr><td>%s</td><td>%d (%.2f%%)</td></tr>' % (yy, data.commits_by_year[yy], (100.0 * data.commits_by_year[yy]) / data.getTotalCommits()))
  615. f.write('</table></div>')
  616. f.write('<img src="commits_by_year.png" alt="Commits by Year" />')
  617. fg = open(path + '/commits_by_year.dat', 'w')
  618. for yy in sorted(data.commits_by_year.keys()):
  619. fg.write('%d %d\n' % (yy, data.commits_by_year[yy]))
  620. fg.close()
  621. # Commits by timezone
  622. f.write(html_header(2, 'Commits by Timezone'))
  623. f.write('<table><tr>')
  624. f.write('<th>Timezone</th><th>Commits</th>')
  625. max_commits_on_tz = max(data.commits_by_timezone.values())
  626. for i in sorted(data.commits_by_timezone.keys(), key = lambda n : int(n)):
  627. commits = data.commits_by_timezone[i]
  628. r = 127 + int((float(commits) / max_commits_on_tz) * 128)
  629. f.write('<tr><th>%s</th><td style="background-color: rgb(%d, 0, 0)">%d</td></tr>' % (i, r, commits))
  630. f.write('</tr></table>')
  631. f.write('</body></html>')
  632. f.close()
  633. ###
  634. # Authors
  635. f = open(path + '/authors.html', 'w')
  636. self.printHeader(f)
  637. f.write('<h1>Authors</h1>')
  638. self.printNav(f)
  639. # Authors :: List of authors
  640. f.write(html_header(2, 'List of Authors'))
  641. f.write('<table class="authors sortable" id="authors">')
  642. f.write('<tr><th>Author</th><th>Commits (%)</th><th>First commit</th><th>Last commit</th><th class="unsortable">Age</th><th>Active days</th><th># by commits</th></tr>')
  643. for author in sorted(data.getAuthors()):
  644. info = data.getAuthorInfo(author)
  645. f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%s</td><td>%s</td><td>%s</td><td>%d</td><td>%d</td></tr>' % (author, info['commits'], info['commits_frac'], info['date_first'], info['date_last'], info['timedelta'], info['active_days'], info['place_by_commits']))
  646. f.write('</table>')
  647. # Authors :: Author of Month
  648. f.write(html_header(2, 'Author of Month'))
  649. f.write('<table class="sortable" id="aom">')
  650. f.write('<tr><th>Month</th><th>Author</th><th>Commits (%)</th><th class="unsortable">Next top 5</th></tr>')
  651. for yymm in reversed(sorted(data.author_of_month.keys())):
  652. authordict = data.author_of_month[yymm]
  653. authors = getkeyssortedbyvalues(authordict)
  654. authors.reverse()
  655. commits = data.author_of_month[yymm][authors[0]]
  656. next = ', '.join(authors[1:5])
  657. f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td><td>%s</td></tr>' % (yymm, authors[0], commits, (100.0 * commits) / data.commits_by_month[yymm], data.commits_by_month[yymm], next))
  658. f.write('</table>')
  659. f.write(html_header(2, 'Author of Year'))
  660. f.write('<table class="sortable" id="aoy"><tr><th>Year</th><th>Author</th><th>Commits (%)</th><th class="unsortable">Next top 5</th></tr>')
  661. for yy in reversed(sorted(data.author_of_year.keys())):
  662. authordict = data.author_of_year[yy]
  663. authors = getkeyssortedbyvalues(authordict)
  664. authors.reverse()
  665. commits = data.author_of_year[yy][authors[0]]
  666. next = ', '.join(authors[1:5])
  667. f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td><td>%s</td></tr>' % (yy, authors[0], commits, (100.0 * commits) / data.commits_by_year[yy], data.commits_by_year[yy], next))
  668. f.write('</table>')
  669. f.write('</body></html>')
  670. f.close()
  671. ###
  672. # Files
  673. f = open(path + '/files.html', 'w')
  674. self.printHeader(f)
  675. f.write('<h1>Files</h1>')
  676. self.printNav(f)
  677. f.write('<dl>\n')
  678. f.write('<dt>Total files</dt><dd>%d</dd>' % data.getTotalFiles())
  679. f.write('<dt>Total lines</dt><dd>%d</dd>' % data.getTotalLOC())
  680. f.write('<dt>Average file size</dt><dd>%.2f bytes</dd>' % ((100.0 * data.getTotalLOC()) / data.getTotalFiles()))
  681. f.write('</dl>\n')
  682. # Files :: File count by date
  683. f.write(html_header(2, 'File count by date'))
  684. fg = open(path + '/files_by_date.dat', 'w')
  685. for stamp in sorted(data.files_by_stamp.keys()):
  686. fg.write('%s %d\n' % (datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), data.files_by_stamp[stamp]))
  687. fg.close()
  688. f.write('<img src="files_by_date.png" alt="Files by Date" />')
  689. #f.write('<h2>Average file size by date</h2>')
  690. # Files :: Extensions
  691. f.write(html_header(2, 'Extensions'))
  692. f.write('<table class="sortable" id="ext"><tr><th>Extension</th><th>Files (%)</th><th>Lines (%)</th><th>Lines/file</th></tr>')
  693. for ext in sorted(data.extensions.keys()):
  694. files = data.extensions[ext]['files']
  695. lines = data.extensions[ext]['lines']
  696. f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%d (%.2f%%)</td><td>%d</td></tr>' % (ext, files, (100.0 * files) / data.getTotalFiles(), lines, (100.0 * lines) / data.getTotalLOC(), lines / files))
  697. f.write('</table>')
  698. f.write('</body></html>')
  699. f.close()
  700. ###
  701. # Lines
  702. f = open(path + '/lines.html', 'w')
  703. self.printHeader(f)
  704. f.write('<h1>Lines</h1>')
  705. self.printNav(f)
  706. f.write('<dl>\n')
  707. f.write('<dt>Total lines</dt><dd>%d</dd>' % data.getTotalLOC())
  708. f.write('</dl>\n')
  709. f.write(html_header(2, 'Lines of Code'))
  710. f.write('<img src="lines_of_code.png" />')
  711. fg = open(path + '/lines_of_code.dat', 'w')
  712. for stamp in sorted(data.changes_by_date.keys()):
  713. fg.write('%d %d\n' % (stamp, data.changes_by_date[stamp]['lines']))
  714. fg.close()
  715. f.write('</body></html>')
  716. f.close()
  717. ###
  718. # tags.html
  719. f = open(path + '/tags.html', 'w')
  720. self.printHeader(f)
  721. f.write('<h1>Tags</h1>')
  722. self.printNav(f)
  723. f.write('<dl>')
  724. f.write('<dt>Total tags</dt><dd>%d</dd>' % len(data.tags))
  725. if len(data.tags) > 0:
  726. f.write('<dt>Average commits per tag</dt><dd>%.2f</dd>' % (1.0 * data.getTotalCommits() / len(data.tags)))
  727. f.write('</dl>')
  728. f.write('<table>')
  729. f.write('<tr><th>Name</th><th>Date</th><th>Commits</th><th>Authors</th></tr>')
  730. # sort the tags by date desc
  731. tags_sorted_by_date_desc = map(lambda el : el[1], reversed(sorted(map(lambda el : (el[1]['date'], el[0]), data.tags.items()))))
  732. for tag in tags_sorted_by_date_desc:
  733. authorinfo = []
  734. authors_by_commits = getkeyssortedbyvalues(data.tags[tag]['authors'])
  735. for i in reversed(authors_by_commits):
  736. authorinfo.append('%s (%d)' % (i, data.tags[tag]['authors'][i]))
  737. f.write('<tr><td>%s</td><td>%s</td><td>%d</td><td>%s</td></tr>' % (tag, data.tags[tag]['date'], data.tags[tag]['commits'], ', '.join(authorinfo)))
  738. f.write('</table>')
  739. f.write('</body></html>')
  740. f.close()
  741. self.createGraphs(path)
  def createGraphs(self, path):
      """Write the gnuplot scripts into ``path`` and run gnuplot on them.

      One ``<name>.plot`` script is written per graph.  Each script consists
      of the shared ``GNUPLOT_COMMON`` preamble followed by the commands that
      render ``<name>.png`` from ``<name>.dat`` (the ``.dat`` files are
      expected to be present in ``path`` already).  Afterwards the working
      directory is changed to ``path`` -- the scripts refer to their data and
      output files by relative name -- and every ``*.plot`` file found there
      is handed to gnuplot through ``getpipeoutput``.  Anything gnuplot
      prints is echoed.

      path -- output directory; may be absolute or relative to the current
              working directory.

      Side effect: the process working directory is left set to ``path``.
      """
      # Single-argument print(...) behaves identically on Python 2 and 3.
      print('Generating graphs...')

      # (script base name, gnuplot commands).  The commands are written
      # verbatim, so '%' sequences such as "%Y-%m" reach gnuplot untouched.
      plot_scripts = (
          ('hour_of_day', (
              "set output 'hour_of_day.png'",
              'unset key',
              'set xrange [0.5:24.5]',
              'set xtics 4',
              'set ylabel "Commits"',
              "plot 'hour_of_day.dat' using 1:2:(0.5) w boxes fs solid",
          )),
          ('day_of_week', (
              "set output 'day_of_week.png'",
              'unset key',
              'set xrange [0.5:7.5]',
              'set xtics 1',
              'set ylabel "Commits"',
              "plot 'day_of_week.dat' using 1:2:(0.5) w boxes fs solid",
          )),
          ('month_of_year', (
              "set output 'month_of_year.png'",
              'unset key',
              'set xrange [0.5:12.5]',
              'set xtics 1',
              'set ylabel "Commits"',
              "plot 'month_of_year.dat' using 1:2:(0.5) w boxes fs solid",
          )),
          ('commits_by_year_month', (
              "set output 'commits_by_year_month.png'",
              'unset key',
              'set xdata time',
              'set timefmt "%Y-%m"',
              'set format x "%Y-%m"',
              'set xtics rotate by 90 15768000',
              'set bmargin 5',
              'set ylabel "Commits"',
              "plot 'commits_by_year_month.dat' using 1:2:(0.5) w boxes fs solid",
          )),
          ('commits_by_year', (
              "set output 'commits_by_year.png'",
              'unset key',
              'set xtics 1',
              'set ylabel "Commits"',
              'set yrange [0:]',
              "plot 'commits_by_year.dat' using 1:2:(0.5) w boxes fs solid",
          )),
          ('files_by_date', (
              "set output 'files_by_date.png'",
              'unset key',
              'set xdata time',
              'set timefmt "%Y-%m-%d"',
              'set format x "%Y-%m-%d"',
              'set ylabel "Files"',
              'set xtics rotate by 90',
              'set ytics 1',
              'set bmargin 6',
              "plot 'files_by_date.dat' using 1:2 w steps",
          )),
          ('lines_of_code', (
              "set output 'lines_of_code.png'",
              'unset key',
              'set xdata time',
              'set timefmt "%s"',
              'set format x "%Y-%m-%d"',
              'set ylabel "Lines"',
              'set xtics rotate by 90',
              'set bmargin 6',
              "plot 'lines_of_code.dat' using 1:2 w lines",
          )),
      )

      # Resolve the directory once, *before* chdir: a relative ``path`` would
      # otherwise be re-interpreted against the new working directory and the
      # glob below would find nothing.
      plot_dir = os.path.abspath(path)

      for name, commands in plot_scripts:
          script = open(os.path.join(plot_dir, name + '.plot'), 'w')
          try:
              script.write(GNUPLOT_COMMON)
              # Leading and trailing newline kept so the file content matches
              # what the per-graph string literals used to produce.
              script.write('\n' + '\n'.join(commands) + '\n')
          finally:
              # Close the handle even if a write fails.
              script.close()

      # gnuplot must run inside the output directory because the scripts use
      # relative names for their .dat input and .png output.
      os.chdir(plot_dir)

      # Every *.plot file in the directory is rendered, not only the ones
      # written above; sorted for a deterministic processing order.
      for plot_file in sorted(glob.glob(os.path.join(plot_dir, '*.plot'))):
          out = getpipeoutput([gnuplot_cmd + ' "%s"' % plot_file])
          if len(out) > 0:
              print(out)
  def printHeader(self, f, title=''):
      """Write the XHTML prologue, <head> section and opening <body> tag to ``f``.

      f     -- writable file-like object receiving the markup.
      title -- accepted for callers' convenience but not used by this
               method; the page title is always taken from ``self.title``.

      The generator <meta> tag carries the value returned by ``getversion()``.
      """
      template_lines = (
          '<?xml version="1.0" encoding="UTF-8"?>',
          '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">',
          '<html xmlns="http://www.w3.org/1999/xhtml">',
          '<head>',
          '<title>GitStats - %s</title>',
          '<link rel="stylesheet" href="gitstats.css" type="text/css" />',
          '<meta name="generator" content="GitStats %s" />',
          '<script type="text/javascript" src="sortable.js"></script>',
          '</head>',
          '<body>',
          '',  # yields the newline after <body>
      )
      page_title = self.title
      generator_version = getversion()
      header = '\n'.join(template_lines) % (page_title, generator_version)
      f.write(header)
  def printNav(self, f):
      """Write the navigation bar linking the six report pages to ``f``.

      f -- writable file-like object receiving the markup.

      The markup is emitted with a single ``write`` call and both starts and
      ends with a newline.
      """
      # (target page, link text) in display order.
      pages = (
          ('index.html', 'General'),
          ('activity.html', 'Activity'),
          ('authors.html', 'Authors'),
          ('files.html', 'Files'),
          ('lines.html', 'Lines'),
          ('tags.html', 'Tags'),
      )
      entries = ['<li><a href="%s">%s</a></li>' % page for page in pages]
      # The empty strings at both ends produce the leading/trailing newline.
      markup = [''] + ['<div class="nav">', '<ul>'] + entries + ['</ul>', '</div>', '']
      f.write('\n'.join(markup))
  # ---------------------------------------------------------------------------
  # Command-line driver: gitstats <gitpath> <outputpath>
  #
  # Collects statistics from the repository at <gitpath> and writes the HTML
  # report into <outputpath>.  All print calls use the single-argument
  # parenthesised form, which produces the same output on Python 2 and 3.
  # ---------------------------------------------------------------------------
  usage = '\n'.join((
      '',
      'Usage: gitstats [options] <gitpath> <outputpath>',
      'Options:',
      '',
  ))

  # Both positional arguments are required.
  if len(sys.argv) < 3:
      print(usage)
      sys.exit(0)

  gitpath = sys.argv[1]
  # Made absolute up front because the working directory changes below.
  outputpath = os.path.abspath(sys.argv[2])
  rundir = os.getcwd()

  # Create the output directory if necessary.  An OSError is deliberately
  # ignored here (e.g. the directory already exists); the isdir() check that
  # follows reports any real failure.
  try:
      os.makedirs(outputpath)
  except OSError:
      pass
  if not os.path.isdir(outputpath):
      print('FATAL: Output path is not a directory or does not exist')
      sys.exit(1)

  print('Git path: %s' % gitpath)
  print('Output path: %s' % outputpath)

  # Data collection runs with the repository as working directory.
  os.chdir(gitpath)

  cachefile = os.path.join(outputpath, 'gitstats.cache')

  print('Collecting data...')
  data = GitDataCollector()
  data.loadCache(cachefile)
  data.collect(gitpath)

  print('Refining data...')
  # The cache is saved before refine() is called.
  data.saveCache(cachefile)
  data.refine()

  # Return to the directory the program was started from.
  os.chdir(rundir)

  print('Generating report...')
  report = HTMLReportCreator()
  report.create(data, outputpath)

  time_end = time.time()
  exectime_internal = time_end - time_start
  # Guard the percentage against a zero elapsed time (coarse clocks).
  if exectime_internal > 0:
      exectime_external_pct = (100.0 * exectime_external) / exectime_internal
  else:
      exectime_external_pct = 0.0
  # The message used to end with an unmatched ')'; it has been removed.
  print('Execution time %.5f secs, %.5f secs (%.2f %%) in external commands' % (exectime_internal, exectime_external, exectime_external_pct))