12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046
  1. #!/usr/bin/env python
  2. # Copyright (c) 2007-2009 Heikki Hokkanen <hoxu@users.sf.net> & others (see doc/author.txt)
  3. # GPLv2 / GPLv3
  4. import datetime
  5. import glob
  6. import os
  7. import pickle
  8. import platform
  9. import re
  10. import shutil
  11. import subprocess
  12. import sys
  13. import time
  14. import zlib
  15. GNUPLOT_COMMON = 'set terminal png transparent\nset size 0.5,0.5\n'
  16. MAX_EXT_LENGTH = 10 # maximum file extension length
  17. ON_LINUX = (platform.system() == 'Linux')
  18. exectime_internal = 0.0
  19. exectime_external = 0.0
  20. time_start = time.time()
  21. # By default, gnuplot is searched from path, but can be overridden with the
  22. # environment variable "GNUPLOT"
  23. gnuplot_cmd = 'gnuplot'
  24. if 'GNUPLOT' in os.environ:
  25. gnuplot_cmd = os.environ['GNUPLOT']
  26. def getpipeoutput(cmds, quiet = False):
  27. global exectime_external
  28. start = time.time()
  29. if not quiet and ON_LINUX and os.isatty(1):
  30. print '>> ' + ' | '.join(cmds),
  31. sys.stdout.flush()
  32. p0 = subprocess.Popen(cmds[0], stdout = subprocess.PIPE, shell = True)
  33. p = p0
  34. for x in cmds[1:]:
  35. p = subprocess.Popen(x, stdin = p0.stdout, stdout = subprocess.PIPE, shell = True)
  36. p0 = p
  37. output = p.communicate()[0]
  38. end = time.time()
  39. if not quiet:
  40. if ON_LINUX and os.isatty(1):
  41. print '\r',
  42. print '[%.5f] >> %s' % (end - start, ' | '.join(cmds))
  43. exectime_external += (end - start)
  44. return output.rstrip('\n')
  45. def getkeyssortedbyvalues(dict):
  46. return map(lambda el : el[1], sorted(map(lambda el : (el[1], el[0]), dict.items())))
  47. # dict['author'] = { 'commits': 512 } - ...key(dict, 'commits')
  48. def getkeyssortedbyvaluekey(d, key):
  49. return map(lambda el : el[1], sorted(map(lambda el : (d[el][key], el), d.keys())))
  50. VERSION = 0
  51. def getversion():
  52. global VERSION
  53. if VERSION == 0:
  54. VERSION = getpipeoutput(["git rev-parse --short HEAD"]).split('\n')[0]
  55. return VERSION
  56. class DataCollector:
  57. """Manages data collection from a revision control repository."""
  58. def __init__(self):
  59. self.stamp_created = time.time()
  60. self.cache = {}
  61. ##
  62. # This should be the main function to extract data from the repository.
  63. def collect(self, dir):
  64. self.dir = dir
  65. self.projectname = os.path.basename(os.path.abspath(dir))
  66. ##
  67. # Load cacheable data
  68. def loadCache(self, cachefile):
  69. if not os.path.exists(cachefile):
  70. return
  71. print 'Loading cache...'
  72. f = open(cachefile)
  73. try:
  74. self.cache = pickle.loads(zlib.decompress(f.read()))
  75. except:
  76. # temporary hack to upgrade non-compressed caches
  77. f.seek(0)
  78. self.cache = pickle.load(f)
  79. f.close()
  80. ##
  81. # Produce any additional statistics from the extracted data.
  82. def refine(self):
  83. pass
  84. ##
  85. # : get a dictionary of author
  86. def getAuthorInfo(self, author):
  87. return None
  88. def getActivityByDayOfWeek(self):
  89. return {}
  90. def getActivityByHourOfDay(self):
  91. return {}
  92. ##
  93. # Get a list of authors
  94. def getAuthors(self):
  95. return []
  96. def getFirstCommitDate(self):
  97. return datetime.datetime.now()
  98. def getLastCommitDate(self):
  99. return datetime.datetime.now()
  100. def getStampCreated(self):
  101. return self.stamp_created
  102. def getTags(self):
  103. return []
  104. def getTotalAuthors(self):
  105. return -1
  106. def getTotalCommits(self):
  107. return -1
  108. def getTotalFiles(self):
  109. return -1
  110. def getTotalLOC(self):
  111. return -1
  112. ##
  113. # Save cacheable data
  114. def saveCache(self, filename):
  115. print 'Saving cache...'
  116. f = open(cachefile, 'w')
  117. #pickle.dump(self.cache, f)
  118. data = zlib.compress(pickle.dumps(self.cache))
  119. f.write(data)
  120. f.close()
  121. class GitDataCollector(DataCollector):
  122. def collect(self, dir):
  123. DataCollector.collect(self, dir)
  124. try:
  125. self.total_authors = int(getpipeoutput(['git log', 'git shortlog -s', 'wc -l']))
  126. except:
  127. self.total_authors = 0
  128. #self.total_lines = int(getoutput('git-ls-files -z |xargs -0 cat |wc -l'))
  129. self.activity_by_hour_of_day = {} # hour -> commits
  130. self.activity_by_day_of_week = {} # day -> commits
  131. self.activity_by_month_of_year = {} # month [1-12] -> commits
  132. self.activity_by_hour_of_week = {} # weekday -> hour -> commits
  133. self.activity_by_hour_of_day_busiest = 0
  134. self.activity_by_hour_of_week_busiest = 0
  135. self.activity_by_year_week = {} # yy_wNN -> commits
  136. self.activity_by_year_week_peak = 0
  137. self.authors = {} # name -> {commits, first_commit_stamp, last_commit_stamp, last_active_day, active_days, lines_added, lines_removed}
  138. # author of the month
  139. self.author_of_month = {} # month -> author -> commits
  140. self.author_of_year = {} # year -> author -> commits
  141. self.commits_by_month = {} # month -> commits
  142. self.commits_by_year = {} # year -> commits
  143. self.first_commit_stamp = 0
  144. self.last_commit_stamp = 0
  145. self.last_active_day = None
  146. self.active_days = set()
  147. # lines
  148. self.total_lines = 0
  149. self.total_lines_added = 0
  150. self.total_lines_removed = 0
  151. # timezone
  152. self.commits_by_timezone = {} # timezone -> commits
  153. # tags
  154. self.tags = {}
  155. lines = getpipeoutput(['git show-ref --tags']).split('\n')
  156. for line in lines:
  157. if len(line) == 0:
  158. continue
  159. (hash, tag) = line.split(' ')
  160. tag = tag.replace('refs/tags/', '')
  161. output = getpipeoutput(['git log "%s" --pretty=format:"%%at %%an" -n 1' % hash])
  162. if len(output) > 0:
  163. parts = output.split(' ')
  164. stamp = 0
  165. try:
  166. stamp = int(parts[0])
  167. except ValueError:
  168. stamp = 0
  169. self.tags[tag] = { 'stamp': stamp, 'hash' : hash, 'date' : datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), 'commits': 0, 'authors': {} }
  170. # collect info on tags, starting from latest
  171. tags_sorted_by_date_desc = map(lambda el : el[1], reversed(sorted(map(lambda el : (el[1]['date'], el[0]), data.tags.items()))))
  172. prev = None
  173. for tag in reversed(tags_sorted_by_date_desc):
  174. cmd = 'git shortlog -s "%s"' % tag
  175. if prev != None:
  176. cmd += ' "^%s"' % prev
  177. output = getpipeoutput([cmd])
  178. if len(output) == 0:
  179. continue
  180. prev = tag
  181. for line in output.split('\n'):
  182. parts = re.split('\s+', line, 2)
  183. commits = int(parts[1])
  184. author = parts[2]
  185. self.tags[tag]['commits'] += commits
  186. self.tags[tag]['authors'][author] = commits
  187. # Collect revision statistics
  188. # Outputs "<stamp> <author>"
  189. lines = getpipeoutput(['git rev-list --pretty=format:"%at %ai %an" HEAD', 'grep -v ^commit']).split('\n')
  190. for line in lines:
  191. parts = line.split(' ')
  192. author = ''
  193. try:
  194. stamp = int(parts[0])
  195. except ValueError:
  196. stamp = 0
  197. timezone = parts[3]
  198. if len(parts) > 4:
  199. author = ' '.join(parts[4:])
  200. date = datetime.datetime.fromtimestamp(float(stamp))
  201. # First and last commit stamp
  202. if self.last_commit_stamp == 0:
  203. self.last_commit_stamp = stamp
  204. self.first_commit_stamp = stamp
  205. # activity
  206. # hour
  207. hour = date.hour
  208. self.activity_by_hour_of_day[hour] = self.activity_by_hour_of_day.get(hour, 0) + 1
  209. # most active hour?
  210. if self.activity_by_hour_of_day[hour] > self.activity_by_hour_of_day_busiest:
  211. self.activity_by_hour_of_day_busiest = self.activity_by_hour_of_day[hour]
  212. # day of week
  213. day = date.weekday()
  214. self.activity_by_day_of_week[day] = self.activity_by_day_of_week.get(day, 0) + 1
  215. # hour of week
  216. if day not in self.activity_by_hour_of_week:
  217. self.activity_by_hour_of_week[day] = {}
  218. self.activity_by_hour_of_week[day][hour] = self.activity_by_hour_of_week[day].get(hour, 0) + 1
  219. # most active hour?
  220. if self.activity_by_hour_of_week[day][hour] > self.activity_by_hour_of_week_busiest:
  221. self.activity_by_hour_of_week_busiest = self.activity_by_hour_of_week[day][hour]
  222. # month of year
  223. month = date.month
  224. self.activity_by_month_of_year[month] = self.activity_by_month_of_year.get(month, 0) + 1
  225. # yearly/weekly activity
  226. yyw = date.strftime('%Y-%W')
  227. self.activity_by_year_week[yyw] = self.activity_by_year_week.get(yyw, 0) + 1
  228. if self.activity_by_year_week_peak < self.activity_by_year_week[yyw]:
  229. self.activity_by_year_week_peak = self.activity_by_year_week[yyw]
  230. # author stats
  231. if author not in self.authors:
  232. self.authors[author] = {}
  233. # commits
  234. if 'last_commit_stamp' not in self.authors[author]:
  235. self.authors[author]['last_commit_stamp'] = stamp
  236. self.authors[author]['first_commit_stamp'] = stamp
  237. self.authors[author]['commits'] = self.authors[author].get('commits', 0) + 1
  238. # author of the month/year
  239. yymm = date.strftime('%Y-%m')
  240. if yymm in self.author_of_month:
  241. self.author_of_month[yymm][author] = self.author_of_month[yymm].get(author, 0) + 1
  242. else:
  243. self.author_of_month[yymm] = {}
  244. self.author_of_month[yymm][author] = 1
  245. self.commits_by_month[yymm] = self.commits_by_month.get(yymm, 0) + 1
  246. yy = date.year
  247. if yy in self.author_of_year:
  248. self.author_of_year[yy][author] = self.author_of_year[yy].get(author, 0) + 1
  249. else:
  250. self.author_of_year[yy] = {}
  251. self.author_of_year[yy][author] = 1
  252. self.commits_by_year[yy] = self.commits_by_year.get(yy, 0) + 1
  253. # authors: active days
  254. yymmdd = date.strftime('%Y-%m-%d')
  255. if 'last_active_day' not in self.authors[author]:
  256. self.authors[author]['last_active_day'] = yymmdd
  257. self.authors[author]['active_days'] = 1
  258. elif yymmdd != self.authors[author]['last_active_day']:
  259. self.authors[author]['last_active_day'] = yymmdd
  260. self.authors[author]['active_days'] += 1
  261. # project: active days
  262. if yymmdd != self.last_active_day:
  263. self.last_active_day = yymmdd
  264. self.active_days.add(yymmdd)
  265. # timezone
  266. self.commits_by_timezone[timezone] = self.commits_by_timezone.get(timezone, 0) + 1
  267. # TODO Optimize this, it's the worst bottleneck
  268. # outputs "<stamp> <files>" for each revision
  269. self.files_by_stamp = {} # stamp -> files
  270. revlines = getpipeoutput(['git rev-list --pretty=format:"%at %T" HEAD', 'grep -v ^commit']).strip().split('\n')
  271. lines = []
  272. for revline in revlines:
  273. time, rev = revline.split(' ')
  274. linecount = self.getFilesInCommit(rev)
  275. lines.append('%d %d' % (int(time), linecount))
  276. self.total_commits = len(lines)
  277. for line in lines:
  278. parts = line.split(' ')
  279. if len(parts) != 2:
  280. continue
  281. (stamp, files) = parts[0:2]
  282. try:
  283. self.files_by_stamp[int(stamp)] = int(files)
  284. except ValueError:
  285. print 'Warning: failed to parse line "%s"' % line
  286. # extensions
  287. self.extensions = {} # extension -> files, lines
  288. lines = getpipeoutput(['git ls-tree -r -z HEAD']).split('\000')
  289. self.total_files = len(lines)
  290. for line in lines:
  291. if len(line) == 0:
  292. continue
  293. parts = re.split('\s+', line, 4)
  294. sha1 = parts[2]
  295. filename = parts[3]
  296. if filename.find('.') == -1 or filename.rfind('.') == 0:
  297. ext = ''
  298. else:
  299. ext = filename[(filename.rfind('.') + 1):]
  300. if len(ext) > MAX_EXT_LENGTH:
  301. ext = ''
  302. if ext not in self.extensions:
  303. self.extensions[ext] = {'files': 0, 'lines': 0}
  304. self.extensions[ext]['files'] += 1
  305. try:
  306. self.extensions[ext]['lines'] += int(getpipeoutput(['git cat-file blob %s' % sha1, 'wc -l']).split()[0])
  307. except:
  308. print 'Warning: Could not count lines for file "%s"' % line
  309. # line statistics
  310. # outputs:
  311. # N files changed, N insertions (+), N deletions(-)
  312. # <stamp> <author>
  313. self.changes_by_date = {} # stamp -> { files, ins, del }
  314. lines = getpipeoutput(['git log --shortstat --pretty=format:"%at %an"']).split('\n')
  315. lines.reverse()
  316. files = 0; inserted = 0; deleted = 0; total_lines = 0
  317. author = None
  318. for line in lines:
  319. if len(line) == 0:
  320. continue
  321. # <stamp> <author>
  322. if line.find('files changed,') == -1:
  323. pos = line.find(' ')
  324. if pos != -1:
  325. try:
  326. (stamp, author) = (int(line[:pos]), line[pos+1:])
  327. self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted, 'lines': total_lines }
  328. if author not in self.authors:
  329. self.authors[author] = { 'lines_added' : 0, 'lines_removed' : 0 }
  330. self.authors[author]['lines_added'] = self.authors[author].get('lines_added', 0) + inserted
  331. self.authors[author]['lines_removed'] = self.authors[author].get('lines_removed', 0) + deleted
  332. except ValueError:
  333. print 'Warning: unexpected line "%s"' % line
  334. else:
  335. print 'Warning: unexpected line "%s"' % line
  336. else:
  337. numbers = re.findall('\d+', line)
  338. if len(numbers) == 3:
  339. (files, inserted, deleted) = map(lambda el : int(el), numbers)
  340. total_lines += inserted
  341. total_lines -= deleted
  342. self.total_lines_added += inserted
  343. self.total_lines_removed += deleted
  344. else:
  345. print 'Warning: failed to handle line "%s"' % line
  346. (files, inserted, deleted) = (0, 0, 0)
  347. #self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted }
  348. self.total_lines = total_lines
  349. def refine(self):
  350. # authors
  351. # name -> {place_by_commits, commits_frac, date_first, date_last, timedelta}
  352. authors_by_commits = getkeyssortedbyvaluekey(self.authors, 'commits')
  353. authors_by_commits.reverse() # most first
  354. for i, name in enumerate(authors_by_commits):
  355. self.authors[name]['place_by_commits'] = i + 1
  356. for name in self.authors.keys():
  357. a = self.authors[name]
  358. a['commits_frac'] = (100 * float(a['commits'])) / self.getTotalCommits()
  359. date_first = datetime.datetime.fromtimestamp(a['first_commit_stamp'])
  360. date_last = datetime.datetime.fromtimestamp(a['last_commit_stamp'])
  361. delta = date_last - date_first
  362. a['date_first'] = date_first.strftime('%Y-%m-%d')
  363. a['date_last'] = date_last.strftime('%Y-%m-%d')
  364. a['timedelta'] = delta
  365. def getActiveDays(self):
  366. return self.active_days
  367. def getActivityByDayOfWeek(self):
  368. return self.activity_by_day_of_week
  369. def getActivityByHourOfDay(self):
  370. return self.activity_by_hour_of_day
  371. def getAuthorInfo(self, author):
  372. return self.authors[author]
  373. def getAuthors(self):
  374. return self.authors.keys()
  375. def getCommitDeltaDays(self):
  376. return (self.last_commit_stamp - self.first_commit_stamp) / 86400
  377. def getFilesInCommit(self, rev):
  378. try:
  379. res = self.cache['files_in_tree'][rev]
  380. except:
  381. res = int(getpipeoutput(['git ls-tree -r --name-only "%s"' % rev, 'wc -l']).split('\n')[0])
  382. if 'files_in_tree' not in self.cache:
  383. self.cache['files_in_tree'] = {}
  384. self.cache['files_in_tree'][rev] = res
  385. return res
  386. def getFirstCommitDate(self):
  387. return datetime.datetime.fromtimestamp(self.first_commit_stamp)
  388. def getLastCommitDate(self):
  389. return datetime.datetime.fromtimestamp(self.last_commit_stamp)
  390. def getTags(self):
  391. lines = getpipeoutput(['git show-ref --tags', 'cut -d/ -f3'])
  392. return lines.split('\n')
  393. def getTagDate(self, tag):
  394. return self.revToDate('tags/' + tag)
  395. def getTotalAuthors(self):
  396. return self.total_authors
  397. def getTotalCommits(self):
  398. return self.total_commits
  399. def getTotalFiles(self):
  400. return self.total_files
  401. def getTotalLOC(self):
  402. return self.total_lines
  403. def revToDate(self, rev):
  404. stamp = int(getpipeoutput(['git log --pretty=format:%%at "%s" -n 1' % rev]))
  405. return datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d')
  406. class ReportCreator:
  407. """Creates the actual report based on given data."""
  408. def __init__(self):
  409. pass
  410. def create(self, data, path):
  411. self.data = data
  412. self.path = path
  413. def html_linkify(text):
  414. return text.lower().replace(' ', '_')
  415. def html_header(level, text):
  416. name = html_linkify(text)
  417. return '\n<h%d><a href="#%s" name="%s">%s</a></h%d>\n\n' % (level, name, name, text, level)
  418. class HTMLReportCreator(ReportCreator):
  419. def create(self, data, path):
  420. ReportCreator.create(self, data, path)
  421. self.title = data.projectname
  422. # copy static files. Looks in the binary directory, ../share/gitstats and /usr/share/gitstats
  423. binarypath = os.path.dirname(os.path.abspath(__file__))
  424. secondarypath = os.path.join(binarypath, '..', 'share', 'gitstats')
  425. basedirs = [binarypath, secondarypath, '/usr/share/gitstats']
  426. for file in ('gitstats.css', 'sortable.js', 'arrow-up.gif', 'arrow-down.gif', 'arrow-none.gif'):
  427. for base in basedirs:
  428. src = base + '/' + file
  429. if os.path.exists(src):
  430. shutil.copyfile(src, path + '/' + file)
  431. break
  432. else:
  433. print 'Warning: "%s" not found, so not copied (searched: %s)' % (file, basedirs)
  434. f = open(path + "/index.html", 'w')
  435. format = '%Y-%m-%d %H:%M:%S'
  436. self.printHeader(f)
  437. f.write('<h1>GitStats - %s</h1>' % data.projectname)
  438. self.printNav(f)
  439. f.write('<dl>')
  440. f.write('<dt>Project name</dt><dd>%s</dd>' % (data.projectname))
  441. f.write('<dt>Generated</dt><dd>%s (in %d seconds)</dd>' % (datetime.datetime.now().strftime(format), time.time() - data.getStampCreated()))
  442. f.write('<dt>Generator</dt><dd><a href="http://gitstats.sourceforge.net/">GitStats</a> (version %s)</dd>' % getversion())
  443. f.write('<dt>Report Period</dt><dd>%s to %s</dd>' % (data.getFirstCommitDate().strftime(format), data.getLastCommitDate().strftime(format)))
  444. f.write('<dt>Age</dt><dd>%d days, %d active days (%3.2f%%)</dd>' % (data.getCommitDeltaDays(), len(data.getActiveDays()), (100.0 * len(data.getActiveDays()) / data.getCommitDeltaDays())))
  445. f.write('<dt>Total Files</dt><dd>%s</dd>' % data.getTotalFiles())
  446. f.write('<dt>Total Lines of Code</dt><dd>%s (%d added, %d removed)</dd>' % (data.getTotalLOC(), data.total_lines_added, data.total_lines_removed))
  447. f.write('<dt>Total Commits</dt><dd>%s (average %.1f commits per active day, %.1f per all days)</dd>' % (data.getTotalCommits(), float(data.getTotalCommits()) / len(data.getActiveDays()), float(data.getTotalCommits()) / data.getCommitDeltaDays()))
  448. f.write('<dt>Authors</dt><dd>%s</dd>' % data.getTotalAuthors())
  449. f.write('</dl>')
  450. f.write('</body>\n</html>')
  451. f.close()
  452. ###
  453. # Activity
  454. f = open(path + '/activity.html', 'w')
  455. self.printHeader(f)
  456. f.write('<h1>Activity</h1>')
  457. self.printNav(f)
  458. #f.write('<h2>Last 30 days</h2>')
  459. #f.write('<h2>Last 12 months</h2>')
  460. # Weekly activity
  461. WEEKS = 32
  462. f.write(html_header(2, 'Weekly activity'))
  463. f.write('<p>Last %d weeks</p>' % WEEKS)
  464. # generate weeks to show (previous N weeks from now)
  465. now = datetime.datetime.now()
  466. deltaweek = datetime.timedelta(7)
  467. weeks = []
  468. stampcur = now
  469. for i in range(0, WEEKS):
  470. weeks.insert(0, stampcur.strftime('%Y-%W'))
  471. stampcur -= deltaweek
  472. # top row: commits & bar
  473. f.write('<table class="noborders"><tr>')
  474. for i in range(0, WEEKS):
  475. commits = 0
  476. if weeks[i] in data.activity_by_year_week:
  477. commits = data.activity_by_year_week[weeks[i]]
  478. percentage = 0
  479. if weeks[i] in data.activity_by_year_week:
  480. percentage = float(data.activity_by_year_week[weeks[i]]) / data.activity_by_year_week_peak
  481. height = max(1, int(200 * percentage))
  482. f.write('<td style="text-align: center; vertical-align: bottom">%d<div style="display: block; background-color: red; width: 20px; height: %dpx"></div></td>' % (commits, height))
  483. # bottom row: year/week
  484. f.write('</tr><tr>')
  485. for i in range(0, WEEKS):
  486. f.write('<td>%s</td>' % (WEEKS - i))
  487. f.write('</tr></table>')
  488. # Hour of Day
  489. f.write(html_header(2, 'Hour of Day'))
  490. hour_of_day = data.getActivityByHourOfDay()
  491. f.write('<table><tr><th>Hour</th>')
  492. for i in range(0, 24):
  493. f.write('<th>%d</th>' % i)
  494. f.write('</tr>\n<tr><th>Commits</th>')
  495. fp = open(path + '/hour_of_day.dat', 'w')
  496. for i in range(0, 24):
  497. if i in hour_of_day:
  498. r = 127 + int((float(hour_of_day[i]) / data.activity_by_hour_of_day_busiest) * 128)
  499. f.write('<td style="background-color: rgb(%d, 0, 0)">%d</td>' % (r, hour_of_day[i]))
  500. fp.write('%d %d\n' % (i, hour_of_day[i]))
  501. else:
  502. f.write('<td>0</td>')
  503. fp.write('%d 0\n' % i)
  504. fp.close()
  505. f.write('</tr>\n<tr><th>%</th>')
  506. totalcommits = data.getTotalCommits()
  507. for i in range(0, 24):
  508. if i in hour_of_day:
  509. r = 127 + int((float(hour_of_day[i]) / data.activity_by_hour_of_day_busiest) * 128)
  510. f.write('<td style="background-color: rgb(%d, 0, 0)">%.2f</td>' % (r, (100.0 * hour_of_day[i]) / totalcommits))
  511. else:
  512. f.write('<td>0.00</td>')
  513. f.write('</tr></table>')
  514. f.write('<img src="hour_of_day.png" alt="Hour of Day" />')
  515. fg = open(path + '/hour_of_day.dat', 'w')
  516. for i in range(0, 24):
  517. if i in hour_of_day:
  518. fg.write('%d %d\n' % (i + 1, hour_of_day[i]))
  519. else:
  520. fg.write('%d 0\n' % (i + 1))
  521. fg.close()
  522. # Day of Week
  523. f.write(html_header(2, 'Day of Week'))
  524. day_of_week = data.getActivityByDayOfWeek()
  525. f.write('<div class="vtable"><table>')
  526. f.write('<tr><th>Day</th><th>Total (%)</th></tr>')
  527. fp = open(path + '/day_of_week.dat', 'w')
  528. for d in range(0, 7):
  529. commits = 0
  530. if d in day_of_week:
  531. commits = day_of_week[d]
  532. fp.write('%d %d\n' % (d + 1, commits))
  533. f.write('<tr>')
  534. f.write('<th>%d</th>' % (d + 1))
  535. if d in day_of_week:
  536. f.write('<td>%d (%.2f%%)</td>' % (day_of_week[d], (100.0 * day_of_week[d]) / totalcommits))
  537. else:
  538. f.write('<td>0</td>')
  539. f.write('</tr>')
  540. f.write('</table></div>')
  541. f.write('<img src="day_of_week.png" alt="Day of Week" />')
  542. fp.close()
  543. # Hour of Week
  544. f.write(html_header(2, 'Hour of Week'))
  545. f.write('<table>')
  546. f.write('<tr><th>Weekday</th>')
  547. for hour in range(0, 24):
  548. f.write('<th>%d</th>' % (hour))
  549. f.write('</tr>')
  550. for weekday in range(0, 7):
  551. f.write('<tr><th>%d</th>' % (weekday + 1))
  552. for hour in range(0, 24):
  553. try:
  554. commits = data.activity_by_hour_of_week[weekday][hour]
  555. except KeyError:
  556. commits = 0
  557. if commits != 0:
  558. f.write('<td')
  559. r = 127 + int((float(commits) / data.activity_by_hour_of_week_busiest) * 128)
  560. f.write(' style="background-color: rgb(%d, 0, 0)"' % r)
  561. f.write('>%d</td>' % commits)
  562. else:
  563. f.write('<td></td>')
  564. f.write('</tr>')
  565. f.write('</table>')
  566. # Month of Year
  567. f.write(html_header(2, 'Month of Year'))
  568. f.write('<div class="vtable"><table>')
  569. f.write('<tr><th>Month</th><th>Commits (%)</th></tr>')
  570. fp = open (path + '/month_of_year.dat', 'w')
  571. for mm in range(1, 13):
  572. commits = 0
  573. if mm in data.activity_by_month_of_year:
  574. commits = data.activity_by_month_of_year[mm]
  575. f.write('<tr><td>%d</td><td>%d (%.2f %%)</td></tr>' % (mm, commits, (100.0 * commits) / data.getTotalCommits()))
  576. fp.write('%d %d\n' % (mm, commits))
  577. fp.close()
  578. f.write('</table></div>')
  579. f.write('<img src="month_of_year.png" alt="Month of Year" />')
  580. # Commits by year/month
  581. f.write(html_header(2, 'Commits by year/month'))
  582. f.write('<div class="vtable"><table><tr><th>Month</th><th>Commits</th></tr>')
  583. for yymm in reversed(sorted(data.commits_by_month.keys())):
  584. f.write('<tr><td>%s</td><td>%d</td></tr>' % (yymm, data.commits_by_month[yymm]))
  585. f.write('</table></div>')
  586. f.write('<img src="commits_by_year_month.png" alt="Commits by year/month" />')
  587. fg = open(path + '/commits_by_year_month.dat', 'w')
  588. for yymm in sorted(data.commits_by_month.keys()):
  589. fg.write('%s %s\n' % (yymm, data.commits_by_month[yymm]))
  590. fg.close()
  591. # Commits by year
  592. f.write(html_header(2, 'Commits by Year'))
  593. f.write('<div class="vtable"><table><tr><th>Year</th><th>Commits (% of all)</th></tr>')
  594. for yy in reversed(sorted(data.commits_by_year.keys())):
  595. f.write('<tr><td>%s</td><td>%d (%.2f%%)</td></tr>' % (yy, data.commits_by_year[yy], (100.0 * data.commits_by_year[yy]) / data.getTotalCommits()))
  596. f.write('</table></div>')
  597. f.write('<img src="commits_by_year.png" alt="Commits by Year" />')
  598. fg = open(path + '/commits_by_year.dat', 'w')
  599. for yy in sorted(data.commits_by_year.keys()):
  600. fg.write('%d %d\n' % (yy, data.commits_by_year[yy]))
  601. fg.close()
  602. # Commits by timezone
  603. f.write(html_header(2, 'Commits by Timezone'))
  604. f.write('<table><tr>')
  605. f.write('<th>Timezone</th><th>Commits</th>')
  606. max_commits_on_tz = max(data.commits_by_timezone.values())
  607. for i in sorted(data.commits_by_timezone.keys(), key = lambda n : int(n)):
  608. commits = data.commits_by_timezone[i]
  609. r = 127 + int((float(commits) / max_commits_on_tz) * 128)
  610. f.write('<tr><th>%s</th><td style="background-color: rgb(%d, 0, 0)">%d</td></tr>' % (i, r, commits))
  611. f.write('</tr></table>')
  612. f.write('</body></html>')
  613. f.close()
  614. ###
  615. # Authors
  616. f = open(path + '/authors.html', 'w')
  617. self.printHeader(f)
  618. f.write('<h1>Authors</h1>')
  619. self.printNav(f)
  620. # Authors :: List of authors
  621. f.write(html_header(2, 'List of Authors'))
  622. f.write('<table class="authors sortable" id="authors">')
  623. f.write('<tr><th>Author</th><th>Commits (%)</th><th>+ lines</th><th>- lines</th><th>First commit</th><th>Last commit</th><th class="unsortable">Age</th><th>Active days</th><th># by commits</th></tr>')
  624. for author in sorted(data.getAuthors()):
  625. info = data.getAuthorInfo(author)
  626. f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%d</td><td>%d</td><td>%s</td><td>%s</td><td>%s</td><td>%d</td><td>%d</td></tr>' % (author, info['commits'], info['lines_added'], info['lines_removed'], info['commits_frac'], info['date_first'], info['date_last'], info['timedelta'], info['active_days'], info['place_by_commits']))
  627. f.write('</table>')
  628. # Authors :: Author of Month
  629. f.write(html_header(2, 'Author of Month'))
  630. f.write('<table class="sortable" id="aom">')
  631. f.write('<tr><th>Month</th><th>Author</th><th>Commits (%)</th><th class="unsortable">Next top 5</th></tr>')
  632. for yymm in reversed(sorted(data.author_of_month.keys())):
  633. authordict = data.author_of_month[yymm]
  634. authors = getkeyssortedbyvalues(authordict)
  635. authors.reverse()
  636. commits = data.author_of_month[yymm][authors[0]]
  637. next = ', '.join(authors[1:5])
  638. f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td><td>%s</td></tr>' % (yymm, authors[0], commits, (100.0 * commits) / data.commits_by_month[yymm], data.commits_by_month[yymm], next))
  639. f.write('</table>')
  640. f.write(html_header(2, 'Author of Year'))
  641. f.write('<table class="sortable" id="aoy"><tr><th>Year</th><th>Author</th><th>Commits (%)</th><th class="unsortable">Next top 5</th></tr>')
  642. for yy in reversed(sorted(data.author_of_year.keys())):
  643. authordict = data.author_of_year[yy]
  644. authors = getkeyssortedbyvalues(authordict)
  645. authors.reverse()
  646. commits = data.author_of_year[yy][authors[0]]
  647. next = ', '.join(authors[1:5])
  648. f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td><td>%s</td></tr>' % (yy, authors[0], commits, (100.0 * commits) / data.commits_by_year[yy], data.commits_by_year[yy], next))
  649. f.write('</table>')
  650. f.write('</body></html>')
  651. f.close()
  652. ###
  653. # Files
  654. f = open(path + '/files.html', 'w')
  655. self.printHeader(f)
  656. f.write('<h1>Files</h1>')
  657. self.printNav(f)
  658. f.write('<dl>\n')
  659. f.write('<dt>Total files</dt><dd>%d</dd>' % data.getTotalFiles())
  660. f.write('<dt>Total lines</dt><dd>%d</dd>' % data.getTotalLOC())
  661. f.write('<dt>Average file size</dt><dd>%.2f bytes</dd>' % ((100.0 * data.getTotalLOC()) / data.getTotalFiles()))
  662. f.write('</dl>\n')
  663. # Files :: File count by date
  664. f.write(html_header(2, 'File count by date'))
  665. fg = open(path + '/files_by_date.dat', 'w')
  666. for stamp in sorted(data.files_by_stamp.keys()):
  667. fg.write('%s %d\n' % (datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), data.files_by_stamp[stamp]))
  668. fg.close()
  669. f.write('<img src="files_by_date.png" alt="Files by Date" />')
  670. #f.write('<h2>Average file size by date</h2>')
  671. # Files :: Extensions
  672. f.write(html_header(2, 'Extensions'))
  673. f.write('<table class="sortable" id="ext"><tr><th>Extension</th><th>Files (%)</th><th>Lines (%)</th><th>Lines/file</th></tr>')
  674. for ext in sorted(data.extensions.keys()):
  675. files = data.extensions[ext]['files']
  676. lines = data.extensions[ext]['lines']
  677. f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%d (%.2f%%)</td><td>%d</td></tr>' % (ext, files, (100.0 * files) / data.getTotalFiles(), lines, (100.0 * lines) / data.getTotalLOC(), lines / files))
  678. f.write('</table>')
  679. f.write('</body></html>')
  680. f.close()
  681. ###
  682. # Lines
  683. f = open(path + '/lines.html', 'w')
  684. self.printHeader(f)
  685. f.write('<h1>Lines</h1>')
  686. self.printNav(f)
  687. f.write('<dl>\n')
  688. f.write('<dt>Total lines</dt><dd>%d</dd>' % data.getTotalLOC())
  689. f.write('</dl>\n')
  690. f.write(html_header(2, 'Lines of Code'))
  691. f.write('<img src="lines_of_code.png" />')
  692. fg = open(path + '/lines_of_code.dat', 'w')
  693. for stamp in sorted(data.changes_by_date.keys()):
  694. fg.write('%d %d\n' % (stamp, data.changes_by_date[stamp]['lines']))
  695. fg.close()
  696. f.write('</body></html>')
  697. f.close()
  698. ###
  699. # tags.html
  700. f = open(path + '/tags.html', 'w')
  701. self.printHeader(f)
  702. f.write('<h1>Tags</h1>')
  703. self.printNav(f)
  704. f.write('<dl>')
  705. f.write('<dt>Total tags</dt><dd>%d</dd>' % len(data.tags))
  706. if len(data.tags) > 0:
  707. f.write('<dt>Average commits per tag</dt><dd>%.2f</dd>' % (1.0 * data.getTotalCommits() / len(data.tags)))
  708. f.write('</dl>')
  709. f.write('<table class="tags">')
  710. f.write('<tr><th>Name</th><th>Date</th><th>Commits</th><th>Authors</th></tr>')
  711. # sort the tags by date desc
  712. tags_sorted_by_date_desc = map(lambda el : el[1], reversed(sorted(map(lambda el : (el[1]['date'], el[0]), data.tags.items()))))
  713. for tag in tags_sorted_by_date_desc:
  714. authorinfo = []
  715. authors_by_commits = getkeyssortedbyvalues(data.tags[tag]['authors'])
  716. for i in reversed(authors_by_commits):
  717. authorinfo.append('%s (%d)' % (i, data.tags[tag]['authors'][i]))
  718. f.write('<tr><td>%s</td><td>%s</td><td>%d</td><td>%s</td></tr>' % (tag, data.tags[tag]['date'], data.tags[tag]['commits'], ', '.join(authorinfo)))
  719. f.write('</table>')
  720. f.write('</body></html>')
  721. f.close()
  722. self.createGraphs(path)
  723. def createGraphs(self, path):
  724. print 'Generating graphs...'
  725. # hour of day
  726. f = open(path + '/hour_of_day.plot', 'w')
  727. f.write(GNUPLOT_COMMON)
  728. f.write(
  729. """
  730. set output 'hour_of_day.png'
  731. unset key
  732. set xrange [0.5:24.5]
  733. set xtics 4
  734. set ylabel "Commits"
  735. plot 'hour_of_day.dat' using 1:2:(0.5) w boxes fs solid
  736. """)
  737. f.close()
  738. # day of week
  739. f = open(path + '/day_of_week.plot', 'w')
  740. f.write(GNUPLOT_COMMON)
  741. f.write(
  742. """
  743. set output 'day_of_week.png'
  744. unset key
  745. set xrange [0.5:7.5]
  746. set xtics 1
  747. set ylabel "Commits"
  748. plot 'day_of_week.dat' using 1:2:(0.5) w boxes fs solid
  749. """)
  750. f.close()
  751. # Month of Year
  752. f = open(path + '/month_of_year.plot', 'w')
  753. f.write(GNUPLOT_COMMON)
  754. f.write(
  755. """
  756. set output 'month_of_year.png'
  757. unset key
  758. set xrange [0.5:12.5]
  759. set xtics 1
  760. set ylabel "Commits"
  761. plot 'month_of_year.dat' using 1:2:(0.5) w boxes fs solid
  762. """)
  763. f.close()
  764. # commits_by_year_month
  765. f = open(path + '/commits_by_year_month.plot', 'w')
  766. f.write(GNUPLOT_COMMON)
  767. f.write(
  768. """
  769. set output 'commits_by_year_month.png'
  770. unset key
  771. set xdata time
  772. set timefmt "%Y-%m"
  773. set format x "%Y-%m"
  774. set xtics rotate by 90 15768000
  775. set bmargin 5
  776. set ylabel "Commits"
  777. plot 'commits_by_year_month.dat' using 1:2:(0.5) w boxes fs solid
  778. """)
  779. f.close()
  780. # commits_by_year
  781. f = open(path + '/commits_by_year.plot', 'w')
  782. f.write(GNUPLOT_COMMON)
  783. f.write(
  784. """
  785. set output 'commits_by_year.png'
  786. unset key
  787. set xtics 1
  788. set ylabel "Commits"
  789. set yrange [0:]
  790. plot 'commits_by_year.dat' using 1:2:(0.5) w boxes fs solid
  791. """)
  792. f.close()
  793. # Files by date
  794. f = open(path + '/files_by_date.plot', 'w')
  795. f.write(GNUPLOT_COMMON)
  796. f.write(
  797. """
  798. set output 'files_by_date.png'
  799. unset key
  800. set xdata time
  801. set timefmt "%Y-%m-%d"
  802. set format x "%Y-%m-%d"
  803. set ylabel "Files"
  804. set xtics rotate by 90
  805. set ytics 1
  806. set bmargin 6
  807. plot 'files_by_date.dat' using 1:2 w steps
  808. """)
  809. f.close()
  810. # Lines of Code
  811. f = open(path + '/lines_of_code.plot', 'w')
  812. f.write(GNUPLOT_COMMON)
  813. f.write(
  814. """
  815. set output 'lines_of_code.png'
  816. unset key
  817. set xdata time
  818. set timefmt "%s"
  819. set format x "%Y-%m-%d"
  820. set ylabel "Lines"
  821. set xtics rotate by 90
  822. set bmargin 6
  823. plot 'lines_of_code.dat' using 1:2 w lines
  824. """)
  825. f.close()
  826. os.chdir(path)
  827. files = glob.glob(path + '/*.plot')
  828. for f in files:
  829. out = getpipeoutput([gnuplot_cmd + ' "%s"' % f])
  830. if len(out) > 0:
  831. print out
  832. def printHeader(self, f, title = ''):
  833. f.write(
  834. """<?xml version="1.0" encoding="UTF-8"?>
  835. <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
  836. <html xmlns="http://www.w3.org/1999/xhtml">
  837. <head>
  838. <title>GitStats - %s</title>
  839. <link rel="stylesheet" href="gitstats.css" type="text/css" />
  840. <meta name="generator" content="GitStats %s" />
  841. <script type="text/javascript" src="sortable.js"></script>
  842. </head>
  843. <body>
  844. """ % (self.title, getversion()))
  845. def printNav(self, f):
  846. f.write("""
  847. <div class="nav">
  848. <ul>
  849. <li><a href="index.html">General</a></li>
  850. <li><a href="activity.html">Activity</a></li>
  851. <li><a href="authors.html">Authors</a></li>
  852. <li><a href="files.html">Files</a></li>
  853. <li><a href="lines.html">Lines</a></li>
  854. <li><a href="tags.html">Tags</a></li>
  855. </ul>
  856. </div>
  857. """)
  858. usage = """
  859. Usage: gitstats [options] <gitpath> <outputpath>
  860. Options:
  861. """
  862. if len(sys.argv) < 3:
  863. print usage
  864. sys.exit(0)
  865. gitpath = sys.argv[1]
  866. outputpath = os.path.abspath(sys.argv[2])
  867. rundir = os.getcwd()
  868. try:
  869. os.makedirs(outputpath)
  870. except OSError:
  871. pass
  872. if not os.path.isdir(outputpath):
  873. print 'FATAL: Output path is not a directory or does not exist'
  874. sys.exit(1)
  875. print 'Git path: %s' % gitpath
  876. print 'Output path: %s' % outputpath
  877. os.chdir(gitpath)
  878. cachefile = os.path.join(outputpath, 'gitstats.cache')
  879. print 'Collecting data...'
  880. data = GitDataCollector()
  881. data.loadCache(cachefile)
  882. data.collect(gitpath)
  883. print 'Refining data...'
  884. data.saveCache(cachefile)
  885. data.refine()
  886. os.chdir(rundir)
  887. print 'Generating report...'
  888. report = HTMLReportCreator()
  889. report.create(data, outputpath)
  890. time_end = time.time()
  891. exectime_internal = time_end - time_start
  892. print 'Execution time %.5f secs, %.5f secs (%.2f %%) in external commands)' % (exectime_internal, exectime_external, (100.0 * exectime_external) / exectime_internal)