#!/usr/bin/env python
# Copyright (c) 2007-2011 Heikki Hokkanen <hoxu@users.sf.net> & others (see doc/author.txt)
# GPLv2 / GPLv3
import datetime
import getopt
import glob
import os
import pickle
import platform
import re
import shutil
import subprocess
import sys
import time
import zlib

GNUPLOT_COMMON = 'set terminal png transparent size 640,240\nset size 1.0,1.0\n'
ON_LINUX = (platform.system() == 'Linux')
WEEKDAYS = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')

exectime_internal = 0.0
exectime_external = 0.0
time_start = time.time()

# By default, gnuplot is searched from path, but can be overridden with the
# environment variable "GNUPLOT"
gnuplot_cmd = 'gnuplot'
if 'GNUPLOT' in os.environ:
    gnuplot_cmd = os.environ['GNUPLOT']

conf = {
    'max_domains': 10,
    'max_ext_length': 10,
    'style': 'gitstats.css',
    'max_authors': 20,
    'authors_top': 5,
    'commit_begin': '',
    'commit_end': 'HEAD',
    'linear_linestats': 1,
    'project_name': '',
}
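
# Run a pipeline of shell commands: each entry in cmds is a shell command whose
# stdout is piped into the stdin of the next. Returns the last command's output
# with the trailing newline stripped; elapsed time is added to exectime_external.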
def getpipeoutput(cmds, quiet = False):
    global exectime_external
    start = time.time()
    if not quiet and ON_LINUX and os.isatty(1):
        print '>> ' + ' | '.join(cmds),
        sys.stdout.flush()
    p0 = subprocess.Popen(cmds[0], stdout = subprocess.PIPE, shell = True)
    p = p0
    for x in cmds[1:]:
        p = subprocess.Popen(x, stdin = p0.stdout, stdout = subprocess.PIPE, shell = True)
        p0 = p
    output = p.communicate()[0]
    end = time.time()
    if not quiet:
        if ON_LINUX and os.isatty(1):
            print '\r',
        print '[%.5f] >> %s' % (end - start, ' | '.join(cmds))
    exectime_external += (end - start)
    return output.rstrip('\n')
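
# Return the commit range to analyze: just conf['commit_end'] when no begin is set
# (or when end_only is requested), "begin..end" when both are configured, and the
# given default range otherwise.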
def getcommitrange(defaultrange = 'HEAD', end_only = False):
    if len(conf['commit_end']) > 0:
        if end_only or len(conf['commit_begin']) == 0:
            return conf['commit_end']
        return '%s..%s' % (conf['commit_begin'], conf['commit_end'])
    return defaultrange

def getkeyssortedbyvalues(dict):
    return map(lambda el : el[1], sorted(map(lambda el : (el[1], el[0]), dict.items())))

# dict['author'] = { 'commits': 512 } - ...key(dict, 'commits')
def getkeyssortedbyvaluekey(d, key):
    return map(lambda el : el[1], sorted(map(lambda el : (d[el][key], el), d.keys())))
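
# VERSION caches the short hash of the newest commit in the analyzed range; it is
# reported as the version of the generated statistics.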
VERSION = 0
def getversion():
    global VERSION
    if VERSION == 0:
        VERSION = getpipeoutput(["git rev-parse --short %s" % getcommitrange('HEAD')]).split('\n')[0]
    return VERSION

def getgitversion():
    return getpipeoutput(['git --version']).split('\n')[0]

def getgnuplotversion():
    return getpipeoutput(['gnuplot --version']).split('\n')[0]

class DataCollector:
    """Manages data collection from a revision control repository."""
    def __init__(self):
        self.stamp_created = time.time()
        self.cache = {}
        self.total_authors = 0
        self.activity_by_hour_of_day = {} # hour -> commits
        self.activity_by_day_of_week = {} # day -> commits
        self.activity_by_month_of_year = {} # month [1-12] -> commits
        self.activity_by_hour_of_week = {} # weekday -> hour -> commits
        self.activity_by_hour_of_day_busiest = 0
        self.activity_by_hour_of_week_busiest = 0
        self.activity_by_year_week = {} # yy_wNN -> commits
        self.activity_by_year_week_peak = 0

        self.authors = {} # name -> {commits, first_commit_stamp, last_commit_stamp, last_active_day, active_days, lines_added, lines_removed}

        self.total_commits = 0
        self.total_files = 0
        self.authors_by_commits = 0

        # domains
        self.domains = {} # domain -> commits

        # author of the month
        self.author_of_month = {} # month -> author -> commits
        self.author_of_year = {} # year -> author -> commits
        self.commits_by_month = {} # month -> commits
        self.commits_by_year = {} # year -> commits
        self.lines_added_by_month = {} # month -> lines added
        self.lines_added_by_year = {} # year -> lines added
        self.lines_removed_by_month = {} # month -> lines removed
        self.lines_removed_by_year = {} # year -> lines removed
        self.first_commit_stamp = 0
        self.last_commit_stamp = 0
        self.last_active_day = None
        self.active_days = set()

        # lines
        self.total_lines = 0
        self.total_lines_added = 0
        self.total_lines_removed = 0

        # size
        self.total_size = 0

        # timezone
        self.commits_by_timezone = {} # timezone -> commits

        # tags
        self.tags = {}

        self.files_by_stamp = {} # stamp -> files

        # extensions
        self.extensions = {} # extension -> files, lines

        # line statistics
        self.changes_by_date = {} # stamp -> { files, ins, del }

    ##
    # This should be the main function to extract data from the repository.
    def collect(self, dir):
        self.dir = dir
        if len(conf['project_name']) == 0:
            self.projectname = os.path.basename(os.path.abspath(dir))
        else:
            self.projectname = conf['project_name']

    ##
    # Load cacheable data
    def loadCache(self, cachefile):
        if not os.path.exists(cachefile):
            return
        print 'Loading cache...'
        f = open(cachefile, 'rb')
        try:
            self.cache = pickle.loads(zlib.decompress(f.read()))
        except:
            # temporary hack to upgrade non-compressed caches
            f.seek(0)
            self.cache = pickle.load(f)
        f.close()

    ##
    # Produce any additional statistics from the extracted data.
    def refine(self):
        pass

    ##
    # : get a dictionary of author
    def getAuthorInfo(self, author):
        return None

    def getActivityByDayOfWeek(self):
        return {}

    def getActivityByHourOfDay(self):
        return {}

    # : get a dictionary of domains
    def getDomainInfo(self, domain):
        return None

    ##
    # Get a list of authors
    def getAuthors(self):
        return []

    def getFirstCommitDate(self):
        return datetime.datetime.now()

    def getLastCommitDate(self):
        return datetime.datetime.now()

    def getStampCreated(self):
        return self.stamp_created

    def getTags(self):
        return []

    def getTotalAuthors(self):
        return -1

    def getTotalCommits(self):
        return -1

    def getTotalFiles(self):
        return -1

    def getTotalLOC(self):
        return -1

    ##
    # Save cacheable data
    def saveCache(self, cachefile):
        print 'Saving cache...'
        f = open(cachefile, 'wb')
        #pickle.dump(self.cache, f)
        data = zlib.compress(pickle.dumps(self.cache))
        f.write(data)
        f.close()
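
# DataCollector implementation that gathers all statistics from a git repository
# by shelling out to git (shortlog, show-ref, rev-list, ls-tree, log --shortstat)
# and parsing the output.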
class GitDataCollector(DataCollector):
    def collect(self, dir):
        DataCollector.collect(self, dir)

        try:
            self.total_authors += int(getpipeoutput(['git shortlog -s %s' % getcommitrange(), 'wc -l']))
        except:
            self.total_authors = 0
        #self.total_lines = int(getoutput('git-ls-files -z |xargs -0 cat |wc -l'))

        # tags
        lines = getpipeoutput(['git show-ref --tags']).split('\n')
        for line in lines:
            if len(line) == 0:
                continue
            (hash, tag) = line.split(' ')

            tag = tag.replace('refs/tags/', '')
            output = getpipeoutput(['git log "%s" --pretty=format:"%%at %%aN" -n 1' % hash])
            if len(output) > 0:
                parts = output.split(' ')
                stamp = 0
                try:
                    stamp = int(parts[0])
                except ValueError:
                    stamp = 0
                self.tags[tag] = { 'stamp': stamp, 'hash' : hash, 'date' : datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), 'commits': 0, 'authors': {} }

        # collect info on tags, starting from latest
        tags_sorted_by_date_desc = map(lambda el : el[1], reversed(sorted(map(lambda el : (el[1]['date'], el[0]), self.tags.items()))))
        prev = None
        for tag in reversed(tags_sorted_by_date_desc):
            cmd = 'git shortlog -s "%s"' % tag
            if prev != None:
                cmd += ' "^%s"' % prev
            output = getpipeoutput([cmd])
            if len(output) == 0:
                continue
            prev = tag
            for line in output.split('\n'):
                parts = re.split('\s+', line, 2)
                commits = int(parts[1])
                author = parts[2]
                self.tags[tag]['commits'] += commits
                self.tags[tag]['authors'][author] = commits

        # Collect revision statistics
        # Outputs "<stamp> <date> <time> <timezone> <author> '<' <mail> '>'"
        lines = getpipeoutput(['git rev-list --pretty=format:"%%at %%ai %%aN <%%aE>" %s' % getcommitrange('HEAD'), 'grep -v ^commit']).split('\n')
        for line in lines:
            parts = line.split(' ', 4)
            author = ''
            try:
                stamp = int(parts[0])
            except ValueError:
                stamp = 0
            timezone = parts[3]
            author, mail = parts[4].split('<', 1)
            author = author.rstrip()
            mail = mail.rstrip('>')
            domain = '?'
            if mail.find('@') != -1:
                domain = mail.rsplit('@', 1)[1]
            date = datetime.datetime.fromtimestamp(float(stamp))

            # First and last commit stamp (may be in any order because of cherry-picking and patches)
            if stamp > self.last_commit_stamp:
                self.last_commit_stamp = stamp
            if self.first_commit_stamp == 0 or stamp < self.first_commit_stamp:
                self.first_commit_stamp = stamp

            # activity
            # hour
            hour = date.hour
            self.activity_by_hour_of_day[hour] = self.activity_by_hour_of_day.get(hour, 0) + 1
            # most active hour?
            if self.activity_by_hour_of_day[hour] > self.activity_by_hour_of_day_busiest:
                self.activity_by_hour_of_day_busiest = self.activity_by_hour_of_day[hour]

            # day of week
            day = date.weekday()
            self.activity_by_day_of_week[day] = self.activity_by_day_of_week.get(day, 0) + 1

            # domain stats
            if domain not in self.domains:
                self.domains[domain] = {}
            # commits
            self.domains[domain]['commits'] = self.domains[domain].get('commits', 0) + 1

            # hour of week
            if day not in self.activity_by_hour_of_week:
                self.activity_by_hour_of_week[day] = {}
            self.activity_by_hour_of_week[day][hour] = self.activity_by_hour_of_week[day].get(hour, 0) + 1
            # most active hour?
            if self.activity_by_hour_of_week[day][hour] > self.activity_by_hour_of_week_busiest:
                self.activity_by_hour_of_week_busiest = self.activity_by_hour_of_week[day][hour]

            # month of year
            month = date.month
            self.activity_by_month_of_year[month] = self.activity_by_month_of_year.get(month, 0) + 1

            # yearly/weekly activity
            yyw = date.strftime('%Y-%W')
            self.activity_by_year_week[yyw] = self.activity_by_year_week.get(yyw, 0) + 1
            if self.activity_by_year_week_peak < self.activity_by_year_week[yyw]:
                self.activity_by_year_week_peak = self.activity_by_year_week[yyw]

            # author stats
            if author not in self.authors:
                self.authors[author] = {}
            # commits, note again that commits may be in any date order because of cherry-picking and patches
            if 'last_commit_stamp' not in self.authors[author]:
                self.authors[author]['last_commit_stamp'] = stamp
            if stamp > self.authors[author]['last_commit_stamp']:
                self.authors[author]['last_commit_stamp'] = stamp
            if 'first_commit_stamp' not in self.authors[author]:
                self.authors[author]['first_commit_stamp'] = stamp
            if stamp < self.authors[author]['first_commit_stamp']:
                self.authors[author]['first_commit_stamp'] = stamp

            # author of the month/year
            yymm = date.strftime('%Y-%m')
            if yymm in self.author_of_month:
                self.author_of_month[yymm][author] = self.author_of_month[yymm].get(author, 0) + 1
            else:
                self.author_of_month[yymm] = {}
                self.author_of_month[yymm][author] = 1
            self.commits_by_month[yymm] = self.commits_by_month.get(yymm, 0) + 1

            yy = date.year
            if yy in self.author_of_year:
                self.author_of_year[yy][author] = self.author_of_year[yy].get(author, 0) + 1
            else:
                self.author_of_year[yy] = {}
                self.author_of_year[yy][author] = 1
            self.commits_by_year[yy] = self.commits_by_year.get(yy, 0) + 1

            # authors: active days
            yymmdd = date.strftime('%Y-%m-%d')
            if 'last_active_day' not in self.authors[author]:
                self.authors[author]['last_active_day'] = yymmdd
                self.authors[author]['active_days'] = set([yymmdd])
            elif yymmdd != self.authors[author]['last_active_day']:
                self.authors[author]['last_active_day'] = yymmdd
                self.authors[author]['active_days'].add(yymmdd)

            # project: active days
            if yymmdd != self.last_active_day:
                self.last_active_day = yymmdd
                self.active_days.add(yymmdd)

            # timezone
            self.commits_by_timezone[timezone] = self.commits_by_timezone.get(timezone, 0) + 1

        # TODO Optimize this, it's the worst bottleneck
        # outputs "<stamp> <files>" for each revision
        revlines = getpipeoutput(['git rev-list --pretty=format:"%%at %%T" %s' % getcommitrange('HEAD'), 'grep -v ^commit']).strip().split('\n')
        lines = []
        for revline in revlines:
            time, rev = revline.split(' ')
            linecount = self.getFilesInCommit(rev)
            lines.append('%d %d' % (int(time), linecount))

        self.total_commits += len(lines)
        for line in lines:
            parts = line.split(' ')
            if len(parts) != 2:
                continue
            (stamp, files) = parts[0:2]
            try:
                self.files_by_stamp[int(stamp)] = int(files)
            except ValueError:
                print 'Warning: failed to parse line "%s"' % line

        # extensions and size of files
        lines = getpipeoutput(['git ls-tree -r -l -z %s' % getcommitrange('HEAD', end_only = True)]).split('\000')
        for line in lines:
            if len(line) == 0:
                continue
            parts = re.split('\s+', line, 5)
            if parts[0] == '160000' and parts[3] == '-':
                # skip submodules
                continue
            sha1 = parts[2]
            size = int(parts[3])
            fullpath = parts[4]

            self.total_size += size
            self.total_files += 1

            filename = fullpath.split('/')[-1] # strip directories
            if filename.find('.') == -1 or filename.rfind('.') == 0:
                ext = ''
            else:
                ext = filename[(filename.rfind('.') + 1):]
            if len(ext) > conf['max_ext_length']:
                ext = ''

            if ext not in self.extensions:
                self.extensions[ext] = {'files': 0, 'lines': 0}
            self.extensions[ext]['files'] += 1
            try:
                self.extensions[ext]['lines'] += self.getLinesInBlob(sha1)
            except:
                print 'Warning: Could not count lines for file "%s"' % line
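
        # The passes below parse `git log --shortstat` output. After lines.reverse()
        # the "N files changed, ..." summary of a commit precedes its "<stamp> <author>"
        # line, so the counters accumulated from the summary are stored and reset when
        # the header line is reached.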
        # line statistics
        # outputs:
        #  N files changed, N insertions (+), N deletions(-)
        # <stamp> <author>
        self.changes_by_date = {} # stamp -> { files, ins, del }
        # computation of lines of code by date is better done
        # on a linear history.
        extra = ''
        if conf['linear_linestats']:
            extra = '--first-parent -m'
        lines = getpipeoutput(['git log --shortstat %s --pretty=format:"%%at %%aN" %s' % (extra, getcommitrange('HEAD'))]).split('\n')
        lines.reverse()
        files = 0; inserted = 0; deleted = 0; total_lines = 0
        author = None
        for line in lines:
            if len(line) == 0:
                continue

            # <stamp> <author>
            if line.find('files changed,') == -1:
                pos = line.find(' ')
                if pos != -1:
                    try:
                        (stamp, author) = (int(line[:pos]), line[pos+1:])
                        self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted, 'lines': total_lines }

                        date = datetime.datetime.fromtimestamp(stamp)
                        yymm = date.strftime('%Y-%m')
                        self.lines_added_by_month[yymm] = self.lines_added_by_month.get(yymm, 0) + inserted
                        self.lines_removed_by_month[yymm] = self.lines_removed_by_month.get(yymm, 0) + deleted

                        yy = date.year
                        self.lines_added_by_year[yy] = self.lines_added_by_year.get(yy, 0) + inserted
                        self.lines_removed_by_year[yy] = self.lines_removed_by_year.get(yy, 0) + deleted

                        files, inserted, deleted = 0, 0, 0
                    except ValueError:
                        print 'Warning: unexpected line "%s"' % line
                else:
                    print 'Warning: unexpected line "%s"' % line
            else:
                numbers = re.findall('\d+', line)
                if len(numbers) == 3:
                    (files, inserted, deleted) = map(lambda el : int(el), numbers)
                    total_lines += inserted
                    total_lines -= deleted
                    self.total_lines_added += inserted
                    self.total_lines_removed += deleted
                else:
                    print 'Warning: failed to handle line "%s"' % line
                    (files, inserted, deleted) = (0, 0, 0)
                #self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted }
        self.total_lines = total_lines

        # Per-author statistics

        # defined for stamp, author only if author committed at this timestamp.
        self.changes_by_date_by_author = {} # stamp -> author -> lines_added

        # Similar to the above, but never use --first-parent
        # (we need to walk through every commit to know who
        # committed what, not just through mainline)
        lines = getpipeoutput(['git log --shortstat --date-order --pretty=format:"%%at %%aN" %s' % (getcommitrange('HEAD'))]).split('\n')
        lines.reverse()
        files = 0; inserted = 0; deleted = 0
        author = None
        stamp = 0
        for line in lines:
            if len(line) == 0:
                continue

            # <stamp> <author>
            if line.find('files changed,') == -1:
                pos = line.find(' ')
                if pos != -1:
                    try:
                        oldstamp = stamp
                        (stamp, author) = (int(line[:pos]), line[pos+1:])
                        if oldstamp > stamp:
                            # clock skew, keep old timestamp to avoid having ugly graph
                            stamp = oldstamp
                        if author not in self.authors:
                            self.authors[author] = { 'lines_added' : 0, 'lines_removed' : 0, 'commits' : 0}
                        self.authors[author]['commits'] = self.authors[author].get('commits', 0) + 1
                        self.authors[author]['lines_added'] = self.authors[author].get('lines_added', 0) + inserted
                        self.authors[author]['lines_removed'] = self.authors[author].get('lines_removed', 0) + deleted
                        if stamp not in self.changes_by_date_by_author:
                            self.changes_by_date_by_author[stamp] = {}
                        if author not in self.changes_by_date_by_author[stamp]:
                            self.changes_by_date_by_author[stamp][author] = {}
                        self.changes_by_date_by_author[stamp][author]['lines_added'] = self.authors[author]['lines_added']
                        self.changes_by_date_by_author[stamp][author]['commits'] = self.authors[author]['commits']
                        files, inserted, deleted = 0, 0, 0
                    except ValueError:
                        print 'Warning: unexpected line "%s"' % line
                else:
                    print 'Warning: unexpected line "%s"' % line
            else:
                numbers = re.findall('\d+', line)
                if len(numbers) == 3:
                    (files, inserted, deleted) = map(lambda el : int(el), numbers)
                else:
                    print 'Warning: failed to handle line "%s"' % line
                    (files, inserted, deleted) = (0, 0, 0)

    def refine(self):
        # authors
        # name -> {place_by_commits, commits_frac, date_first, date_last, timedelta}
        self.authors_by_commits = getkeyssortedbyvaluekey(self.authors, 'commits')
        self.authors_by_commits.reverse() # most first
        for i, name in enumerate(self.authors_by_commits):
            self.authors[name]['place_by_commits'] = i + 1

        for name in self.authors.keys():
            a = self.authors[name]
            a['commits_frac'] = (100 * float(a['commits'])) / self.getTotalCommits()
            date_first = datetime.datetime.fromtimestamp(a['first_commit_stamp'])
            date_last = datetime.datetime.fromtimestamp(a['last_commit_stamp'])
            delta = date_last - date_first
            a['date_first'] = date_first.strftime('%Y-%m-%d')
            a['date_last'] = date_last.strftime('%Y-%m-%d')
            a['timedelta'] = delta
            if 'lines_added' not in a: a['lines_added'] = 0
            if 'lines_removed' not in a: a['lines_removed'] = 0

    def getActiveDays(self):
        return self.active_days

    def getActivityByDayOfWeek(self):
        return self.activity_by_day_of_week

    def getActivityByHourOfDay(self):
        return self.activity_by_hour_of_day

    def getAuthorInfo(self, author):
        return self.authors[author]

    def getAuthors(self, limit = None):
        res = getkeyssortedbyvaluekey(self.authors, 'commits')
        res.reverse()
        return res[:limit]

    def getCommitDeltaDays(self):
        return (self.last_commit_stamp - self.first_commit_stamp) / 86400 + 1

    def getDomainInfo(self, domain):
        return self.domains[domain]

    def getDomains(self):
        return self.domains.keys()

    def getFilesInCommit(self, rev):
        try:
            res = self.cache['files_in_tree'][rev]
        except:
            res = int(getpipeoutput(['git ls-tree -r --name-only "%s"' % rev, 'wc -l']).split('\n')[0])
            if 'files_in_tree' not in self.cache:
                self.cache['files_in_tree'] = {}
            self.cache['files_in_tree'][rev] = res
        return res

    def getFirstCommitDate(self):
        return datetime.datetime.fromtimestamp(self.first_commit_stamp)

    def getLastCommitDate(self):
        return datetime.datetime.fromtimestamp(self.last_commit_stamp)

    def getLinesInBlob(self, sha1):
        try:
            res = self.cache['lines_in_blob'][sha1]
        except:
            res = int(getpipeoutput(['git cat-file blob %s' % sha1, 'wc -l']).split()[0])
            if 'lines_in_blob' not in self.cache:
                self.cache['lines_in_blob'] = {}
            self.cache['lines_in_blob'][sha1] = res
        return res

    def getTags(self):
        lines = getpipeoutput(['git show-ref --tags', 'cut -d/ -f3'])
        return lines.split('\n')

    def getTagDate(self, tag):
        return self.revToDate('tags/' + tag)

    def getTotalAuthors(self):
        return self.total_authors

    def getTotalCommits(self):
        return self.total_commits

    def getTotalFiles(self):
        return self.total_files

    def getTotalLOC(self):
        return self.total_lines

    def getTotalSize(self):
        return self.total_size

    def revToDate(self, rev):
        stamp = int(getpipeoutput(['git log --pretty=format:%%at "%s" -n 1' % rev]))
        return datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d')

class ReportCreator:
    """Creates the actual report based on given data."""
    def __init__(self):
        pass

    def create(self, data, path):
        self.data = data
        self.path = path

def html_linkify(text):
    return text.lower().replace(' ', '_')

def html_header(level, text):
    name = html_linkify(text)
    return '\n<h%d><a href="#%s" name="%s">%s</a></h%d>\n\n' % (level, name, name, text, level)
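
# ReportCreator that writes the HTML pages (index, activity, authors, files, lines,
# tags) plus the .dat data files and gnuplot .plot scripts, and finally runs gnuplot
# to render the PNG charts.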
class HTMLReportCreator(ReportCreator):
    def create(self, data, path):
        ReportCreator.create(self, data, path)
        self.title = data.projectname

        # copy static files. Looks in the binary directory, ../share/gitstats and /usr/share/gitstats
        binarypath = os.path.dirname(os.path.abspath(__file__))
        secondarypath = os.path.join(binarypath, '..', 'share', 'gitstats')
        basedirs = [binarypath, secondarypath, '/usr/share/gitstats']
        for file in ('gitstats.css', 'sortable.js', 'arrow-up.gif', 'arrow-down.gif', 'arrow-none.gif'):
            for base in basedirs:
                src = base + '/' + file
                if os.path.exists(src):
                    shutil.copyfile(src, path + '/' + file)
                    break
            else:
                print 'Warning: "%s" not found, so not copied (searched: %s)' % (file, basedirs)

        f = open(path + "/index.html", 'w')
        format = '%Y-%m-%d %H:%M:%S'
        self.printHeader(f)

        f.write('<h1>GitStats - %s</h1>' % data.projectname)

        self.printNav(f)

        f.write('<dl>')
        f.write('<dt>Project name</dt><dd>%s</dd>' % (data.projectname))
        f.write('<dt>Generated</dt><dd>%s (in %d seconds)</dd>' % (datetime.datetime.now().strftime(format), time.time() - data.getStampCreated()))
        f.write('<dt>Generator</dt><dd><a href="http://gitstats.sourceforge.net/">GitStats</a> (version %s), %s, %s</dd>' % (getversion(), getgitversion(), getgnuplotversion()))
        f.write('<dt>Report Period</dt><dd>%s to %s</dd>' % (data.getFirstCommitDate().strftime(format), data.getLastCommitDate().strftime(format)))
        f.write('<dt>Age</dt><dd>%d days, %d active days (%3.2f%%)</dd>' % (data.getCommitDeltaDays(), len(data.getActiveDays()), (100.0 * len(data.getActiveDays()) / data.getCommitDeltaDays())))
        f.write('<dt>Total Files</dt><dd>%s</dd>' % data.getTotalFiles())
        f.write('<dt>Total Lines of Code</dt><dd>%s (%d added, %d removed)</dd>' % (data.getTotalLOC(), data.total_lines_added, data.total_lines_removed))
        f.write('<dt>Total Commits</dt><dd>%s (average %.1f commits per active day, %.1f per all days)</dd>' % (data.getTotalCommits(), float(data.getTotalCommits()) / len(data.getActiveDays()), float(data.getTotalCommits()) / data.getCommitDeltaDays()))
        f.write('<dt>Authors</dt><dd>%s (average %.1f commits per author)</dd>' % (data.getTotalAuthors(), (1.0 * data.getTotalCommits()) / data.getTotalAuthors()))
        f.write('</dl>')

        f.write('</body>\n</html>')
        f.close()

        ###
        # Activity
        f = open(path + '/activity.html', 'w')
        self.printHeader(f)
        f.write('<h1>Activity</h1>')
        self.printNav(f)

        #f.write('<h2>Last 30 days</h2>')
        #f.write('<h2>Last 12 months</h2>')

        # Weekly activity
        WEEKS = 32
        f.write(html_header(2, 'Weekly activity'))
        f.write('<p>Last %d weeks</p>' % WEEKS)

        # generate weeks to show (previous N weeks from now)
        now = datetime.datetime.now()
        deltaweek = datetime.timedelta(7)
        weeks = []
        stampcur = now
        for i in range(0, WEEKS):
            weeks.insert(0, stampcur.strftime('%Y-%W'))
            stampcur -= deltaweek

        # top row: commits & bar
        f.write('<table class="noborders"><tr>')
        for i in range(0, WEEKS):
            commits = 0
            if weeks[i] in data.activity_by_year_week:
                commits = data.activity_by_year_week[weeks[i]]

            percentage = 0
            if weeks[i] in data.activity_by_year_week:
                percentage = float(data.activity_by_year_week[weeks[i]]) / data.activity_by_year_week_peak
            height = max(1, int(200 * percentage))
            f.write('<td style="text-align: center; vertical-align: bottom">%d<div style="display: block; background-color: red; width: 20px; height: %dpx"></div></td>' % (commits, height))

        # bottom row: year/week
        f.write('</tr><tr>')
        for i in range(0, WEEKS):
            f.write('<td>%s</td>' % (WEEKS - i))
        f.write('</tr></table>')

        # Hour of Day
        f.write(html_header(2, 'Hour of Day'))
        hour_of_day = data.getActivityByHourOfDay()
        f.write('<table><tr><th>Hour</th>')
        for i in range(0, 24):
            f.write('<th>%d</th>' % i)
        f.write('</tr>\n<tr><th>Commits</th>')
        fp = open(path + '/hour_of_day.dat', 'w')
        for i in range(0, 24):
            if i in hour_of_day:
                r = 127 + int((float(hour_of_day[i]) / data.activity_by_hour_of_day_busiest) * 128)
                f.write('<td style="background-color: rgb(%d, 0, 0)">%d</td>' % (r, hour_of_day[i]))
                fp.write('%d %d\n' % (i, hour_of_day[i]))
            else:
                f.write('<td>0</td>')
                fp.write('%d 0\n' % i)
        fp.close()
        f.write('</tr>\n<tr><th>%</th>')
        totalcommits = data.getTotalCommits()
        for i in range(0, 24):
            if i in hour_of_day:
                r = 127 + int((float(hour_of_day[i]) / data.activity_by_hour_of_day_busiest) * 128)
                f.write('<td style="background-color: rgb(%d, 0, 0)">%.2f</td>' % (r, (100.0 * hour_of_day[i]) / totalcommits))
            else:
                f.write('<td>0.00</td>')
        f.write('</tr></table>')
        f.write('<img src="hour_of_day.png" alt="Hour of Day" />')
        fg = open(path + '/hour_of_day.dat', 'w')
        for i in range(0, 24):
            if i in hour_of_day:
                fg.write('%d %d\n' % (i + 1, hour_of_day[i]))
            else:
                fg.write('%d 0\n' % (i + 1))
        fg.close()

        # Day of Week
        f.write(html_header(2, 'Day of Week'))
        day_of_week = data.getActivityByDayOfWeek()
        f.write('<div class="vtable"><table>')
        f.write('<tr><th>Day</th><th>Total (%)</th></tr>')
        fp = open(path + '/day_of_week.dat', 'w')
        for d in range(0, 7):
            commits = 0
            if d in day_of_week:
                commits = day_of_week[d]
            fp.write('%d %s %d\n' % (d + 1, WEEKDAYS[d], commits))
            f.write('<tr>')
            f.write('<th>%s</th>' % (WEEKDAYS[d]))
            if d in day_of_week:
                f.write('<td>%d (%.2f%%)</td>' % (day_of_week[d], (100.0 * day_of_week[d]) / totalcommits))
            else:
                f.write('<td>0</td>')
            f.write('</tr>')
        f.write('</table></div>')
        f.write('<img src="day_of_week.png" alt="Day of Week" />')
        fp.close()

        # Hour of Week
        f.write(html_header(2, 'Hour of Week'))
        f.write('<table>')

        f.write('<tr><th>Weekday</th>')
        for hour in range(0, 24):
            f.write('<th>%d</th>' % (hour))
        f.write('</tr>')

        for weekday in range(0, 7):
            f.write('<tr><th>%s</th>' % (WEEKDAYS[weekday]))
            for hour in range(0, 24):
                try:
                    commits = data.activity_by_hour_of_week[weekday][hour]
                except KeyError:
                    commits = 0
                if commits != 0:
                    f.write('<td')
                    r = 127 + int((float(commits) / data.activity_by_hour_of_week_busiest) * 128)
                    f.write(' style="background-color: rgb(%d, 0, 0)"' % r)
                    f.write('>%d</td>' % commits)
                else:
                    f.write('<td></td>')
            f.write('</tr>')

        f.write('</table>')

        # Month of Year
        f.write(html_header(2, 'Month of Year'))
        f.write('<div class="vtable"><table>')
        f.write('<tr><th>Month</th><th>Commits (%)</th></tr>')
        fp = open(path + '/month_of_year.dat', 'w')
        for mm in range(1, 13):
            commits = 0
            if mm in data.activity_by_month_of_year:
                commits = data.activity_by_month_of_year[mm]
            f.write('<tr><td>%d</td><td>%d (%.2f %%)</td></tr>' % (mm, commits, (100.0 * commits) / data.getTotalCommits()))
            fp.write('%d %d\n' % (mm, commits))
        fp.close()
        f.write('</table></div>')
        f.write('<img src="month_of_year.png" alt="Month of Year" />')

        # Commits by year/month
        f.write(html_header(2, 'Commits by year/month'))
        f.write('<div class="vtable"><table><tr><th>Month</th><th>Commits</th><th>Lines added</th><th>Lines removed</th></tr>')
        for yymm in reversed(sorted(data.commits_by_month.keys())):
            f.write('<tr><td>%s</td><td>%d</td><td>%d</td><td>%d</td></tr>' % (yymm, data.commits_by_month.get(yymm,0), data.lines_added_by_month.get(yymm,0), data.lines_removed_by_month.get(yymm,0)))
        f.write('</table></div>')
        f.write('<img src="commits_by_year_month.png" alt="Commits by year/month" />')
        fg = open(path + '/commits_by_year_month.dat', 'w')
        for yymm in sorted(data.commits_by_month.keys()):
            fg.write('%s %s\n' % (yymm, data.commits_by_month[yymm]))
        fg.close()

        # Commits by year
        f.write(html_header(2, 'Commits by Year'))
        f.write('<div class="vtable"><table><tr><th>Year</th><th>Commits (% of all)</th><th>Lines added</th><th>Lines removed</th></tr>')
        for yy in reversed(sorted(data.commits_by_year.keys())):
            f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%d</td><td>%d</td></tr>' % (yy, data.commits_by_year.get(yy,0), (100.0 * data.commits_by_year.get(yy,0)) / data.getTotalCommits(), data.lines_added_by_year.get(yy,0), data.lines_removed_by_year.get(yy,0)))
        f.write('</table></div>')
        f.write('<img src="commits_by_year.png" alt="Commits by Year" />')
        fg = open(path + '/commits_by_year.dat', 'w')
        for yy in sorted(data.commits_by_year.keys()):
            fg.write('%d %d\n' % (yy, data.commits_by_year[yy]))
        fg.close()

        # Commits by timezone
        f.write(html_header(2, 'Commits by Timezone'))
        f.write('<table><tr>')
        f.write('<th>Timezone</th><th>Commits</th>')
        max_commits_on_tz = max(data.commits_by_timezone.values())
        for i in sorted(data.commits_by_timezone.keys(), key = lambda n : int(n)):
            commits = data.commits_by_timezone[i]
            r = 127 + int((float(commits) / max_commits_on_tz) * 128)
            f.write('<tr><th>%s</th><td style="background-color: rgb(%d, 0, 0)">%d</td></tr>' % (i, r, commits))
        f.write('</tr></table>')

        f.write('</body></html>')
        f.close()

        ###
        # Authors
        f = open(path + '/authors.html', 'w')
        self.printHeader(f)

        f.write('<h1>Authors</h1>')
        self.printNav(f)

        # Authors :: List of authors
        f.write(html_header(2, 'List of Authors'))

        f.write('<table class="authors sortable" id="authors">')
        f.write('<tr><th>Author</th><th>Commits (%)</th><th>+ lines</th><th>- lines</th><th>First commit</th><th>Last commit</th><th class="unsortable">Age</th><th>Active days</th><th># by commits</th></tr>')
        for author in data.getAuthors(conf['max_authors']):
            info = data.getAuthorInfo(author)
            f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%d</td><td>%d</td><td>%s</td><td>%s</td><td>%s</td><td>%d</td><td>%d</td></tr>' % (author, info['commits'], info['commits_frac'], info['lines_added'], info['lines_removed'], info['date_first'], info['date_last'], info['timedelta'], len(info['active_days']), info['place_by_commits']))
        f.write('</table>')

        allauthors = data.getAuthors()
        if len(allauthors) > conf['max_authors']:
            rest = allauthors[conf['max_authors']:]
            f.write('<p class="moreauthors">These didn\'t make it to the top: %s</p>' % ', '.join(rest))

        f.write(html_header(2, 'Cumulated Added Lines of Code per Author'))
        f.write('<img src="lines_of_code_by_author.png" alt="Lines of code per Author" />')
        if len(allauthors) > conf['max_authors']:
            f.write('<p class="moreauthors">Only top %d authors shown</p>' % conf['max_authors'])

        f.write(html_header(2, 'Commits per Author'))
        f.write('<img src="commits_by_author.png" alt="Commits per Author" />')
        if len(allauthors) > conf['max_authors']:
            f.write('<p class="moreauthors">Only top %d authors shown</p>' % conf['max_authors'])

        fgl = open(path + '/lines_of_code_by_author.dat', 'w')
        fgc = open(path + '/commits_by_author.dat', 'w')

        lines_by_authors = {} # cumulated added lines by author. to save memory,
                              # changes_by_date_by_author[stamp][author] is defined
                              # only at points where author commits.
                              # lines_by_authors allows us to generate all the
                              # points in the .dat file.

        # Don't rely on getAuthors to give the same order each
        # time. Be robust and keep the list in a variable.
        commits_by_authors = {} # cumulated commits by author

        self.authors_to_plot = data.getAuthors(conf['max_authors'])
        for author in self.authors_to_plot:
            lines_by_authors[author] = 0
            commits_by_authors[author] = 0
        for stamp in sorted(data.changes_by_date_by_author.keys()):
            fgl.write('%d' % stamp)
            fgc.write('%d' % stamp)
            for author in self.authors_to_plot:
                if author in data.changes_by_date_by_author[stamp].keys():
                    lines_by_authors[author] = data.changes_by_date_by_author[stamp][author]['lines_added']
                    commits_by_authors[author] = data.changes_by_date_by_author[stamp][author]['commits']
                fgl.write(' %d' % lines_by_authors[author])
                fgc.write(' %d' % commits_by_authors[author])
            fgl.write('\n')
            fgc.write('\n')
        fgl.close()
        fgc.close()

        # Authors :: Author of Month
        f.write(html_header(2, 'Author of Month'))
        f.write('<table class="sortable" id="aom">')
        f.write('<tr><th>Month</th><th>Author</th><th>Commits (%%)</th><th class="unsortable">Next top %d</th><th>Number of authors</th></tr>' % conf['authors_top'])
        for yymm in reversed(sorted(data.author_of_month.keys())):
            authordict = data.author_of_month[yymm]
            authors = getkeyssortedbyvalues(authordict)
            authors.reverse()
            commits = data.author_of_month[yymm][authors[0]]
            next = ', '.join(authors[1:conf['authors_top']+1])
            f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td><td>%s</td><td>%d</td></tr>' % (yymm, authors[0], commits, (100.0 * commits) / data.commits_by_month[yymm], data.commits_by_month[yymm], next, len(authors)))
        f.write('</table>')

        f.write(html_header(2, 'Author of Year'))
        f.write('<table class="sortable" id="aoy"><tr><th>Year</th><th>Author</th><th>Commits (%%)</th><th class="unsortable">Next top %d</th><th>Number of authors</th></tr>' % conf['authors_top'])
        for yy in reversed(sorted(data.author_of_year.keys())):
            authordict = data.author_of_year[yy]
            authors = getkeyssortedbyvalues(authordict)
            authors.reverse()
            commits = data.author_of_year[yy][authors[0]]
            next = ', '.join(authors[1:conf['authors_top']+1])
            f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td><td>%s</td><td>%d</td></tr>' % (yy, authors[0], commits, (100.0 * commits) / data.commits_by_year[yy], data.commits_by_year[yy], next, len(authors)))
        f.write('</table>')

        # Domains
        f.write(html_header(2, 'Commits by Domains'))
        domains_by_commits = getkeyssortedbyvaluekey(data.domains, 'commits')
        domains_by_commits.reverse() # most first
        f.write('<div class="vtable"><table>')
        f.write('<tr><th>Domains</th><th>Total (%)</th></tr>')
        fp = open(path + '/domains.dat', 'w')
        n = 0
        for domain in domains_by_commits:
            if n == conf['max_domains']:
                break
            commits = 0
            n += 1
            info = data.getDomainInfo(domain)
            fp.write('%s %d %d\n' % (domain, n, info['commits']))
            f.write('<tr><th>%s</th><td>%d (%.2f%%)</td></tr>' % (domain, info['commits'], (100.0 * info['commits'] / totalcommits)))
        f.write('</table></div>')
        f.write('<img src="domains.png" alt="Commits by Domains" />')
        fp.close()

        f.write('</body></html>')
        f.close()

        ###
        # Files
        f = open(path + '/files.html', 'w')
        self.printHeader(f)
        f.write('<h1>Files</h1>')
        self.printNav(f)

        f.write('<dl>\n')
        f.write('<dt>Total files</dt><dd>%d</dd>' % data.getTotalFiles())
        f.write('<dt>Total lines</dt><dd>%d</dd>' % data.getTotalLOC())
        f.write('<dt>Average file size</dt><dd>%.2f bytes</dd>' % (float(data.getTotalSize()) / data.getTotalFiles()))
        f.write('</dl>\n')

        # Files :: File count by date
        f.write(html_header(2, 'File count by date'))

        # use set to get rid of duplicate/unnecessary entries
        files_by_date = set()
        for stamp in sorted(data.files_by_stamp.keys()):
            files_by_date.add('%s %d' % (datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), data.files_by_stamp[stamp]))

        fg = open(path + '/files_by_date.dat', 'w')
        for line in sorted(list(files_by_date)):
            fg.write('%s\n' % line)
        #for stamp in sorted(data.files_by_stamp.keys()):
        #    fg.write('%s %d\n' % (datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), data.files_by_stamp[stamp]))
        fg.close()

        f.write('<img src="files_by_date.png" alt="Files by Date" />')

        #f.write('<h2>Average file size by date</h2>')

        # Files :: Extensions
        f.write(html_header(2, 'Extensions'))
        f.write('<table class="sortable" id="ext"><tr><th>Extension</th><th>Files (%)</th><th>Lines (%)</th><th>Lines/file</th></tr>')
        for ext in sorted(data.extensions.keys()):
            files = data.extensions[ext]['files']
            lines = data.extensions[ext]['lines']
            f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%d (%.2f%%)</td><td>%d</td></tr>' % (ext, files, (100.0 * files) / data.getTotalFiles(), lines, (100.0 * lines) / data.getTotalLOC(), lines / files))
        f.write('</table>')

        f.write('</body></html>')
        f.close()

        ###
        # Lines
        f = open(path + '/lines.html', 'w')
        self.printHeader(f)
        f.write('<h1>Lines</h1>')
        self.printNav(f)

        f.write('<dl>\n')
        f.write('<dt>Total lines</dt><dd>%d</dd>' % data.getTotalLOC())
        f.write('</dl>\n')

        f.write(html_header(2, 'Lines of Code'))
        f.write('<img src="lines_of_code.png" />')

        fg = open(path + '/lines_of_code.dat', 'w')
        for stamp in sorted(data.changes_by_date.keys()):
            fg.write('%d %d\n' % (stamp, data.changes_by_date[stamp]['lines']))
        fg.close()

        f.write('</body></html>')
        f.close()

        ###
        # tags.html
        f = open(path + '/tags.html', 'w')
        self.printHeader(f)
        f.write('<h1>Tags</h1>')
        self.printNav(f)

        f.write('<dl>')
        f.write('<dt>Total tags</dt><dd>%d</dd>' % len(data.tags))
        if len(data.tags) > 0:
            f.write('<dt>Average commits per tag</dt><dd>%.2f</dd>' % (1.0 * data.getTotalCommits() / len(data.tags)))
        f.write('</dl>')

        f.write('<table class="tags">')
        f.write('<tr><th>Name</th><th>Date</th><th>Commits</th><th>Authors</th></tr>')
        # sort the tags by date desc
        tags_sorted_by_date_desc = map(lambda el : el[1], reversed(sorted(map(lambda el : (el[1]['date'], el[0]), data.tags.items()))))
        for tag in tags_sorted_by_date_desc:
            authorinfo = []
            self.authors_by_commits = getkeyssortedbyvalues(data.tags[tag]['authors'])
            for i in reversed(self.authors_by_commits):
                authorinfo.append('%s (%d)' % (i, data.tags[tag]['authors'][i]))
            f.write('<tr><td>%s</td><td>%s</td><td>%d</td><td>%s</td></tr>' % (tag, data.tags[tag]['date'], data.tags[tag]['commits'], ', '.join(authorinfo)))
        f.write('</table>')

        f.write('</body></html>')
        f.close()

        self.createGraphs(path)
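
    # Write a gnuplot script for each chart next to its .dat data file, then run
    # gnuplot (see gnuplot_cmd) on every *.plot file in the output directory.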
    def createGraphs(self, path):
        print 'Generating graphs...'

        # hour of day
        f = open(path + '/hour_of_day.plot', 'w')
        f.write(GNUPLOT_COMMON)
        f.write(
"""
set output 'hour_of_day.png'
unset key
set xrange [0.5:24.5]
set xtics 4
set grid y
set ylabel "Commits"
plot 'hour_of_day.dat' using 1:2:(0.5) w boxes fs solid
""")
        f.close()

        # day of week
        f = open(path + '/day_of_week.plot', 'w')
        f.write(GNUPLOT_COMMON)
        f.write(
"""
set output 'day_of_week.png'
unset key
set xrange [0.5:7.5]
set xtics 1
set grid y
set ylabel "Commits"
plot 'day_of_week.dat' using 1:3:(0.5):xtic(2) w boxes fs solid
""")
        f.close()

        # Domains
        f = open(path + '/domains.plot', 'w')
        f.write(GNUPLOT_COMMON)
        f.write(
"""
set output 'domains.png'
unset key
unset xtics
set yrange [0:]
set grid y
set ylabel "Commits"
plot 'domains.dat' using 2:3:(0.5) with boxes fs solid, '' using 2:3:1 with labels rotate by 45 offset 0,1
""")
        f.close()

        # Month of Year
        f = open(path + '/month_of_year.plot', 'w')
        f.write(GNUPLOT_COMMON)
        f.write(
"""
set output 'month_of_year.png'
unset key
set xrange [0.5:12.5]
set xtics 1
set grid y
set ylabel "Commits"
plot 'month_of_year.dat' using 1:2:(0.5) w boxes fs solid
""")
        f.close()

        # commits_by_year_month
        f = open(path + '/commits_by_year_month.plot', 'w')
        f.write(GNUPLOT_COMMON)
        f.write(
"""
set output 'commits_by_year_month.png'
unset key
set xdata time
set timefmt "%Y-%m"
set format x "%Y-%m"
set xtics rotate
set bmargin 5
set grid y
set ylabel "Commits"
plot 'commits_by_year_month.dat' using 1:2:(0.5) w boxes fs solid
""")
        f.close()

        # commits_by_year
        f = open(path + '/commits_by_year.plot', 'w')
        f.write(GNUPLOT_COMMON)
        f.write(
"""
set output 'commits_by_year.png'
unset key
set xtics 1 rotate
set grid y
set ylabel "Commits"
set yrange [0:]
plot 'commits_by_year.dat' using 1:2:(0.5) w boxes fs solid
""")
        f.close()

        # Files by date
        f = open(path + '/files_by_date.plot', 'w')
        f.write(GNUPLOT_COMMON)
        f.write(
"""
set output 'files_by_date.png'
unset key
set xdata time
set timefmt "%Y-%m-%d"
set format x "%Y-%m-%d"
set grid y
set ylabel "Files"
set xtics rotate
set ytics autofreq
set bmargin 6
plot 'files_by_date.dat' using 1:2 w steps
""")
        f.close()

        # Lines of Code
        f = open(path + '/lines_of_code.plot', 'w')
        f.write(GNUPLOT_COMMON)
        f.write(
"""
set output 'lines_of_code.png'
unset key
set xdata time
set timefmt "%s"
set format x "%Y-%m-%d"
set grid y
set ylabel "Lines"
set xtics rotate
set bmargin 6
plot 'lines_of_code.dat' using 1:2 w lines
""")
        f.close()

        # Lines of Code Added per author
        f = open(path + '/lines_of_code_by_author.plot', 'w')
        f.write(GNUPLOT_COMMON)
        f.write(
"""
set terminal png transparent size 640,480
set output 'lines_of_code_by_author.png'
set key left top
set xdata time
set timefmt "%s"
set format x "%Y-%m-%d"
set grid y
set ylabel "Lines"
set xtics rotate
set bmargin 6
plot """
)
        i = 1
        plots = []
        for a in self.authors_to_plot:
            i = i + 1
            plots.append("""'lines_of_code_by_author.dat' using 1:%d title "%s" w lines""" % (i, a.replace("\"", "\\\"")))
        f.write(", ".join(plots))
        f.write('\n')
        f.close()

        # Commits per author
        f = open(path + '/commits_by_author.plot', 'w')
        f.write(GNUPLOT_COMMON)
        f.write(
"""
set terminal png transparent size 640,480
set output 'commits_by_author.png'
set key left top
set xdata time
set timefmt "%s"
set format x "%Y-%m-%d"
set grid y
set ylabel "Commits"
set xtics rotate
set bmargin 6
plot """
)
        i = 1
        plots = []
        for a in self.authors_to_plot:
            i = i + 1
            plots.append("""'commits_by_author.dat' using 1:%d title "%s" w lines""" % (i, a.replace("\"", "\\\"")))
        f.write(", ".join(plots))
        f.write('\n')
        f.close()

        os.chdir(path)
        files = glob.glob(path + '/*.plot')
        for f in files:
            out = getpipeoutput([gnuplot_cmd + ' "%s"' % f])
            if len(out) > 0:
                print out

    def printHeader(self, f, title = ''):
        f.write(
"""<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>GitStats - %s</title>
<link rel="stylesheet" href="%s" type="text/css" />
<meta name="generator" content="GitStats %s" />
<script type="text/javascript" src="sortable.js"></script>
</head>
<body>
""" % (self.title, conf['style'], getversion()))

    def printNav(self, f):
        f.write("""
<div class="nav">
<ul>
<li><a href="index.html">General</a></li>
<li><a href="activity.html">Activity</a></li>
<li><a href="authors.html">Authors</a></li>
<li><a href="files.html">Files</a></li>
<li><a href="lines.html">Lines</a></li>
<li><a href="tags.html">Tags</a></li>
</ul>
</div>
""")
class GitStats:
    def run(self, args_orig):
        optlist, args = getopt.getopt(args_orig, 'c:')
        for o, v in optlist:
            if o == '-c':
                key, value = v.split('=', 1)
                if key not in conf:
                    raise KeyError('no such key "%s" in config' % key)
                if isinstance(conf[key], int):
                    conf[key] = int(value)
                else:
                    conf[key] = value

        if len(args) < 2:
            print """
Usage: gitstats [options] <gitpath..> <outputpath>

Options:
-c key=value     Override configuration value

Default config values:
%s
""" % conf
            sys.exit(0)

        outputpath = os.path.abspath(args[-1])
        rundir = os.getcwd()

        try:
            os.makedirs(outputpath)
        except OSError:
            pass
        if not os.path.isdir(outputpath):
            print 'FATAL: Output path is not a directory or does not exist'
            sys.exit(1)

        print 'Output path: %s' % outputpath
        cachefile = os.path.join(outputpath, 'gitstats.cache')

        data = GitDataCollector()
        data.loadCache(cachefile)

        for gitpath in args[0:-1]:
            print 'Git path: %s' % gitpath

            os.chdir(gitpath)

            print 'Collecting data...'
            data.collect(gitpath)

        print 'Refining data...'
        data.saveCache(cachefile)
        data.refine()

        os.chdir(rundir)

        print 'Generating report...'
        report = HTMLReportCreator()
        report.create(data, outputpath)

        time_end = time.time()
        exectime_internal = time_end - time_start
        print 'Execution time %.5f secs, %.5f secs (%.2f %%) in external commands' % (exectime_internal, exectime_external, (100.0 * exectime_external) / exectime_internal)
        if sys.stdin.isatty():
            print 'You may now run:'
            print
            print '   sensible-browser \'%s\'' % os.path.join(outputpath, 'index.html').replace("'", "'\\''")
            print

if __name__ == '__main__':
    g = GitStats()
    g.run(sys.argv[1:])