gitstats 46KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433
  1. #!/usr/bin/env python
  2. # Copyright (c) 2007-2012 Heikki Hokkanen <hoxu@users.sf.net> & others (see doc/author.txt)
  3. # GPLv2 / GPLv3
  4. import datetime
  5. import getopt
  6. import glob
  7. import os
  8. import pickle
  9. import platform
  10. import re
  11. import shutil
  12. import subprocess
  13. import sys
  14. import time
  15. import zlib
  16. os.environ['LC_ALL'] = 'C'
  17. GNUPLOT_COMMON = 'set terminal png transparent size 640,240\nset size 1.0,1.0\n'
  18. ON_LINUX = (platform.system() == 'Linux')
  19. WEEKDAYS = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
  20. exectime_internal = 0.0
  21. exectime_external = 0.0
  22. time_start = time.time()
  23. # By default, gnuplot is searched from path, but can be overridden with the
  24. # environment variable "GNUPLOT"
  25. gnuplot_cmd = 'gnuplot'
  26. if 'GNUPLOT' in os.environ:
  27. gnuplot_cmd = os.environ['GNUPLOT']
  28. conf = {
  29. 'max_domains': 10,
  30. 'max_ext_length': 10,
  31. 'style': 'gitstats.css',
  32. 'max_authors': 20,
  33. 'authors_top': 5,
  34. 'commit_begin': '',
  35. 'commit_end': 'HEAD',
  36. 'linear_linestats': 1,
  37. 'project_name': '',
  38. 'merge_authors': {}
  39. }
  40. def getpipeoutput(cmds, quiet = False):
  41. global exectime_external
  42. start = time.time()
  43. if not quiet and ON_LINUX and os.isatty(1):
  44. print '>> ' + ' | '.join(cmds),
  45. sys.stdout.flush()
  46. p0 = subprocess.Popen(cmds[0], stdout = subprocess.PIPE, shell = True)
  47. p = p0
  48. for x in cmds[1:]:
  49. p = subprocess.Popen(x, stdin = p0.stdout, stdout = subprocess.PIPE, shell = True)
  50. p0 = p
  51. output = p.communicate()[0]
  52. end = time.time()
  53. if not quiet:
  54. if ON_LINUX and os.isatty(1):
  55. print '\r',
  56. print '[%.5f] >> %s' % (end - start, ' | '.join(cmds))
  57. exectime_external += (end - start)
  58. return output.rstrip('\n')
  59. def getcommitrange(defaultrange = 'HEAD', end_only = False):
  60. if len(conf['commit_end']) > 0:
  61. if end_only or len(conf['commit_begin']) == 0:
  62. return conf['commit_end']
  63. return '%s..%s' % (conf['commit_begin'], conf['commit_end'])
  64. return defaultrange
  65. def getkeyssortedbyvalues(dict):
  66. return map(lambda el : el[1], sorted(map(lambda el : (el[1], el[0]), dict.items())))
  67. # dict['author'] = { 'commits': 512 } - ...key(dict, 'commits')
  68. def getkeyssortedbyvaluekey(d, key):
  69. return map(lambda el : el[1], sorted(map(lambda el : (d[el][key], el), d.keys())))
  70. def getstatsummarycounts(line):
  71. numbers = re.findall('\d+', line)
  72. if len(numbers) == 1:
  73. # neither insertions nor deletions: may probably only happen for "0 files changed"
  74. numbers.append(0);
  75. numbers.append(0);
  76. elif len(numbers) == 2 and line.find('(+)') != -1:
  77. numbers.append(0); # only insertions were printed on line
  78. elif len(numbers) == 2 and line.find('(-)') != -1:
  79. numbers.insert(1, 0); # only deletions were printed on line
  80. return numbers
  81. VERSION = 0
  82. def getversion():
  83. global VERSION
  84. if VERSION == 0:
  85. gitstats_repo = os.path.dirname(os.path.abspath(__file__))
  86. VERSION = getpipeoutput(["git --git-dir=%s/.git --work-tree=%s rev-parse --short %s" %
  87. (gitstats_repo, gitstats_repo, getcommitrange('HEAD').split('\n')[0])])
  88. return VERSION
  89. def getgitversion():
  90. return getpipeoutput(['git --version']).split('\n')[0]
  91. def getgnuplotversion():
  92. return getpipeoutput(['%s --version' % gnuplot_cmd]).split('\n')[0]
  93. class DataCollector:
  94. """Manages data collection from a revision control repository."""
  95. def __init__(self):
  96. self.stamp_created = time.time()
  97. self.cache = {}
  98. self.total_authors = 0
  99. self.activity_by_hour_of_day = {} # hour -> commits
  100. self.activity_by_day_of_week = {} # day -> commits
  101. self.activity_by_month_of_year = {} # month [1-12] -> commits
  102. self.activity_by_hour_of_week = {} # weekday -> hour -> commits
  103. self.activity_by_hour_of_day_busiest = 0
  104. self.activity_by_hour_of_week_busiest = 0
  105. self.activity_by_year_week = {} # yy_wNN -> commits
  106. self.activity_by_year_week_peak = 0
  107. self.authors = {} # name -> {commits, first_commit_stamp, last_commit_stamp, last_active_day, active_days, lines_added, lines_removed}
  108. self.total_commits = 0
  109. self.total_files = 0
  110. self.authors_by_commits = 0
  111. # domains
  112. self.domains = {} # domain -> commits
  113. # author of the month
  114. self.author_of_month = {} # month -> author -> commits
  115. self.author_of_year = {} # year -> author -> commits
  116. self.commits_by_month = {} # month -> commits
  117. self.commits_by_year = {} # year -> commits
  118. self.lines_added_by_month = {} # month -> lines added
  119. self.lines_added_by_year = {} # year -> lines added
  120. self.lines_removed_by_month = {} # month -> lines removed
  121. self.lines_removed_by_year = {} # year -> lines removed
  122. self.first_commit_stamp = 0
  123. self.last_commit_stamp = 0
  124. self.last_active_day = None
  125. self.active_days = set()
  126. # lines
  127. self.total_lines = 0
  128. self.total_lines_added = 0
  129. self.total_lines_removed = 0
  130. # size
  131. self.total_size = 0
  132. # timezone
  133. self.commits_by_timezone = {} # timezone -> commits
  134. # tags
  135. self.tags = {}
  136. self.files_by_stamp = {} # stamp -> files
  137. # extensions
  138. self.extensions = {} # extension -> files, lines
  139. # line statistics
  140. self.changes_by_date = {} # stamp -> { files, ins, del }
  141. ##
  142. # This should be the main function to extract data from the repository.
  143. def collect(self, dir):
  144. self.dir = dir
  145. if len(conf['project_name']) == 0:
  146. self.projectname = os.path.basename(os.path.abspath(dir))
  147. else:
  148. self.projectname = conf['project_name']
  149. ##
  150. # Load cacheable data
  151. def loadCache(self, cachefile):
  152. if not os.path.exists(cachefile):
  153. return
  154. print 'Loading cache...'
  155. f = open(cachefile, 'rb')
  156. try:
  157. self.cache = pickle.loads(zlib.decompress(f.read()))
  158. except:
  159. # temporary hack to upgrade non-compressed caches
  160. f.seek(0)
  161. self.cache = pickle.load(f)
  162. f.close()
  163. ##
  164. # Produce any additional statistics from the extracted data.
  165. def refine(self):
  166. pass
  167. ##
  168. # : get a dictionary of author
  169. def getAuthorInfo(self, author):
  170. return None
  171. def getActivityByDayOfWeek(self):
  172. return {}
  173. def getActivityByHourOfDay(self):
  174. return {}
  175. # : get a dictionary of domains
  176. def getDomainInfo(self, domain):
  177. return None
  178. ##
  179. # Get a list of authors
  180. def getAuthors(self):
  181. return []
  182. def getFirstCommitDate(self):
  183. return datetime.datetime.now()
  184. def getLastCommitDate(self):
  185. return datetime.datetime.now()
  186. def getStampCreated(self):
  187. return self.stamp_created
  188. def getTags(self):
  189. return []
  190. def getTotalAuthors(self):
  191. return -1
  192. def getTotalCommits(self):
  193. return -1
  194. def getTotalFiles(self):
  195. return -1
  196. def getTotalLOC(self):
  197. return -1
  198. ##
  199. # Save cacheable data
  200. def saveCache(self, cachefile):
  201. print 'Saving cache...'
  202. tempfile = cachefile + '.tmp'
  203. f = open(tempfile, 'wb')
  204. #pickle.dump(self.cache, f)
  205. data = zlib.compress(pickle.dumps(self.cache))
  206. f.write(data)
  207. f.close()
  208. try:
  209. os.remove(cachefile)
  210. except OSError:
  211. pass
  212. os.rename(tempfile, cachefile)
  213. class GitDataCollector(DataCollector):
  214. def collect(self, dir):
  215. DataCollector.collect(self, dir)
  216. self.total_authors += int(getpipeoutput(['git shortlog -s %s' % getcommitrange(), 'wc -l']))
  217. #self.total_lines = int(getoutput('git-ls-files -z |xargs -0 cat |wc -l'))
  218. # tags
  219. lines = getpipeoutput(['git show-ref --tags']).split('\n')
  220. for line in lines:
  221. if len(line) == 0:
  222. continue
  223. (hash, tag) = line.split(' ')
  224. tag = tag.replace('refs/tags/', '')
  225. output = getpipeoutput(['git log "%s" --pretty=format:"%%at %%aN" -n 1' % hash])
  226. if len(output) > 0:
  227. parts = output.split(' ')
  228. stamp = 0
  229. try:
  230. stamp = int(parts[0])
  231. except ValueError:
  232. stamp = 0
  233. self.tags[tag] = { 'stamp': stamp, 'hash' : hash, 'date' : datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), 'commits': 0, 'authors': {} }
  234. # collect info on tags, starting from latest
  235. tags_sorted_by_date_desc = map(lambda el : el[1], reversed(sorted(map(lambda el : (el[1]['date'], el[0]), self.tags.items()))))
  236. prev = None
  237. for tag in reversed(tags_sorted_by_date_desc):
  238. cmd = 'git shortlog -s "%s"' % tag
  239. if prev != None:
  240. cmd += ' "^%s"' % prev
  241. output = getpipeoutput([cmd])
  242. if len(output) == 0:
  243. continue
  244. prev = tag
  245. for line in output.split('\n'):
  246. parts = re.split('\s+', line, 2)
  247. commits = int(parts[1])
  248. author = parts[2]
  249. if author in conf['merge_authors']:
  250. author = conf['merge_authors'][author]
  251. self.tags[tag]['commits'] += commits
  252. self.tags[tag]['authors'][author] = commits
  253. # Collect revision statistics
  254. # Outputs "<stamp> <date> <time> <timezone> <author> '<' <mail> '>'"
  255. lines = getpipeoutput(['git rev-list --pretty=format:"%%at %%ai %%aN <%%aE>" %s' % getcommitrange('HEAD'), 'grep -v ^commit']).split('\n')
  256. for line in lines:
  257. parts = line.split(' ', 4)
  258. author = ''
  259. try:
  260. stamp = int(parts[0])
  261. except ValueError:
  262. stamp = 0
  263. timezone = parts[3]
  264. author, mail = parts[4].split('<', 1)
  265. author = author.rstrip()
  266. if author in conf['merge_authors']:
  267. author = conf['merge_authors'][author]
  268. mail = mail.rstrip('>')
  269. domain = '?'
  270. if mail.find('@') != -1:
  271. domain = mail.rsplit('@', 1)[1]
  272. date = datetime.datetime.fromtimestamp(float(stamp))
  273. # First and last commit stamp (may be in any order because of cherry-picking and patches)
  274. if stamp > self.last_commit_stamp:
  275. self.last_commit_stamp = stamp
  276. if self.first_commit_stamp == 0 or stamp < self.first_commit_stamp:
  277. self.first_commit_stamp = stamp
  278. # activity
  279. # hour
  280. hour = date.hour
  281. self.activity_by_hour_of_day[hour] = self.activity_by_hour_of_day.get(hour, 0) + 1
  282. # most active hour?
  283. if self.activity_by_hour_of_day[hour] > self.activity_by_hour_of_day_busiest:
  284. self.activity_by_hour_of_day_busiest = self.activity_by_hour_of_day[hour]
  285. # day of week
  286. day = date.weekday()
  287. self.activity_by_day_of_week[day] = self.activity_by_day_of_week.get(day, 0) + 1
  288. # domain stats
  289. if domain not in self.domains:
  290. self.domains[domain] = {}
  291. # commits
  292. self.domains[domain]['commits'] = self.domains[domain].get('commits', 0) + 1
  293. # hour of week
  294. if day not in self.activity_by_hour_of_week:
  295. self.activity_by_hour_of_week[day] = {}
  296. self.activity_by_hour_of_week[day][hour] = self.activity_by_hour_of_week[day].get(hour, 0) + 1
  297. # most active hour?
  298. if self.activity_by_hour_of_week[day][hour] > self.activity_by_hour_of_week_busiest:
  299. self.activity_by_hour_of_week_busiest = self.activity_by_hour_of_week[day][hour]
  300. # month of year
  301. month = date.month
  302. self.activity_by_month_of_year[month] = self.activity_by_month_of_year.get(month, 0) + 1
  303. # yearly/weekly activity
  304. yyw = date.strftime('%Y-%W')
  305. self.activity_by_year_week[yyw] = self.activity_by_year_week.get(yyw, 0) + 1
  306. if self.activity_by_year_week_peak < self.activity_by_year_week[yyw]:
  307. self.activity_by_year_week_peak = self.activity_by_year_week[yyw]
  308. # author stats
  309. if author not in self.authors:
  310. self.authors[author] = {}
  311. # commits, note again that commits may be in any date order because of cherry-picking and patches
  312. if 'last_commit_stamp' not in self.authors[author]:
  313. self.authors[author]['last_commit_stamp'] = stamp
  314. if stamp > self.authors[author]['last_commit_stamp']:
  315. self.authors[author]['last_commit_stamp'] = stamp
  316. if 'first_commit_stamp' not in self.authors[author]:
  317. self.authors[author]['first_commit_stamp'] = stamp
  318. if stamp < self.authors[author]['first_commit_stamp']:
  319. self.authors[author]['first_commit_stamp'] = stamp
  320. # author of the month/year
  321. yymm = date.strftime('%Y-%m')
  322. if yymm in self.author_of_month:
  323. self.author_of_month[yymm][author] = self.author_of_month[yymm].get(author, 0) + 1
  324. else:
  325. self.author_of_month[yymm] = {}
  326. self.author_of_month[yymm][author] = 1
  327. self.commits_by_month[yymm] = self.commits_by_month.get(yymm, 0) + 1
  328. yy = date.year
  329. if yy in self.author_of_year:
  330. self.author_of_year[yy][author] = self.author_of_year[yy].get(author, 0) + 1
  331. else:
  332. self.author_of_year[yy] = {}
  333. self.author_of_year[yy][author] = 1
  334. self.commits_by_year[yy] = self.commits_by_year.get(yy, 0) + 1
  335. # authors: active days
  336. yymmdd = date.strftime('%Y-%m-%d')
  337. if 'last_active_day' not in self.authors[author]:
  338. self.authors[author]['last_active_day'] = yymmdd
  339. self.authors[author]['active_days'] = set([yymmdd])
  340. elif yymmdd != self.authors[author]['last_active_day']:
  341. self.authors[author]['last_active_day'] = yymmdd
  342. self.authors[author]['active_days'].add(yymmdd)
  343. # project: active days
  344. if yymmdd != self.last_active_day:
  345. self.last_active_day = yymmdd
  346. self.active_days.add(yymmdd)
  347. # timezone
  348. self.commits_by_timezone[timezone] = self.commits_by_timezone.get(timezone, 0) + 1
  349. # TODO Optimize this, it's the worst bottleneck
  350. # outputs "<stamp> <files>" for each revision
  351. revlines = getpipeoutput(['git rev-list --pretty=format:"%%at %%T" %s' % getcommitrange('HEAD'), 'grep -v ^commit']).strip().split('\n')
  352. lines = []
  353. for revline in revlines:
  354. time, rev = revline.split(' ')
  355. linecount = self.getFilesInCommit(rev)
  356. lines.append('%d %d' % (int(time), linecount))
  357. self.total_commits += len(lines)
  358. for line in lines:
  359. parts = line.split(' ')
  360. if len(parts) != 2:
  361. continue
  362. (stamp, files) = parts[0:2]
  363. try:
  364. self.files_by_stamp[int(stamp)] = int(files)
  365. except ValueError:
  366. print 'Warning: failed to parse line "%s"' % line
  367. # extensions and size of files
  368. lines = getpipeoutput(['git ls-tree -r -l -z %s' % getcommitrange('HEAD', end_only = True)]).split('\000')
  369. for line in lines:
  370. if len(line) == 0:
  371. continue
  372. parts = re.split('\s+', line, 5)
  373. if parts[0] == '160000' and parts[3] == '-':
  374. # skip submodules
  375. continue
  376. sha1 = parts[2]
  377. size = int(parts[3])
  378. fullpath = parts[4]
  379. self.total_size += size
  380. self.total_files += 1
  381. filename = fullpath.split('/')[-1] # strip directories
  382. if filename.find('.') == -1 or filename.rfind('.') == 0:
  383. ext = ''
  384. else:
  385. ext = filename[(filename.rfind('.') + 1):]
  386. if len(ext) > conf['max_ext_length']:
  387. ext = ''
  388. if ext not in self.extensions:
  389. self.extensions[ext] = {'files': 0, 'lines': 0}
  390. self.extensions[ext]['files'] += 1
  391. try:
  392. self.extensions[ext]['lines'] += self.getLinesInBlob(sha1)
  393. except:
  394. print 'Warning: Could not count lines for file "%s"' % line
  395. # line statistics
  396. # outputs:
  397. # N files changed, N insertions (+), N deletions(-)
  398. # <stamp> <author>
  399. self.changes_by_date = {} # stamp -> { files, ins, del }
  400. # computation of lines of code by date is better done
  401. # on a linear history.
  402. extra = ''
  403. if conf['linear_linestats']:
  404. extra = '--first-parent -m'
  405. lines = getpipeoutput(['git log --shortstat %s --pretty=format:"%%at %%aN" %s' % (extra, getcommitrange('HEAD'))]).split('\n')
  406. lines.reverse()
  407. files = 0; inserted = 0; deleted = 0; total_lines = 0
  408. author = None
  409. for line in lines:
  410. if len(line) == 0:
  411. continue
  412. # <stamp> <author>
  413. if re.search('files? changed', line) == None:
  414. pos = line.find(' ')
  415. if pos != -1:
  416. try:
  417. (stamp, author) = (int(line[:pos]), line[pos+1:])
  418. if author in conf['merge_authors']:
  419. author = conf['merge_authors'][author]
  420. self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted, 'lines': total_lines }
  421. date = datetime.datetime.fromtimestamp(stamp)
  422. yymm = date.strftime('%Y-%m')
  423. self.lines_added_by_month[yymm] = self.lines_added_by_month.get(yymm, 0) + inserted
  424. self.lines_removed_by_month[yymm] = self.lines_removed_by_month.get(yymm, 0) + deleted
  425. yy = date.year
  426. self.lines_added_by_year[yy] = self.lines_added_by_year.get(yy,0) + inserted
  427. self.lines_removed_by_year[yy] = self.lines_removed_by_year.get(yy, 0) + deleted
  428. files, inserted, deleted = 0, 0, 0
  429. except ValueError:
  430. print 'Warning: unexpected line "%s"' % line
  431. else:
  432. print 'Warning: unexpected line "%s"' % line
  433. else:
  434. numbers = getstatsummarycounts(line)
  435. if len(numbers) == 3:
  436. (files, inserted, deleted) = map(lambda el : int(el), numbers)
  437. total_lines += inserted
  438. total_lines -= deleted
  439. self.total_lines_added += inserted
  440. self.total_lines_removed += deleted
  441. else:
  442. print 'Warning: failed to handle line "%s"' % line
  443. (files, inserted, deleted) = (0, 0, 0)
  444. #self.changes_by_date[stamp] = { 'files': files, 'ins': inserted, 'del': deleted }
  445. self.total_lines = total_lines
  446. # Per-author statistics
  447. # defined for stamp, author only if author commited at this timestamp.
  448. self.changes_by_date_by_author = {} # stamp -> author -> lines_added
  449. # Similar to the above, but never use --first-parent
  450. # (we need to walk through every commit to know who
  451. # committed what, not just through mainline)
  452. lines = getpipeoutput(['git log --shortstat --date-order --pretty=format:"%%at %%aN" %s' % (getcommitrange('HEAD'))]).split('\n')
  453. lines.reverse()
  454. files = 0; inserted = 0; deleted = 0
  455. author = None
  456. stamp = 0
  457. for line in lines:
  458. if len(line) == 0:
  459. continue
  460. # <stamp> <author>
  461. if re.search('files? changed', line) == None:
  462. pos = line.find(' ')
  463. if pos != -1:
  464. try:
  465. oldstamp = stamp
  466. (stamp, author) = (int(line[:pos]), line[pos+1:])
  467. if author in conf['merge_authors']:
  468. author = conf['merge_authors'][author]
  469. if oldstamp > stamp:
  470. # clock skew, keep old timestamp to avoid having ugly graph
  471. stamp = oldstamp
  472. if author not in self.authors:
  473. self.authors[author] = { 'lines_added' : 0, 'lines_removed' : 0, 'commits' : 0}
  474. self.authors[author]['commits'] = self.authors[author].get('commits', 0) + 1
  475. self.authors[author]['lines_added'] = self.authors[author].get('lines_added', 0) + inserted
  476. self.authors[author]['lines_removed'] = self.authors[author].get('lines_removed', 0) + deleted
  477. if stamp not in self.changes_by_date_by_author:
  478. self.changes_by_date_by_author[stamp] = {}
  479. if author not in self.changes_by_date_by_author[stamp]:
  480. self.changes_by_date_by_author[stamp][author] = {}
  481. self.changes_by_date_by_author[stamp][author]['lines_added'] = self.authors[author]['lines_added']
  482. self.changes_by_date_by_author[stamp][author]['commits'] = self.authors[author]['commits']
  483. files, inserted, deleted = 0, 0, 0
  484. except ValueError:
  485. print 'Warning: unexpected line "%s"' % line
  486. else:
  487. print 'Warning: unexpected line "%s"' % line
  488. else:
  489. numbers = getstatsummarycounts(line);
  490. if len(numbers) == 3:
  491. (files, inserted, deleted) = map(lambda el : int(el), numbers)
  492. else:
  493. print 'Warning: failed to handle line "%s"' % line
  494. (files, inserted, deleted) = (0, 0, 0)
  495. def refine(self):
  496. # authors
  497. # name -> {place_by_commits, commits_frac, date_first, date_last, timedelta}
  498. self.authors_by_commits = getkeyssortedbyvaluekey(self.authors, 'commits')
  499. self.authors_by_commits.reverse() # most first
  500. for i, name in enumerate(self.authors_by_commits):
  501. self.authors[name]['place_by_commits'] = i + 1
  502. for name in self.authors.keys():
  503. a = self.authors[name]
  504. a['commits_frac'] = (100 * float(a['commits'])) / self.getTotalCommits()
  505. date_first = datetime.datetime.fromtimestamp(a['first_commit_stamp'])
  506. date_last = datetime.datetime.fromtimestamp(a['last_commit_stamp'])
  507. delta = date_last - date_first
  508. a['date_first'] = date_first.strftime('%Y-%m-%d')
  509. a['date_last'] = date_last.strftime('%Y-%m-%d')
  510. a['timedelta'] = delta
  511. if 'lines_added' not in a: a['lines_added'] = 0
  512. if 'lines_removed' not in a: a['lines_removed'] = 0
  513. def getActiveDays(self):
  514. return self.active_days
  515. def getActivityByDayOfWeek(self):
  516. return self.activity_by_day_of_week
  517. def getActivityByHourOfDay(self):
  518. return self.activity_by_hour_of_day
  519. def getAuthorInfo(self, author):
  520. return self.authors[author]
  521. def getAuthors(self, limit = None):
  522. res = getkeyssortedbyvaluekey(self.authors, 'commits')
  523. res.reverse()
  524. return res[:limit]
  525. def getCommitDeltaDays(self):
  526. return (self.last_commit_stamp / 86400 - self.first_commit_stamp / 86400) + 1
  527. def getDomainInfo(self, domain):
  528. return self.domains[domain]
  529. def getDomains(self):
  530. return self.domains.keys()
  531. def getFilesInCommit(self, rev):
  532. try:
  533. res = self.cache['files_in_tree'][rev]
  534. except:
  535. res = int(getpipeoutput(['git ls-tree -r --name-only "%s"' % rev, 'wc -l']).split('\n')[0])
  536. if 'files_in_tree' not in self.cache:
  537. self.cache['files_in_tree'] = {}
  538. self.cache['files_in_tree'][rev] = res
  539. return res
  540. def getFirstCommitDate(self):
  541. return datetime.datetime.fromtimestamp(self.first_commit_stamp)
  542. def getLastCommitDate(self):
  543. return datetime.datetime.fromtimestamp(self.last_commit_stamp)
  544. def getLinesInBlob(self, sha1):
  545. try:
  546. res = self.cache['lines_in_blob'][sha1]
  547. except:
  548. res = int(getpipeoutput(['git cat-file blob %s' % sha1, 'wc -l']).split()[0])
  549. if 'lines_in_blob' not in self.cache:
  550. self.cache['lines_in_blob'] = {}
  551. self.cache['lines_in_blob'][sha1] = res
  552. return res
  553. def getTags(self):
  554. lines = getpipeoutput(['git show-ref --tags', 'cut -d/ -f3'])
  555. return lines.split('\n')
  556. def getTagDate(self, tag):
  557. return self.revToDate('tags/' + tag)
  558. def getTotalAuthors(self):
  559. return self.total_authors
  560. def getTotalCommits(self):
  561. return self.total_commits
  562. def getTotalFiles(self):
  563. return self.total_files
  564. def getTotalLOC(self):
  565. return self.total_lines
  566. def getTotalSize(self):
  567. return self.total_size
  568. def revToDate(self, rev):
  569. stamp = int(getpipeoutput(['git log --pretty=format:%%at "%s" -n 1' % rev]))
  570. return datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d')
  571. class ReportCreator:
  572. """Creates the actual report based on given data."""
  573. def __init__(self):
  574. pass
  575. def create(self, data, path):
  576. self.data = data
  577. self.path = path
  578. def html_linkify(text):
  579. return text.lower().replace(' ', '_')
  580. def html_header(level, text):
  581. name = html_linkify(text)
  582. return '\n<h%d><a href="#%s" name="%s">%s</a></h%d>\n\n' % (level, name, name, text, level)
  583. class HTMLReportCreator(ReportCreator):
  584. def create(self, data, path):
  585. ReportCreator.create(self, data, path)
  586. self.title = data.projectname
  587. # copy static files. Looks in the binary directory, ../share/gitstats and /usr/share/gitstats
  588. binarypath = os.path.dirname(os.path.abspath(__file__))
  589. secondarypath = os.path.join(binarypath, '..', 'share', 'gitstats')
  590. basedirs = [binarypath, secondarypath, '/usr/share/gitstats']
  591. for file in ('bootstrap.js','bootstrap.css','gitstats.css', 'sortable.js', 'arrow-up.gif', 'arrow-down.gif', 'arrow-none.gif'):
  592. for base in basedirs:
  593. src = base + '/' + file
  594. if os.path.exists(src):
  595. shutil.copyfile(src, path + '/' + file)
  596. break
  597. else:
  598. print 'Warning: "%s" not found, so not copied (searched: %s)' % (file, basedirs)
  599. f = open(path + "/index.html", 'w')
  600. format = '%Y-%m-%d %H:%M:%S'
  601. self.printHeader(f)
  602. f.write('<h1>GitStats - %s</h1>' % data.projectname)
  603. self.printNav(f)
  604. f.write('<dl>')
  605. f.write('<dt>Project name</dt><dd>%s</dd>' % (data.projectname))
  606. f.write('<dt>Generated</dt><dd>%s (in %d seconds)</dd>' % (datetime.datetime.now().strftime(format), time.time() - data.getStampCreated()))
  607. f.write('<dt>Generator</dt><dd><a href="http://gitstats.sourceforge.net/">GitStats</a> (version %s), %s, %s</dd>' % (getversion(), getgitversion(), getgnuplotversion()))
  608. f.write('<dt>Report Period</dt><dd>%s to %s</dd>' % (data.getFirstCommitDate().strftime(format), data.getLastCommitDate().strftime(format)))
  609. f.write('<dt>Age</dt><dd>%d days, %d active days (%3.2f%%)</dd>' % (data.getCommitDeltaDays(), len(data.getActiveDays()), (100.0 * len(data.getActiveDays()) / data.getCommitDeltaDays())))
  610. f.write('<dt>Total Files</dt><dd>%s</dd>' % data.getTotalFiles())
  611. f.write('<dt>Total Lines of Code</dt><dd>%s (%d added, %d removed)</dd>' % (data.getTotalLOC(), data.total_lines_added, data.total_lines_removed))
  612. f.write('<dt>Total Commits</dt><dd>%s (average %.1f commits per active day, %.1f per all days)</dd>' % (data.getTotalCommits(), float(data.getTotalCommits()) / len(data.getActiveDays()), float(data.getTotalCommits()) / data.getCommitDeltaDays()))
  613. f.write('<dt>Authors</dt><dd>%s (average %.1f commits per author)</dd>' % (data.getTotalAuthors(), (1.0 * data.getTotalCommits()) / data.getTotalAuthors()))
  614. f.write('</dl>')
  615. f.write('</body>\n</html>')
  616. f.close()
  617. ###
  618. # Activity
  619. f = open(path + '/activity.html', 'w')
  620. self.printHeader(f)
  621. f.write('<h1>Activity</h1>')
  622. self.printNav(f)
  623. #f.write('<h2>Last 30 days</h2>')
  624. #f.write('<h2>Last 12 months</h2>')
  625. # Weekly activity
  626. WEEKS = 32
  627. f.write(html_header(2, 'Weekly activity'))
  628. f.write('<p>Last %d weeks</p>' % WEEKS)
  629. # generate weeks to show (previous N weeks from now)
  630. now = datetime.datetime.now()
  631. deltaweek = datetime.timedelta(7)
  632. weeks = []
  633. stampcur = now
  634. for i in range(0, WEEKS):
  635. weeks.insert(0, stampcur.strftime('%Y-%W'))
  636. stampcur -= deltaweek
  637. # top row: commits & bar
  638. f.write('<table class="noborders"><tr>')
  639. for i in range(0, WEEKS):
  640. commits = 0
  641. if weeks[i] in data.activity_by_year_week:
  642. commits = data.activity_by_year_week[weeks[i]]
  643. percentage = 0
  644. if weeks[i] in data.activity_by_year_week:
  645. percentage = float(data.activity_by_year_week[weeks[i]]) / data.activity_by_year_week_peak
  646. height = max(1, int(200 * percentage))
  647. f.write('<td style="text-align: center; vertical-align: bottom">%d<div style="display: block; background-color: red; width: 20px; height: %dpx"></div></td>' % (commits, height))
  648. # bottom row: year/week
  649. f.write('</tr><tr>')
  650. for i in range(0, WEEKS):
  651. f.write('<td>%s</td>' % (WEEKS - i))
  652. f.write('</tr></table>')
  653. # Hour of Day
  654. f.write(html_header(2, 'Hour of Day'))
  655. hour_of_day = data.getActivityByHourOfDay()
  656. f.write('<table><tr><th>Hour</th>')
  657. for i in range(0, 24):
  658. f.write('<th>%d</th>' % i)
  659. f.write('</tr>\n<tr><th>Commits</th>')
  660. fp = open(path + '/hour_of_day.dat', 'w')
  661. for i in range(0, 24):
  662. if i in hour_of_day:
  663. r = 127 + int((float(hour_of_day[i]) / data.activity_by_hour_of_day_busiest) * 128)
  664. f.write('<td style="background-color: rgb(%d, 0, 0)">%d</td>' % (r, hour_of_day[i]))
  665. fp.write('%d %d\n' % (i, hour_of_day[i]))
  666. else:
  667. f.write('<td>0</td>')
  668. fp.write('%d 0\n' % i)
  669. fp.close()
  670. f.write('</tr>\n<tr><th>%</th>')
  671. totalcommits = data.getTotalCommits()
  672. for i in range(0, 24):
  673. if i in hour_of_day:
  674. r = 127 + int((float(hour_of_day[i]) / data.activity_by_hour_of_day_busiest) * 128)
  675. f.write('<td style="background-color: rgb(%d, 0, 0)">%.2f</td>' % (r, (100.0 * hour_of_day[i]) / totalcommits))
  676. else:
  677. f.write('<td>0.00</td>')
  678. f.write('</tr></table>')
  679. f.write('<img src="hour_of_day.png" alt="Hour of Day" />')
  680. fg = open(path + '/hour_of_day.dat', 'w')
  681. for i in range(0, 24):
  682. if i in hour_of_day:
  683. fg.write('%d %d\n' % (i + 1, hour_of_day[i]))
  684. else:
  685. fg.write('%d 0\n' % (i + 1))
  686. fg.close()
  687. # Day of Week
  688. f.write(html_header(2, 'Day of Week'))
  689. day_of_week = data.getActivityByDayOfWeek()
  690. f.write('<div class="vtable"><table>')
  691. f.write('<tr><th>Day</th><th>Total (%)</th></tr>')
  692. fp = open(path + '/day_of_week.dat', 'w')
  693. for d in range(0, 7):
  694. commits = 0
  695. if d in day_of_week:
  696. commits = day_of_week[d]
  697. fp.write('%d %s %d\n' % (d + 1, WEEKDAYS[d], commits))
  698. f.write('<tr>')
  699. f.write('<th>%s</th>' % (WEEKDAYS[d]))
  700. if d in day_of_week:
  701. f.write('<td>%d (%.2f%%)</td>' % (day_of_week[d], (100.0 * day_of_week[d]) / totalcommits))
  702. else:
  703. f.write('<td>0</td>')
  704. f.write('</tr>')
  705. f.write('</table></div>')
  706. f.write('<img src="day_of_week.png" alt="Day of Week" />')
  707. fp.close()
  708. # Hour of Week
  709. f.write(html_header(2, 'Hour of Week'))
  710. f.write('<table>')
  711. f.write('<tr><th>Weekday</th>')
  712. for hour in range(0, 24):
  713. f.write('<th>%d</th>' % (hour))
  714. f.write('</tr>')
  715. for weekday in range(0, 7):
  716. f.write('<tr><th>%s</th>' % (WEEKDAYS[weekday]))
  717. for hour in range(0, 24):
  718. try:
  719. commits = data.activity_by_hour_of_week[weekday][hour]
  720. except KeyError:
  721. commits = 0
  722. if commits != 0:
  723. f.write('<td')
  724. r = 127 + int((float(commits) / data.activity_by_hour_of_week_busiest) * 128)
  725. f.write(' style="background-color: rgb(%d, 0, 0)"' % r)
  726. f.write('>%d</td>' % commits)
  727. else:
  728. f.write('<td></td>')
  729. f.write('</tr>')
  730. f.write('</table>')
  731. # Month of Year
  732. f.write(html_header(2, 'Month of Year'))
  733. f.write('<div class="vtable"><table>')
  734. f.write('<tr><th>Month</th><th>Commits (%)</th></tr>')
  735. fp = open (path + '/month_of_year.dat', 'w')
  736. for mm in range(1, 13):
  737. commits = 0
  738. if mm in data.activity_by_month_of_year:
  739. commits = data.activity_by_month_of_year[mm]
  740. f.write('<tr><td>%d</td><td>%d (%.2f %%)</td></tr>' % (mm, commits, (100.0 * commits) / data.getTotalCommits()))
  741. fp.write('%d %d\n' % (mm, commits))
  742. fp.close()
  743. f.write('</table></div>')
  744. f.write('<img src="month_of_year.png" alt="Month of Year" />')
  745. # Commits by year/month
  746. f.write(html_header(2, 'Commits by year/month'))
  747. f.write('<div class="vtable"><table><tr><th>Month</th><th>Commits</th><th>Lines added</th><th>Lines removed</th></tr>')
  748. for yymm in reversed(sorted(data.commits_by_month.keys())):
  749. f.write('<tr><td>%s</td><td>%d</td><td>%d</td><td>%d</td></tr>' % (yymm, data.commits_by_month.get(yymm,0), data.lines_added_by_month.get(yymm,0), data.lines_removed_by_month.get(yymm,0)))
  750. f.write('</table></div>')
  751. f.write('<img src="commits_by_year_month.png" alt="Commits by year/month" />')
  752. fg = open(path + '/commits_by_year_month.dat', 'w')
  753. for yymm in sorted(data.commits_by_month.keys()):
  754. fg.write('%s %s\n' % (yymm, data.commits_by_month[yymm]))
  755. fg.close()
  756. # Commits by year
  757. f.write(html_header(2, 'Commits by Year'))
  758. f.write('<div class="vtable"><table><tr><th>Year</th><th>Commits (% of all)</th><th>Lines added</th><th>Lines removed</th></tr>')
  759. for yy in reversed(sorted(data.commits_by_year.keys())):
  760. f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%d</td><td>%d</td></tr>' % (yy, data.commits_by_year.get(yy,0), (100.0 * data.commits_by_year.get(yy,0)) / data.getTotalCommits(), data.lines_added_by_year.get(yy,0), data.lines_removed_by_year.get(yy,0)))
  761. f.write('</table></div>')
  762. f.write('<img src="commits_by_year.png" alt="Commits by Year" />')
  763. fg = open(path + '/commits_by_year.dat', 'w')
  764. for yy in sorted(data.commits_by_year.keys()):
  765. fg.write('%d %d\n' % (yy, data.commits_by_year[yy]))
  766. fg.close()
  767. # Commits by timezone
  768. f.write(html_header(2, 'Commits by Timezone'))
  769. f.write('<table><tr>')
  770. f.write('<th>Timezone</th><th>Commits</th>')
  771. max_commits_on_tz = max(data.commits_by_timezone.values())
  772. for i in sorted(data.commits_by_timezone.keys(), key = lambda n : int(n)):
  773. commits = data.commits_by_timezone[i]
  774. r = 127 + int((float(commits) / max_commits_on_tz) * 128)
  775. f.write('<tr><th>%s</th><td style="background-color: rgb(%d, 0, 0)">%d</td></tr>' % (i, r, commits))
  776. f.write('</tr></table>')
  777. f.write('</body></html>')
  778. f.close()
  779. ###
  780. # Authors
  781. f = open(path + '/authors.html', 'w')
  782. self.printHeader(f)
  783. f.write('<h1>Authors</h1>')
  784. self.printNav(f)
  785. # Authors :: List of authors
  786. f.write(html_header(2, 'List of Authors'))
  787. f.write('<table class="authors sortable" id="authors">')
  788. f.write('<tr><th>Author</th><th>Commits (%)</th><th>+ lines</th><th>- lines</th><th>First commit</th><th>Last commit</th><th class="unsortable">Age</th><th>Active days</th><th># by commits</th></tr>')
  789. for author in data.getAuthors(conf['max_authors']):
  790. info = data.getAuthorInfo(author)
  791. f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%d</td><td>%d</td><td>%s</td><td>%s</td><td>%s</td><td>%d</td><td>%d</td></tr>' % (author, info['commits'], info['commits_frac'], info['lines_added'], info['lines_removed'], info['date_first'], info['date_last'], info['timedelta'], len(info['active_days']), info['place_by_commits']))
  792. f.write('</table>')
  793. allauthors = data.getAuthors()
  794. if len(allauthors) > conf['max_authors']:
  795. rest = allauthors[conf['max_authors']:]
  796. f.write('<p class="moreauthors">These didn\'t make it to the top: %s</p>' % ', '.join(rest))
  797. f.write(html_header(2, 'Cumulated Added Lines of Code per Author'))
  798. f.write('<img src="lines_of_code_by_author.png" alt="Lines of code per Author" />')
  799. if len(allauthors) > conf['max_authors']:
  800. f.write('<p class="moreauthors">Only top %d authors shown</p>' % conf['max_authors'])
  801. f.write(html_header(2, 'Commits per Author'))
  802. f.write('<img src="commits_by_author.png" alt="Commits per Author" />')
  803. if len(allauthors) > conf['max_authors']:
  804. f.write('<p class="moreauthors">Only top %d authors shown</p>' % conf['max_authors'])
  805. fgl = open(path + '/lines_of_code_by_author.dat', 'w')
  806. fgc = open(path + '/commits_by_author.dat', 'w')
  807. lines_by_authors = {} # cumulated added lines by
  808. # author. to save memory,
  809. # changes_by_date_by_author[stamp][author] is defined
  810. # only at points where author commits.
  811. # lines_by_authors allows us to generate all the
  812. # points in the .dat file.
  813. # Don't rely on getAuthors to give the same order each
  814. # time. Be robust and keep the list in a variable.
  815. commits_by_authors = {} # cumulated added lines by
  816. self.authors_to_plot = data.getAuthors(conf['max_authors'])
  817. for author in self.authors_to_plot:
  818. lines_by_authors[author] = 0
  819. commits_by_authors[author] = 0
  820. for stamp in sorted(data.changes_by_date_by_author.keys()):
  821. fgl.write('%d' % stamp)
  822. fgc.write('%d' % stamp)
  823. for author in self.authors_to_plot:
  824. if author in data.changes_by_date_by_author[stamp].keys():
  825. lines_by_authors[author] = data.changes_by_date_by_author[stamp][author]['lines_added']
  826. commits_by_authors[author] = data.changes_by_date_by_author[stamp][author]['commits']
  827. fgl.write(' %d' % lines_by_authors[author])
  828. fgc.write(' %d' % commits_by_authors[author])
  829. fgl.write('\n')
  830. fgc.write('\n')
  831. fgl.close()
  832. fgc.close()
  833. # Authors :: Author of Month
  834. f.write(html_header(2, 'Author of Month'))
  835. f.write('<table class="sortable" id="aom">')
  836. f.write('<tr><th>Month</th><th>Author</th><th>Commits (%%)</th><th class="unsortable">Next top %d</th><th>Number of authors</th></tr>' % conf['authors_top'])
  837. for yymm in reversed(sorted(data.author_of_month.keys())):
  838. authordict = data.author_of_month[yymm]
  839. authors = getkeyssortedbyvalues(authordict)
  840. authors.reverse()
  841. commits = data.author_of_month[yymm][authors[0]]
  842. next = ', '.join(authors[1:conf['authors_top']+1])
  843. f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td><td>%s</td><td>%d</td></tr>' % (yymm, authors[0], commits, (100.0 * commits) / data.commits_by_month[yymm], data.commits_by_month[yymm], next, len(authors)))
  844. f.write('</table>')
  845. f.write(html_header(2, 'Author of Year'))
  846. f.write('<table class="sortable" id="aoy"><tr><th>Year</th><th>Author</th><th>Commits (%%)</th><th class="unsortable">Next top %d</th><th>Number of authors</th></tr>' % conf['authors_top'])
  847. for yy in reversed(sorted(data.author_of_year.keys())):
  848. authordict = data.author_of_year[yy]
  849. authors = getkeyssortedbyvalues(authordict)
  850. authors.reverse()
  851. commits = data.author_of_year[yy][authors[0]]
  852. next = ', '.join(authors[1:conf['authors_top']+1])
  853. f.write('<tr><td>%s</td><td>%s</td><td>%d (%.2f%% of %d)</td><td>%s</td><td>%d</td></tr>' % (yy, authors[0], commits, (100.0 * commits) / data.commits_by_year[yy], data.commits_by_year[yy], next, len(authors)))
  854. f.write('</table>')
  855. # Domains
  856. f.write(html_header(2, 'Commits by Domains'))
  857. domains_by_commits = getkeyssortedbyvaluekey(data.domains, 'commits')
  858. domains_by_commits.reverse() # most first
  859. f.write('<div class="vtable"><table>')
  860. f.write('<tr><th>Domains</th><th>Total (%)</th></tr>')
  861. fp = open(path + '/domains.dat', 'w')
  862. n = 0
  863. for domain in domains_by_commits:
  864. if n == conf['max_domains']:
  865. break
  866. commits = 0
  867. n += 1
  868. info = data.getDomainInfo(domain)
  869. fp.write('%s %d %d\n' % (domain, n , info['commits']))
  870. f.write('<tr><th>%s</th><td>%d (%.2f%%)</td></tr>' % (domain, info['commits'], (100.0 * info['commits'] / totalcommits)))
  871. f.write('</table></div>')
  872. f.write('<img src="domains.png" alt="Commits by Domains" />')
  873. fp.close()
  874. f.write('</body></html>')
  875. f.close()
  876. ###
  877. # Files
  878. f = open(path + '/files.html', 'w')
  879. self.printHeader(f)
  880. f.write('<h1>Files</h1>')
  881. self.printNav(f)
  882. f.write('<dl>\n')
  883. f.write('<dt>Total files</dt><dd>%d</dd>' % data.getTotalFiles())
  884. f.write('<dt>Total lines</dt><dd>%d</dd>' % data.getTotalLOC())
  885. try:
  886. f.write('<dt>Average file size</dt><dd>%.2f bytes</dd>' % (float(data.getTotalSize()) / data.getTotalFiles()))
  887. except ZeroDivisionError:
  888. pass
  889. f.write('</dl>\n')
  890. # Files :: File count by date
  891. f.write(html_header(2, 'File count by date'))
  892. # use set to get rid of duplicate/unnecessary entries
  893. files_by_date = set()
  894. for stamp in sorted(data.files_by_stamp.keys()):
  895. files_by_date.add('%s %d' % (datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), data.files_by_stamp[stamp]))
  896. fg = open(path + '/files_by_date.dat', 'w')
  897. for line in sorted(list(files_by_date)):
  898. fg.write('%s\n' % line)
  899. #for stamp in sorted(data.files_by_stamp.keys()):
  900. # fg.write('%s %d\n' % (datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), data.files_by_stamp[stamp]))
  901. fg.close()
  902. f.write('<img src="files_by_date.png" alt="Files by Date" />')
  903. #f.write('<h2>Average file size by date</h2>')
  904. # Files :: Extensions
  905. f.write(html_header(2, 'Extensions'))
  906. f.write('<table class="sortable" id="ext"><tr><th>Extension</th><th>Files (%)</th><th>Lines (%)</th><th>Lines/file</th></tr>')
  907. for ext in sorted(data.extensions.keys()):
  908. files = data.extensions[ext]['files']
  909. lines = data.extensions[ext]['lines']
  910. try:
  911. loc_percentage = (100.0 * lines) / data.getTotalLOC()
  912. except ZeroDivisionError:
  913. loc_percentage = 0
  914. f.write('<tr><td>%s</td><td>%d (%.2f%%)</td><td>%d (%.2f%%)</td><td>%d</td></tr>' % (ext, files, (100.0 * files) / data.getTotalFiles(), lines, loc_percentage, lines / files))
  915. f.write('</table>')
  916. f.write('</body></html>')
  917. f.close()
  918. ###
  919. # Lines
  920. f = open(path + '/lines.html', 'w')
  921. self.printHeader(f)
  922. f.write('<h1>Lines</h1>')
  923. self.printNav(f)
  924. f.write('<dl>\n')
  925. f.write('<dt>Total lines</dt><dd>%d</dd>' % data.getTotalLOC())
  926. f.write('</dl>\n')
  927. f.write(html_header(2, 'Lines of Code'))
  928. f.write('<img src="lines_of_code.png" />')
  929. fg = open(path + '/lines_of_code.dat', 'w')
  930. for stamp in sorted(data.changes_by_date.keys()):
  931. fg.write('%d %d\n' % (stamp, data.changes_by_date[stamp]['lines']))
  932. fg.close()
  933. f.write('</body></html>')
  934. f.close()
  935. ###
  936. # tags.html
  937. f = open(path + '/tags.html', 'w')
  938. self.printHeader(f)
  939. f.write('<h1>Tags</h1>')
  940. self.printNav(f)
  941. f.write('<dl>')
  942. f.write('<dt>Total tags</dt><dd>%d</dd>' % len(data.tags))
  943. if len(data.tags) > 0:
  944. f.write('<dt>Average commits per tag</dt><dd>%.2f</dd>' % (1.0 * data.getTotalCommits() / len(data.tags)))
  945. f.write('</dl>')
  946. f.write('<table class="tags">')
  947. f.write('<tr><th>Name</th><th>Date</th><th>Commits</th><th>Authors</th></tr>')
  948. # sort the tags by date desc
  949. tags_sorted_by_date_desc = map(lambda el : el[1], reversed(sorted(map(lambda el : (el[1]['date'], el[0]), data.tags.items()))))
  950. for tag in tags_sorted_by_date_desc:
  951. authorinfo = []
  952. self.authors_by_commits = getkeyssortedbyvalues(data.tags[tag]['authors'])
  953. for i in reversed(self.authors_by_commits):
  954. authorinfo.append('%s (%d)' % (i, data.tags[tag]['authors'][i]))
  955. f.write('<tr><td>%s</td><td>%s</td><td>%d</td><td>%s</td></tr>' % (tag, data.tags[tag]['date'], data.tags[tag]['commits'], ', '.join(authorinfo)))
  956. f.write('</table>')
  957. f.write('</body></html>')
  958. f.close()
  959. self.createGraphs(path)
  960. def createGraphs(self, path):
  961. print 'Generating graphs...'
  962. # hour of day
  963. f = open(path + '/hour_of_day.plot', 'w')
  964. f.write(GNUPLOT_COMMON)
  965. f.write(
  966. """
  967. set output 'hour_of_day.png'
  968. unset key
  969. set xrange [0.5:24.5]
  970. set xtics 4
  971. set grid y
  972. set ylabel "Commits"
  973. plot 'hour_of_day.dat' using 1:2:(0.5) w boxes fs solid
  974. """)
  975. f.close()
  976. # day of week
  977. f = open(path + '/day_of_week.plot', 'w')
  978. f.write(GNUPLOT_COMMON)
  979. f.write(
  980. """
  981. set output 'day_of_week.png'
  982. unset key
  983. set xrange [0.5:7.5]
  984. set xtics 1
  985. set grid y
  986. set ylabel "Commits"
  987. plot 'day_of_week.dat' using 1:3:(0.5):xtic(2) w boxes fs solid
  988. """)
  989. f.close()
  990. # Domains
  991. f = open(path + '/domains.plot', 'w')
  992. f.write(GNUPLOT_COMMON)
  993. f.write(
  994. """
  995. set output 'domains.png'
  996. unset key
  997. unset xtics
  998. set yrange [0:]
  999. set grid y
  1000. set ylabel "Commits"
  1001. plot 'domains.dat' using 2:3:(0.5) with boxes fs solid, '' using 2:3:1 with labels rotate by 45 offset 0,1
  1002. """)
  1003. f.close()
  1004. # Month of Year
  1005. f = open(path + '/month_of_year.plot', 'w')
  1006. f.write(GNUPLOT_COMMON)
  1007. f.write(
  1008. """
  1009. set output 'month_of_year.png'
  1010. unset key
  1011. set xrange [0.5:12.5]
  1012. set xtics 1
  1013. set grid y
  1014. set ylabel "Commits"
  1015. plot 'month_of_year.dat' using 1:2:(0.5) w boxes fs solid
  1016. """)
  1017. f.close()
  1018. # commits_by_year_month
  1019. f = open(path + '/commits_by_year_month.plot', 'w')
  1020. f.write(GNUPLOT_COMMON)
  1021. f.write(
  1022. """
  1023. set output 'commits_by_year_month.png'
  1024. unset key
  1025. set xdata time
  1026. set timefmt "%Y-%m"
  1027. set format x "%Y-%m"
  1028. set xtics rotate
  1029. set bmargin 5
  1030. set grid y
  1031. set ylabel "Commits"
  1032. plot 'commits_by_year_month.dat' using 1:2:(0.5) w boxes fs solid
  1033. """)
  1034. f.close()
  1035. # commits_by_year
  1036. f = open(path + '/commits_by_year.plot', 'w')
  1037. f.write(GNUPLOT_COMMON)
  1038. f.write(
  1039. """
  1040. set output 'commits_by_year.png'
  1041. unset key
  1042. set xtics 1 rotate
  1043. set grid y
  1044. set ylabel "Commits"
  1045. set yrange [0:]
  1046. plot 'commits_by_year.dat' using 1:2:(0.5) w boxes fs solid
  1047. """)
  1048. f.close()
  1049. # Files by date
  1050. f = open(path + '/files_by_date.plot', 'w')
  1051. f.write(GNUPLOT_COMMON)
  1052. f.write(
  1053. """
  1054. set output 'files_by_date.png'
  1055. unset key
  1056. set xdata time
  1057. set timefmt "%Y-%m-%d"
  1058. set format x "%Y-%m-%d"
  1059. set grid y
  1060. set ylabel "Files"
  1061. set xtics rotate
  1062. set ytics autofreq
  1063. set bmargin 6
  1064. plot 'files_by_date.dat' using 1:2 w steps
  1065. """)
  1066. f.close()
  1067. # Lines of Code
  1068. f = open(path + '/lines_of_code.plot', 'w')
  1069. f.write(GNUPLOT_COMMON)
  1070. f.write(
  1071. """
  1072. set output 'lines_of_code.png'
  1073. unset key
  1074. set xdata time
  1075. set timefmt "%s"
  1076. set format x "%Y-%m-%d"
  1077. set grid y
  1078. set ylabel "Lines"
  1079. set xtics rotate
  1080. set bmargin 6
  1081. plot 'lines_of_code.dat' using 1:2 w lines
  1082. """)
  1083. f.close()
  1084. # Lines of Code Added per author
  1085. f = open(path + '/lines_of_code_by_author.plot', 'w')
  1086. f.write(GNUPLOT_COMMON)
  1087. f.write(
  1088. """
  1089. set terminal png transparent size 640,480
  1090. set output 'lines_of_code_by_author.png'
  1091. set key left top
  1092. set xdata time
  1093. set timefmt "%s"
  1094. set format x "%Y-%m-%d"
  1095. set grid y
  1096. set ylabel "Lines"
  1097. set xtics rotate
  1098. set bmargin 6
  1099. plot """
  1100. )
  1101. i = 1
  1102. plots = []
  1103. for a in self.authors_to_plot:
  1104. i = i + 1
  1105. plots.append("""'lines_of_code_by_author.dat' using 1:%d title "%s" w lines""" % (i, a.replace("\"", "\\\"")))
  1106. f.write(", ".join(plots))
  1107. f.write('\n')
  1108. f.close()
  1109. # Commits per author
  1110. f = open(path + '/commits_by_author.plot', 'w')
  1111. f.write(GNUPLOT_COMMON)
  1112. f.write(
  1113. """
  1114. set terminal png transparent size 640,480
  1115. set output 'commits_by_author.png'
  1116. set key left top
  1117. set xdata time
  1118. set timefmt "%s"
  1119. set format x "%Y-%m-%d"
  1120. set grid y
  1121. set ylabel "Commits"
  1122. set xtics rotate
  1123. set bmargin 6
  1124. plot """
  1125. )
  1126. i = 1
  1127. plots = []
  1128. for a in self.authors_to_plot:
  1129. i = i + 1
  1130. plots.append("""'commits_by_author.dat' using 1:%d title "%s" w lines""" % (i, a.replace("\"", "\\\"")))
  1131. f.write(", ".join(plots))
  1132. f.write('\n')
  1133. f.close()
  1134. os.chdir(path)
  1135. files = glob.glob(path + '/*.plot')
  1136. for f in files:
  1137. out = getpipeoutput([gnuplot_cmd + ' "%s"' % f])
  1138. if len(out) > 0:
  1139. print out
  1140. def printHeader(self, f, title = ''):
  1141. f.write(
  1142. """<?xml version="1.0" encoding="UTF-8"?>
  1143. <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
  1144. <html xmlns="http://www.w3.org/1999/xhtml">
  1145. <head>
  1146. <title>GitStats - %s</title>
  1147. <link rel="stylesheet" href="%s" type="text/css" />
  1148. <link rel="stylesheet" href="bootstrap.css" type="text/css" />
  1149. <meta name="generator" content="GitStats %s" />
  1150. <script type="text/javascript" src="sortable.js"></script>
  1151. <script type="text/javascript" src="bootstrap.js"></script>
  1152. </head>
  1153. <body>
  1154. """ % (self.title, conf['style'], getversion()))
  1155. def printNav(self, f):
  1156. f.write("""
  1157. <div class="nav">
  1158. <ul>
  1159. <li><a href="index.html">General</a></li>
  1160. <li><a href="activity.html">Activity</a></li>
  1161. <li><a href="authors.html">Authors</a></li>
  1162. <li><a href="files.html">Files</a></li>
  1163. <li><a href="lines.html">Lines</a></li>
  1164. <li><a href="tags.html">Tags</a></li>
  1165. </ul>
  1166. </div>
  1167. """)
  1168. class GitStats:
  1169. def run(self, args_orig):
  1170. optlist, args = getopt.getopt(args_orig, 'c:')
  1171. for o,v in optlist:
  1172. if o == '-c':
  1173. key, value = v.split('=', 1)
  1174. if key not in conf:
  1175. raise KeyError('no such key "%s" in config' % key)
  1176. if isinstance(conf[key], int):
  1177. conf[key] = int(value)
  1178. elif isinstance(conf[key], dict):
  1179. kk,vv = value.split(',', 1)
  1180. conf[key][kk] = vv
  1181. else:
  1182. conf[key] = value
  1183. if len(args) < 2:
  1184. print """
  1185. Usage: gitstats [options] <gitpath..> <outputpath>
  1186. Options:
  1187. -c key=value Override configuration value
  1188. Default config values:
  1189. %s
  1190. """ % conf
  1191. sys.exit(0)
  1192. outputpath = os.path.abspath(args[-1])
  1193. rundir = os.getcwd()
  1194. try:
  1195. os.makedirs(outputpath)
  1196. except OSError:
  1197. pass
  1198. if not os.path.isdir(outputpath):
  1199. print 'FATAL: Output path is not a directory or does not exist'
  1200. sys.exit(1)
  1201. if not getgnuplotversion():
  1202. print 'gnuplot not found'
  1203. sys.exit(1)
  1204. print 'Output path: %s' % outputpath
  1205. cachefile = os.path.join(outputpath, 'gitstats.cache')
  1206. data = GitDataCollector()
  1207. data.loadCache(cachefile)
  1208. for gitpath in args[0:-1]:
  1209. print 'Git path: %s' % gitpath
  1210. os.chdir(gitpath)
  1211. print 'Collecting data...'
  1212. data.collect(gitpath)
  1213. print 'Refining data...'
  1214. data.saveCache(cachefile)
  1215. data.refine()
  1216. os.chdir(rundir)
  1217. print 'Generating report...'
  1218. report = HTMLReportCreator()
  1219. report.create(data, outputpath)
  1220. time_end = time.time()
  1221. exectime_internal = time_end - time_start
  1222. print 'Execution time %.5f secs, %.5f secs (%.2f %%) in external commands)' % (exectime_internal, exectime_external, (100.0 * exectime_external) / exectime_internal)
  1223. if sys.stdin.isatty():
  1224. print 'You may now run:'
  1225. print
  1226. print ' sensible-browser \'%s\'' % os.path.join(outputpath, 'index.html').replace("'", "'\\''")
  1227. print
  1228. if __name__=='__main__':
  1229. g = GitStats()
  1230. g.run(sys.argv[1:])